diff --git a/html-proofer.gemspec b/html-proofer.gemspec
index 33d29bd0..0c3cff3f 100644
--- a/html-proofer.gemspec
+++ b/html-proofer.gemspec
@@ -27,6 +27,7 @@ Gem::Specification.new do |spec|
spec.add_dependency("addressable", "~> 2.3")
spec.add_dependency("async", "~> 2.1")
spec.add_dependency("nokogiri", "~> 1.13")
+ spec.add_dependency("pdf-reader", "~> 2.11")
spec.add_dependency("rainbow", "~> 3.0")
spec.add_dependency("typhoeus", "~> 1.3")
spec.add_dependency("yell", "~> 2.0")
diff --git a/lib/html_proofer/attribute/url.rb b/lib/html_proofer/attribute/url.rb
index 5c2f73f3..b554c90e 100644
--- a/lib/html_proofer/attribute/url.rb
+++ b/lib/html_proofer/attribute/url.rb
@@ -141,12 +141,16 @@ def file_path
# either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
@runner.options[:root_dir] || (File.directory?(@runner.current_source) ? @runner.current_source : File.dirname(@runner.current_source))
# relative links, path is a file
- elsif File.exist?(File.expand_path(path,
- @runner.current_source)) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
+ elsif File.exist?(File.expand_path(
+ path,
+ @runner.current_source,
+ )) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
File.dirname(@runner.current_filename)
# relative links in nested dir, path is a file
- elsif File.exist?(File.join(File.dirname(@runner.current_filename),
- path)) || File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
+ elsif File.exist?(File.join(
+ File.dirname(@runner.current_filename),
+ path,
+ )) || File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
File.dirname(@runner.current_filename)
# relative link, path is a directory
else
diff --git a/lib/html_proofer/check.rb b/lib/html_proofer/check.rb
index ac89b06d..32e40a5f 100644
--- a/lib/html_proofer/check.rb
+++ b/lib/html_proofer/check.rb
@@ -25,8 +25,14 @@ def run
end
def add_failure(description, line: nil, status: nil, content: nil)
- @failures << Failure.new(@runner.current_filename, short_name, description, line: line, status: status,
- content: content)
+ @failures << Failure.new(
+ @runner.current_filename,
+ short_name,
+ description,
+ line: line,
+ status: status,
+ content: content,
+ )
end
def short_name
diff --git a/lib/html_proofer/check/favicon.rb b/lib/html_proofer/check/favicon.rb
index cf11909e..0c22d34d 100644
--- a/lib/html_proofer/check/favicon.rb
+++ b/lib/html_proofer/check/favicon.rb
@@ -17,13 +17,19 @@ def run
if found
if @favicon.url.protocol_relative?
- add_failure("favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead",
- line: @favicon.line, content: @favicon.content)
+ add_failure(
+ "favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead",
+ line: @favicon.line,
+ content: @favicon.content,
+ )
elsif @favicon.url.remote?
add_to_external_urls(@favicon.url, @favicon.line)
elsif !@favicon.url.exists?
- add_failure("internal favicon #{@favicon.url.raw_attribute} does not exist", line: @favicon.line,
- content: @favicon.content)
+ add_failure(
+ "internal favicon #{@favicon.url.raw_attribute} does not exist",
+ line: @favicon.line,
+ content: @favicon.content,
+ )
end
else
add_failure("no favicon provided")
diff --git a/lib/html_proofer/check/images.rb b/lib/html_proofer/check/images.rb
index 82e74744..b5b34aab 100644
--- a/lib/html_proofer/check/images.rb
+++ b/lib/html_proofer/check/images.rb
@@ -12,27 +12,39 @@ def run
next if @img.ignore?
# screenshot filenames should return because of terrible names
- add_failure("image has a terrible filename (#{@img.url.raw_attribute})", line: @img.line,
- content: @img.content) if terrible_filename?
+ add_failure(
+ "image has a terrible filename (#{@img.url.raw_attribute})",
+ line: @img.line,
+ content: @img.content,
+ ) if terrible_filename?
# does the image exist?
if missing_src?
add_failure("image has no src or srcset attribute", line: @img.line, content: @img.content)
elsif @img.url.protocol_relative?
- add_failure("image link #{@img.url} is a protocol-relative URL, use explicit https:// instead",
- line: @img.line, content: @img.content)
+ add_failure(
+ "image link #{@img.url} is a protocol-relative URL, use explicit https:// instead",
+ line: @img.line,
+ content: @img.content,
+ )
elsif @img.url.remote?
add_to_external_urls(@img.url, @img.line)
elsif !@img.url.exists? && !@img.multiple_srcsets? && !@img.multiple_sizes?
- add_failure("internal image #{@img.url.raw_attribute} does not exist", line: @img.line,
- content: @img.content)
+ add_failure(
+ "internal image #{@img.url.raw_attribute} does not exist",
+ line: @img.line,
+ content: @img.content,
+ )
elsif @img.multiple_srcsets? || @img.multiple_sizes?
@img.srcsets_wo_sizes.each do |srcset|
srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)
if srcset_url.protocol_relative?
- add_failure("image link #{srcset_url.url} is a protocol-relative URL, use explicit https:// instead",
- line: @img.line, content: @img.content)
+ add_failure(
+ "image link #{srcset_url.url} is a protocol-relative URL, use explicit https:// instead",
+ line: @img.line,
+ content: @img.content,
+ )
elsif srcset_url.remote?
add_to_external_urls(srcset_url.url, @img.line)
elsif !srcset_url.exists?
@@ -44,16 +56,25 @@ def run
# if this is an img element, check that the alt attribute is present
if @img.img_tag? && !ignore_element?
if missing_alt_tag? && !ignore_missing_alt?
- add_failure("image #{@img.url.raw_attribute} does not have an alt attribute", line: @img.line,
- content: @img.content)
+ add_failure(
+ "image #{@img.url.raw_attribute} does not have an alt attribute",
+ line: @img.line,
+ content: @img.content,
+ )
elsif (empty_alt_tag? || alt_all_spaces?) && !ignore_empty_alt?
- add_failure("image #{@img.url.raw_attribute} has an alt attribute, but no content", line: @img.line,
- content: @img.content)
+ add_failure(
+ "image #{@img.url.raw_attribute} has an alt attribute, but no content",
+ line: @img.line,
+ content: @img.content,
+ )
end
end
- add_failure("image #{@img.url.raw_attribute} uses the http scheme", line: @img.line,
- content: @img.content) if @runner.enforce_https? && @img.url.http?
+ add_failure(
+ "image #{@img.url.raw_attribute} uses the http scheme",
+ line: @img.line,
+ content: @img.content,
+ ) if @runner.enforce_https? && @img.url.http?
end
external_urls
diff --git a/lib/html_proofer/check/links.rb b/lib/html_proofer/check/links.rb
index 89e5f118..ec72f001 100644
--- a/lib/html_proofer/check/links.rb
+++ b/lib/html_proofer/check/links.rb
@@ -29,8 +29,11 @@ def run
end
if @link.url.protocol_relative?
- add_failure("#{@link.url} is a protocol-relative URL, use explicit https:// instead",
- line: @link.line, content: @link.content)
+ add_failure(
+ "#{@link.url} is a protocol-relative URL, use explicit https:// instead",
+ line: @link.line,
+ content: @link.content,
+ )
next
end
@@ -55,8 +58,11 @@ def run
elsif @link.url.internal?
# does the local directory have a trailing slash?
if @link.url.unslashed_directory?(@link.url.absolute_path)
- add_failure("internally linking to a directory #{@link.url.raw_attribute} without trailing slash",
- line: @link.line, content: @link.content)
+ add_failure(
+ "internally linking to a directory #{@link.url.raw_attribute} without trailing slash",
+ line: @link.line,
+ content: @link.content,
+ )
next
end
@@ -88,17 +94,26 @@ def check_schemes
def handle_mailto
if @link.url.path.empty?
- add_failure("#{@link.url.raw_attribute} contains no email address", line: @link.line,
- content: @link.content) unless ignore_empty_mailto?
+ add_failure(
+ "#{@link.url.raw_attribute} contains no email address",
+ line: @link.line,
+ content: @link.content,
+ ) unless ignore_empty_mailto?
elsif !/#{URI::MailTo::EMAIL_REGEXP}/o.match?(@link.url.path)
- add_failure("#{@link.url.raw_attribute} contains an invalid email address", line: @link.line,
- content: @link.content)
+ add_failure(
+ "#{@link.url.raw_attribute} contains an invalid email address",
+ line: @link.line,
+ content: @link.content,
+ )
end
end
def handle_tel
- add_failure("#{@link.url.raw_attribute} contains no phone number", line: @link.line,
- content: @link.content) if @link.url.path.empty?
+ add_failure(
+ "#{@link.url.raw_attribute} contains no phone number",
+ line: @link.line,
+ content: @link.content,
+ ) if @link.url.path.empty?
end
def ignore_empty_mailto?
@@ -113,13 +128,19 @@ def check_sri
return unless SRI_REL_TYPES.include?(@link.node["rel"])
if blank?(@link.node["integrity"]) && blank?(@link.node["crossorigin"])
- add_failure("SRI and CORS not provided in: #{@link.url.raw_attribute}", line: @link.line,
- content: @link.content)
+ add_failure(
+ "SRI and CORS not provided in: #{@link.url.raw_attribute}",
+ line: @link.line,
+ content: @link.content,
+ )
elsif blank?(@link.node["integrity"])
add_failure("Integrity is missing in: #{@link.url.raw_attribute}", line: @link.line, content: @link.content)
elsif blank?(@link.node["crossorigin"])
- add_failure("CORS not provided for external resource in: #{@link.link.url.raw_attribute}", line: @link.line,
- content: @link.content)
+ add_failure(
+ "CORS not provided for external resource in: #{@link.url.raw_attribute}",
+ line: @link.line,
+ content: @link.content,
+ )
end
end
diff --git a/lib/html_proofer/check/open_graph.rb b/lib/html_proofer/check/open_graph.rb
index dc85fae7..fd32aef2 100644
--- a/lib/html_proofer/check/open_graph.rb
+++ b/lib/html_proofer/check/open_graph.rb
@@ -17,13 +17,19 @@ def run
elsif !@open_graph.url.valid?
add_failure("#{@open_graph.src} is an invalid URL", line: @open_graph.line)
elsif @open_graph.url.protocol_relative?
- add_failure("open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead",
- line: @open_graph.line, content: @open_graph.content)
+ add_failure(
+ "open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead",
+ line: @open_graph.line,
+ content: @open_graph.content,
+ )
elsif @open_graph.url.remote?
add_to_external_urls(@open_graph.url, @open_graph.line)
else
- add_failure("internal open graph #{@open_graph.url.raw_attribute} does not exist", line: @open_graph.line,
- content: @open_graph.content) unless @open_graph.url.exists?
+ add_failure(
+ "internal open graph #{@open_graph.url.raw_attribute} does not exist",
+ line: @open_graph.line,
+ content: @open_graph.content,
+ ) unless @open_graph.url.exists?
end
end
diff --git a/lib/html_proofer/check/scripts.rb b/lib/html_proofer/check/scripts.rb
index 01f9b564..d07999fc 100644
--- a/lib/html_proofer/check/scripts.rb
+++ b/lib/html_proofer/check/scripts.rb
@@ -14,14 +14,20 @@ def run
if missing_src?
add_failure("script is empty and has no src attribute", line: @script.line, content: @script.content)
elsif @script.url.protocol_relative?
- add_failure("script link #{@script.url} is a protocol-relative URL, use explicit https:// instead",
- line: @script.line, content: @script.content)
+ add_failure(
+ "script link #{@script.url} is a protocol-relative URL, use explicit https:// instead",
+ line: @script.line,
+ content: @script.content,
+ )
elsif @script.url.remote?
add_to_external_urls(@script.url, @script.line)
check_sri if @runner.check_sri?
elsif !@script.url.exists?
- add_failure("internal script reference #{@script.src} does not exist", line: @script.line,
- content: @script.content)
+ add_failure(
+ "internal script reference #{@script.src} does not exist",
+ line: @script.line,
+ content: @script.content,
+ )
end
end
@@ -34,14 +40,23 @@ def missing_src?
def check_sri
if blank?(@script.node["integrity"]) && blank?(@script.node["crossorigin"])
- add_failure("SRI and CORS not provided in: #{@script.url.raw_attribute}", line: @script.line,
- content: @script.content)
+ add_failure(
+ "SRI and CORS not provided in: #{@script.url.raw_attribute}",
+ line: @script.line,
+ content: @script.content,
+ )
elsif blank?(@script.node["integrity"])
- add_failure("Integrity is missing in: #{@script.url.raw_attribute}", line: @script.line,
- content: @script.content)
+ add_failure(
+ "Integrity is missing in: #{@script.url.raw_attribute}",
+ line: @script.line,
+ content: @script.content,
+ )
elsif blank?(@script.node["crossorigin"])
- add_failure("CORS not provided for external resource in: #{@script.url.raw_attribute}", line: @script.line,
- content: @script.content)
+ add_failure(
+ "CORS not provided for external resource in: #{@script.url.raw_attribute}",
+ line: @script.line,
+ content: @script.content,
+ )
end
end
end
diff --git a/lib/html_proofer/configuration.rb b/lib/html_proofer/configuration.rb
index 4f789ea3..88147a5c 100644
--- a/lib/html_proofer/configuration.rb
+++ b/lib/html_proofer/configuration.rb
@@ -280,17 +280,25 @@ def parse_json_option(option_name, config, symbolize_names: true)
module ConfigurationHelp
TEXT = {
as_links: ["Assumes that `PATH` is a comma-separated array of links to check."],
- assume_extension: ["Automatically add specified extension to files for internal links, ",
- "to allow extensionless URLs (as supported by most servers) (default: `.html`).",],
+ assume_extension: [
+ "Automatically add specified extension to files for internal links, ",
+ "to allow extensionless URLs (as supported by most servers) (default: `.html`).",
+ ],
directory_index_file: ["Sets the file to look for when a link refers to a directory. (default: `index.html`)."],
- extensions: ["A comma-separated list of Strings indicating the file extensions you",
- "would like to check (default: `.html`)",],
+ extensions: [
+ "A comma-separated list of Strings indicating the file extensions you",
+ "would like to check (default: `.html`)",
+ ],
allow_hash_href: ['"If `true`, assumes `href="#"` anchors are valid (default: `true`)"'],
- allow_missing_href: ["If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically ",
- "allowed, but could also be human error. (default: `false`)",],
- checks: ["A comma-separated list of Strings indicating which checks you",
- "want to run (default: `[\"Links\", \"Images\", \"Scripts\"]",],
+ allow_missing_href: [
+ "If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically ",
+ "allowed, but could also be human error. (default: `false`)",
+ ],
+ checks: [
+ "A comma-separated list of Strings indicating which checks you",
+ "want to run (default: `[\"Links\", \"Images\", \"Scripts\"]",
+ ],
check_external_hash: ["Checks whether external hashes exist (even if the webpage exists) (default: `true`)."],
check_internal_hash: ["Checks whether internal hashes exist (even if the webpage exists) (default: `true`)."],
check_sri: ["Check that `` and `