Skip to content

Commit

Permalink
Merge pull request #43 from NicolasMassart/29-replace-request
Browse files Browse the repository at this point in the history
Replace Request by Needle
After the tests I ran on various cases, it seems to be fully compatible.
The issues we currently have are also reproduced in exactly the same way with Needle (mainly encoding and user-agent issues), which reinforces the idea that it really behaves the same as the deprecated Request library we currently use.
  • Loading branch information
NicolasMassart authored Mar 11, 2021
2 parents 65fdf02 + eadc685 commit aedab9e
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 370 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:

strategy:
matrix:
node-version: [10.x, 12.x, 14.x]
node-version: [10.x, 12.x, 14.x, 15.x]

steps:
- uses: actions/checkout@v2
Expand Down
7 changes: 5 additions & 2 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"use strict";

const url = require('url');
const { URL } = require('url');

const protocols = {
file: require('./lib/proto/file'),
http: require('./lib/proto/http'),
Expand All @@ -16,7 +17,9 @@ module.exports = function linkCheck(link, opts, callback) {
opts = {};
}

const protocol = (url.parse(link, false, true).protocol || url.parse(opts.baseUrl, false, true).protocol || 'unknown:').replace(/:$/, '');
const url = new URL(link, opts.baseUrl);
const protocol = url.protocol.replace(/:$/, '');

if (!protocols.hasOwnProperty(protocol)) {
callback(new Error('Unsupported Protocol'), null);
return;
Expand Down
39 changes: 17 additions & 22 deletions lib/proto/http.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
"use strict";

const BlackHole = require('../BlackHole');
const isRelativeUrl = require('is-relative-url');
const LinkCheckResult = require('../LinkCheckResult');
const ms = require('ms');
const request = require('request');
const needle = require('needle');

module.exports = {

Expand All @@ -26,32 +25,28 @@ module.exports = {
//fallback retry delay will default to 60 seconds if not provided in options
let fallbackRetryDelayInMs = ms(opts.fallbackRetryDelay || '60s');

// Decoding and encoding is required to prevent encoding already encoded URLs
// We decode using the decodeURIComponent as it will decode a wider range of
// characters that were not necessary to be encoded at first, then we re-encode
// only the required ones using encodeURI.
// Note that we don't use encodeURIComponent as it adds too many unnecessary encodings
// see "Not Escaped" list in https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent#description
const url = encodeURI(decodeURIComponent(new URL(link, opts.baseUrl).toString()));

const options = {
// Decoding and encoding is required to prevent encoding already encoded URLs
// We decode using the decodeURIComponent as it will decode a wider range of
// characters that were not necessary to be encoded at first, then we re-encode
// only the required ones using encodeURI.
// Note that we don't use encodeURIComponent as it adds too many unnecessary encodings
// see "Not Escaped" list in https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent#description
uri: encodeURI(decodeURIComponent(link)),
headers: {
// override 'User-Agent' (some sites return `401` when the user-agent isn't a web browser)
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'
},
maxRedirects: 8,
strictSSL: false,
timeout: ms(timeout),
user_agent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36',
follow_max: 8,
response_timeout: ms(timeout),
rejectUnauthorized: false,
auth: 'auto',
headers: {}
};

if (opts.baseUrl && isRelativeUrl(link)) {
options.baseUrl = opts.baseUrl;
}

if (opts.headers) {
Object.assign(options.headers, opts.headers);
}

request.head(options, function (err, res, body) {
needle.head(url, options, function (err, res) {
if (!err && res.statusCode === 200) {
if (additionalMessage){
err = (err == null) ? additionalMessage : `${err} ${additionalMessage}`;
Expand All @@ -61,7 +56,7 @@ module.exports = {
}

// if HEAD fails (405 Method Not Allowed, etc), try GET
request.get(options, function (err, res) {
needle.get(url, options, function (err, res) {
// If enabled in opts, the response was a 429 (Too Many Requests) and there is a retry-after provided, wait and then retry
if (retryOn429 && res && res.statusCode === 429 && attempts < retryCount) {
//delay will default to fallbackRetryDelay if no retry-after header is found
Expand Down
Loading

0 comments on commit aedab9e

Please sign in to comment.