fix(scanner): allow 401 and 403 responses, set proper accept header on requests (#64)
argl authored Aug 21, 2024
1 parent f917e61 commit b90b1ff
Showing 2 changed files with 18 additions and 5 deletions.
src/retriever/retriever.js: 15 changes (12 additions & 3 deletions)
@@ -1,9 +1,15 @@
+import { AxiosHeaders } from "axios";
 import { CONFIG } from "../config.js";
 import { HTML_TYPES, Requests } from "../types.js";
 import { Session, getPageText } from "./session.js";
 import { urls } from "./url.js";
 import { parseHttpEquivHeaders } from "./utils.js";
 
+const STANDARD_HEADERS = [
+  "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+];
+const ROBOTS_HEADERS = ["Accept: text/plain,*/*;q=0.8"];
+
 /**
  *
  * @param {*} hostname
@@ -15,8 +21,8 @@ export async function retrieve(hostname, options = {}) {
 
   const { http, https } = urls(hostname, options);
   const [httpSession, httpsSession] = await Promise.all([
-    Session.fromUrl(http, options),
-    Session.fromUrl(https, options),
+    Session.fromUrl(http, { headers: STANDARD_HEADERS, ...options }),
+    Session.fromUrl(https, { headers: STANDARD_HEADERS, ...options }),
   ]);
 
   if (!httpSession && !httpsSession) {
@@ -42,7 +48,10 @@ export async function retrieve(hostname, options = {}) {
   retrievals.resources.path = getPageText(retrievals.responses.auto, true);
 
   // Get robots.txt to gather additional cookies, if any.
-  await retrievals.session?.get({ path: "/robots.txt" });
+  await retrievals.session?.get({
+    path: "/robots.txt",
+    headers: new AxiosHeaders(ROBOTS_HEADERS.join("\n")),
+  });
 
   // Do a CORS preflight request
   const corsUrl = retrievals.session.redirectHistory[
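For context, a minimal sketch of what the new Accept header does when a request is made directly with axios. This is an illustration only, not code from the commit: the hostname is a placeholder, and the Session wrapper's option handling is not shown in this diff.

import axios, { AxiosHeaders } from "axios";

// Sketch: request robots.txt with the plain-text Accept header that the
// ROBOTS_HEADERS constant above encodes. "example.com" is a placeholder.
const robotsHeaders = new AxiosHeaders("Accept: text/plain,*/*;q=0.8");
const res = await axios.get("https://example.com/robots.txt", {
  headers: robotsHeaders,
  // Don't throw on non-2xx; the retriever only wants any cookies set here.
  validateStatus: () => true,
});
console.log(res.status, res.headers["set-cookie"]);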
src/scanner/index.js: 8 changes (6 additions & 2 deletions)
@@ -32,8 +32,12 @@ export async function scan(hostname, options) {
     throw new Error("The site seems to be down.");
   }
 
-  if (r.responses.auto.status < 200 || r.responses.auto.status >= 300) {
-    throw new Error("Site did not respond with a 2xx HTTP status code.");
+  // We allow 2xx, 3xx, 401 and 403 status codes
+  const { status } = r.responses.auto;
+  if (status < 200 || (status >= 400 && ![401, 403].includes(status))) {
+    throw new Error(
+      `Site did respond with an unexpected HTTP status code ${status}.`
+    );
   }
 
   // Run all the tests on the result
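The relaxed status check in isolation, as a hedged sketch: isAcceptable is a hypothetical helper name, not part of the codebase, but it mirrors the condition added above.

// Sketch of the new acceptance rule: 2xx and 3xx pass, as do 401 and 403;
// everything else (including 404 and 5xx) makes scan() throw.
const isAcceptable = (status) =>
  status >= 200 && (status < 400 || [401, 403].includes(status));

for (const s of [200, 301, 401, 403, 404, 500]) {
  console.log(s, isAcceptable(s) ? "scanned" : "rejected");
}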
