diff --git a/src/program.js b/src/program.js
index 328cfd2..cd5f9ea 100644
--- a/src/program.js
+++ b/src/program.js
@@ -92,67 +92,6 @@ function getDefaultLocale() {
 }
 
-program.option('-u --urls ', 'Comma separated url list for scan', list).
-  option('-p, --preset ',
-    'Table preset (minimal, seo, headers, parse, lighthouse, lighthouse-all)',
-    getConfigVal('preset', 'seo')).
-  option('-e, --exclude ',
-    'Comma separated fields to exclude from results', list).
-  option('-d, --max-depth ', 'Max scan depth',
-    getConfigVal('maxDepth', 10)).
-  option('-c, --concurrency ',
-    'Threads number (default: by cpu cores)').
-  option('--lighthouse', 'Appends base Lighthouse fields to preset').
-  option('--delay ', 'Delay between requests', parseInt, 0).
-  option('-f, --fields ',
-    'Field in format --field \'title=$("title").text()\'', fieldsCustomCollect,
-    []).
-  option('--default-filter ', 'Default filter when JSON viewed, example: depth>1').
-  option('--no-skip-static', `Scan static files`).
-  option('--no-limit-domain', `Scan not only current domain`).
-  option('--docs-extensions',
-    `Comma-separated extensions that will be add to table (default: ${defaultDocs.join(
-      ',')})`, list).
-  option('--follow-xml-sitemap', `Follow sitemap.xml`,
-    getConfigVal('followXmlSitemap', false)).
-  option('--ignore-robots-txt', `Ignore disallowed in robots.txt`,
-    getConfigVal('ignoreRobotsTxt', false)).
-  option('--url-list', `assume that --url contains url list, will set -d 1 --no-limit-domain --ignore-robots-txt`,
-    getConfigVal('ignoreRobotsTxt', false)).
-  option('-m, --max-requests ', `Limit max pages scan`,
-    getConfigVal('maxRequests', 0)).
-  option('--influxdb-max-send ', `Limit send to InfluxDB`,
-    getConfigVal('influxdb.maxSendCount', 5)).
-  option('--no-headless', `Show browser GUI while scan`,
-    !getConfigVal('headless', true)).
-  option('--remove-csv', `No delete csv after xlsx generate`,
-    getConfigVal('removeCsv', true)).
-  option('--remove-json', `No delete json after serve`,
-    getConfigVal('removeJson', true)).
-  option('--no-remove-csv', `No delete csv after xlsx generate`).
-  option('--no-remove-json', `No delete json after serve`).
-  option('--out-dir ', `Output directory`,
-    getConfigVal('outDir', '~/site-audit-seo/')).
-  option('--out-name ', `Output file name, default: domain`).
-  option('--csv ', `Skip scan, only convert csv to xlsx`).
-  option('--xlsx', `Save as XLSX`, getConfigVal('xlsx', false)).
-  option('--gdrive', `Publish sheet to google docs`,
-    getConfigVal('gdrive', false)).
-  option('--json', `Save as JSON`, getConfigVal('json', true)).
-  option('--no-json', `No save as JSON`, !getConfigVal('json', true)).
-  option('--upload', `Upload JSON to public web`,
-    getConfigVal('upload', false)).
-  option('--no-color', `No console colors`).
-  option('--lang ', `Language (en, ru, default: system language)`,
-    getConfigVal('lang', undefined)).
-  option('--open-file',
-    `Open file after scan (default: yes on Windows and MacOS)`,
-    getConfigVal('openFile', undefined)).
-  option('--no-open-file', `Don't open file after scan`).
-  option('--no-console-validate', `Don't output validate messages in console`).
-  name('site-audit-seo').
-  version(packageJson.version).
-  usage('-u https://example.com --upload')
 
 program.postParse = async () => {
   if (program.openFile === undefined) {
@@ -218,20 +157,85 @@ program.postParse = async () => {
     createDirIfNotExists(program.outDir);
   }
+
+
+
+program.option('-u --urls ', 'Comma separated url list for scan', list).
+  option('-p, --preset ',
+    'Table preset (minimal, seo, headers, parse, lighthouse, lighthouse-all)',
+    getConfigVal('preset', 'seo')).
+  option('-e, --exclude ',
+    'Comma separated fields to exclude from results', list).
+  option('-d, --max-depth ', 'Max scan depth',
+    getConfigVal('maxDepth', 10)).
+  option('-c, --concurrency ',
+    'Threads number (default: by cpu cores)').
+  option('--lighthouse', 'Appends base Lighthouse fields to preset').
+  option('--delay ', 'Delay between requests', parseInt, 0).
+  option('-f, --fields ',
+    'Field in format --field \'title=$("title").text()\'', fieldsCustomCollect,
+    []).
+  option('--default-filter ', 'Default filter when JSON viewed, example: depth>1').
+  option('--no-skip-static', `Scan static files`).
+  option('--no-limit-domain', `Scan not only current domain`).
+  option('--docs-extensions',
+    `Comma-separated extensions that will be add to table (default: ${defaultDocs.join(
+      ',')})`, list).
+  option('--follow-xml-sitemap', `Follow sitemap.xml`,
+    getConfigVal('followXmlSitemap', false)).
+  option('--ignore-robots-txt', `Ignore disallowed in robots.txt`,
+    getConfigVal('ignoreRobotsTxt', false)).
+  option('--url-list', `assume that --url contains url list, will set -d 1 --no-limit-domain --ignore-robots-txt`,
+    getConfigVal('ignoreRobotsTxt', false)).
+  option('-m, --max-requests ', `Limit max pages scan`,
+    parseInt, getConfigVal('maxRequests', 0)).
+  option('--influxdb-max-send ', `Limit send to InfluxDB`,
+    getConfigVal('influxdb.maxSendCount', 5)).
+  option('--no-headless', `Show browser GUI while scan`,
+    !getConfigVal('headless', true)).
+  option('--remove-csv', `No delete csv after xlsx generate`,
+    getConfigVal('removeCsv', true)).
+  option('--remove-json', `No delete json after serve`,
+    getConfigVal('removeJson', true)).
+  option('--no-remove-csv', `No delete csv after xlsx generate`).
+  option('--no-remove-json', `No delete json after serve`).
+  option('--out-dir ', `Output directory`,
+    getConfigVal('outDir', '~/site-audit-seo/')).
+  option('--out-name ', `Output file name, default: domain`).
+  option('--csv ', `Skip scan, only convert csv to xlsx`).
+  option('--xlsx', `Save as XLSX`, getConfigVal('xlsx', false)).
+  option('--gdrive', `Publish sheet to google docs`,
+    getConfigVal('gdrive', false)).
+  option('--json', `Save as JSON`, getConfigVal('json', true)).
+  option('--no-json', `No save as JSON`, !getConfigVal('json', true)).
+  option('--upload', `Upload JSON to public web`,
+    getConfigVal('upload', false)).
+  option('--no-color', `No console colors`).
+  option('--lang ', `Language (en, ru, default: system language)`,
+    getConfigVal('lang', undefined)).
+  option('--open-file',
+    `Open file after scan (default: yes on Windows and MacOS)`,
+    getConfigVal('openFile', undefined)).
+  option('--no-open-file', `Don't open file after scan`).
+  option('--no-console-validate', `Don't output validate messages in console`).
+  name('site-audit-seo').
+  version(packageJson.version).
+  usage('-u https://example.com --upload')
+
 
 program.getOptions = () => {
   const opts = {
     fieldsPreset: program.preset, // possible values: default, seo, headers, minimal
     fieldsExclude: program.exclude, // fields to exclude from results
-    maxDepth: program.maxDepth, // scan depth
-    maxConcurrency: parseInt(program.concurrency), // tabs opened in parallel
+    maxDepth: program.maxDepth, // chrome-crawler, scan depth
+    maxConcurrency: parseInt(program.concurrency), // chrome-crawler, tabs opened in parallel
     lighthouse: program.lighthouse, // scan via lighthouse
     delay: parseInt(program.delay), // delay between requests
     skipStatic: program.skipStatic, // do not skip loading of static assets in the browser (images, css, js)
-    followSitemapXml: program.followXmlSitemap, // to find more pages
+    followSitemapXml: program.followXmlSitemap, // chrome-crawler, to find more pages
     limitDomain: program.limitDomain, // limit scan to the current domain
     urlList: program.urlList, // flag: the passed page contains a list of urls
-    maxRequest: program.maxRequests, // for tests
-    headless: program.headless, // on desktop, opens a visible browser
+    maxRequest: program.maxRequests, // chrome-crawler, for tests
+    headless: program.headless, // chrome-crawler, on desktop, opens a visible browser
     docsExtensions: program.docsExtensions, // extensions that will be added to the table
     outDir: program.outDir, // directory where csv files are saved
     outName: program.outName, // output file name
@@ -247,7 +251,7 @@ program.getOptions = () => {
     json: program.json, // save json file
     upload: program.upload, // upload json to the server
     consoleValidate: program.consoleValidate, // output validation messages to the console
-    obeyRobotsTxt: !program.ignoreRobotsTxt, // ignore disallow rules in robots.txt
+    obeyRobotsTxt: !program.ignoreRobotsTxt, // chrome-crawler, ignore disallow rules in robots.txt
     influxdb: program.influxdb, // influxdb config
     urls: program.urls // urls for a single scan
   };
@@ -269,6 +273,11 @@ program.outBrief = (options) => {
         `, ${color.yellow}recommended to set -c 1 when using lighthouse${color.reset}` :
         ''),
     },
+    {
+      name: 'Lighthouse',
+      value: (program.lighthouse ? 'yes' : 'no'),
+      comment: (program.lighthouse ? '' : '--lighthouse')
+    },
     {
       name: 'Delay',
       value: program.delay,
diff --git a/src/scrap-site.js b/src/scrap-site.js
index 98de429..b5bbd7b 100644
--- a/src/scrap-site.js
+++ b/src/scrap-site.js
@@ -527,7 +527,7 @@ module.exports = async (baseUrl, options = {}) => {
     if (options.upload) webPath = await uploadJson(jsonPath, options);
     // if (options.gdrive) webPath = await publishGoogleDrive(jsonPath);
 
-    if (options.influxdb) {
+    if (options.influxdb && options.fieldsPreset == 'seo') {
       log('send to InfluxDB...');
       const points = await sendToInfluxDB(jsonPath, options);
       log(`sent ${points.length} points`);
@@ -571,7 +571,7 @@ module.exports = async (baseUrl, options = {}) => {
   await saveAsJson(csvPath, jsonPath, options.lang, options.preset, options.defaultFilter);
 
   // upload to influxdb
-  if (options.influxdb) {
+  if (options.influxdb && options.fieldsPreset == 'seo') {
     log('send to InfluxDB...');
     const points = await sendToInfluxDB(jsonPath, options);
     log(`sent ${points.length} points`);
diff --git a/src/server.js b/src/server.js
index 04c103d..3f0d7ef 100644
--- a/src/server.js
+++ b/src/server.js
@@ -1,7 +1,6 @@
 const lowdb = require('lowdb');
 const FileSync = require('lowdb/adapters/FileSync');
 
-const program = require("./program");
 const scrapSite = require("./scrap-site");
 const queue = require("queue");
 
@@ -52,6 +51,20 @@ async function onScan(url, args, socket) {
     return;
   }
 
+  const program = require("./program");
+
+  // reset to defaults: parsed option values are global and leak across scans
+  delete(program.preset);
+  delete(program.concurrency);
+  delete(program.lighthouse);
+  program.delay = 0;
+  delete(program.ignoreRobotsTxt);
+  delete(program.followXmlSitemap);
+  delete(program.maxDepth);
+  delete(program.maxRequests);
+  delete(program.language);
+  delete(program.influxDbMaxSend);
+
   program.exitOverride();
   try {
     program.parse([...["", ""], ...args]);
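
Note on the src/server.js hunk: the CLI builds its options on a single commander `program` object, and parsed values are stored as properties on that object, so in the long-running server a value parsed for one scan would otherwise survive into the next one. The added block re-requires `./program` inside onScan() and clears the previous values before parsing the new arguments. A minimal standalone sketch of the effect (illustration only, not part of this patch; it assumes an older commander release that exports the program object directly and stores options as properties, which is how src/program.js uses it; the --max-depth option here is just an example):

    // why onScan() resets program state between parses
    const program = require('commander'); // module-level singleton, cached by require()

    program.option('-d, --max-depth <depth>', 'Max scan depth');

    program.parse(['', '', '-d', '5']); // the two empty strings stand in for node and the script path
    console.log(program.maxDepth);      // '5'

    program.parse(['', '']);            // a later parse without -d ...
    console.log(program.maxDepth);      // ... still '5': the old value persists until it is deleted

Because require() caches modules, the require("./program") inside onScan() returns the same singleton on every call, so it is the explicit delete(program.*) resets that restore the defaults, not the re-require.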