From 8e9812fe890c52466147aeab5333b67ebe1153e0 Mon Sep 17 00:00:00 2001 From: j-mendez Date: Sat, 11 Jan 2025 00:03:59 -0500 Subject: [PATCH] perf(cli): add page links direct return --- Cargo.lock | 12 ++++++------ spider/Cargo.toml | 2 +- spider_chrome/Cargo.toml | 2 +- spider_cli/Cargo.toml | 2 +- spider_cli/src/main.rs | 17 ++++++----------- spider_transformations/Cargo.toml | 2 +- spider_utils/Cargo.toml | 2 +- spider_worker/Cargo.toml | 2 +- 8 files changed, 18 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cf409091c..c5fd9ef89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5392,7 +5392,7 @@ dependencies = [ [[package]] name = "spider" -version = "2.25.4" +version = "2.26.0" dependencies = [ "ahash", "aho-corasick", @@ -5456,7 +5456,7 @@ dependencies = [ [[package]] name = "spider_chrome" -version = "2.25.4" +version = "2.26.0" dependencies = [ "adblock", "aho-corasick", @@ -5546,7 +5546,7 @@ dependencies = [ [[package]] name = "spider_cli" -version = "2.25.4" +version = "2.26.0" dependencies = [ "clap", "env_logger", @@ -5589,7 +5589,7 @@ dependencies = [ [[package]] name = "spider_transformations" -version = "2.25.4" +version = "2.26.0" dependencies = [ "aho-corasick", "fast_html2md", @@ -5612,7 +5612,7 @@ dependencies = [ [[package]] name = "spider_utils" -version = "2.25.4" +version = "2.26.0" dependencies = [ "indexmap 1.9.3", "serde", @@ -5625,7 +5625,7 @@ dependencies = [ [[package]] name = "spider_worker" -version = "2.25.4" +version = "2.26.0" dependencies = [ "env_logger", "lazy_static", diff --git a/spider/Cargo.toml b/spider/Cargo.toml index 960007535..d3bcded3b 100644 --- a/spider/Cargo.toml +++ b/spider/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider" -version = "2.25.4" +version = "2.26.0" authors = [ "j-mendez " ] diff --git a/spider_chrome/Cargo.toml b/spider_chrome/Cargo.toml index f75f02f15..1c78d1a55 100644 --- a/spider_chrome/Cargo.toml +++ b/spider_chrome/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_chrome" -version = "2.25.4" +version = "2.26.0" rust-version = "1.70" authors = [ "j-mendez " diff --git a/spider_cli/Cargo.toml b/spider_cli/Cargo.toml index 60734839d..e97310b89 100644 --- a/spider_cli/Cargo.toml +++ b/spider_cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_cli" -version = "2.25.4" +version = "2.26.0" authors = [ "j-mendez " ] diff --git a/spider_cli/src/main.rs b/spider_cli/src/main.rs index 5ea3cc30c..af70312ff 100644 --- a/spider_cli/src/main.rs +++ b/spider_cli/src/main.rs @@ -10,7 +10,6 @@ use options::{Cli, Commands}; use serde_json::json; use spider::features::chrome_common::RequestInterceptConfiguration; use spider::hashbrown::HashMap; -use spider::page::get_page_selectors; use spider::string_concat::string_concat; use spider::string_concat::string_concat_impl; use spider::tokio; @@ -169,19 +168,15 @@ async fn main() { }) => { let mut stdout = tokio::io::stdout(); - let selectors: Option = if output_links { - get_page_selectors(&url, cli.subdomains, cli.tld) - } else { - None - }; - - let base = website.get_url_parsed().clone(); + if output_links { + website.configuration.return_page_links = true; + } tokio::spawn(async move { website.crawl().await; }); - while let Ok(mut res) = rx2.recv().await { + while let Ok(res) = rx2.recv().await { let page_json = json!({ "url": res.get_url(), "html": if output_html { @@ -189,8 +184,8 @@ async fn main() { } else { Default::default() }, - "links": match selectors { - Some(ref s) => res.links(s, &base).await.iter().map(|i| i.inner().to_string()).collect::(), + "links": match res.page_links { + Some(ref s) => s.iter().map(|i| i.inner().to_string()).collect::(), _ => Default::default() } }); diff --git a/spider_transformations/Cargo.toml b/spider_transformations/Cargo.toml index aee18b3a8..9119fc9dc 100644 --- a/spider_transformations/Cargo.toml +++ b/spider_transformations/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_transformations" -version = "2.25.4" +version = "2.26.0" authors = [ "j-mendez " ] diff --git a/spider_utils/Cargo.toml b/spider_utils/Cargo.toml index 17b43d67f..67f480718 100644 --- a/spider_utils/Cargo.toml +++ b/spider_utils/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_utils" -version = "2.25.4" +version = "2.26.0" authors = [ "j-mendez " ] diff --git a/spider_worker/Cargo.toml b/spider_worker/Cargo.toml index 6c3d9c170..473f0efad 100644 --- a/spider_worker/Cargo.toml +++ b/spider_worker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_worker" -version = "2.25.4" +version = "2.26.0" authors = [ "j-mendez " ]