diff --git a/src/extensions/ua-parser-extensions.js b/src/extensions/ua-parser-extensions.js index 5e836bbf..bd9d517d 100644 --- a/src/extensions/ua-parser-extensions.js +++ b/src/extensions/ua-parser-extensions.js @@ -53,7 +53,8 @@ const Crawlers = Object.freeze({ // OpenAI's SearchGPT - https://platform.openai.com/docs/bots // PerplexityBot - https://perplexity.ai/perplexitybot // SemrushBot - http://www.semrush.com/bot.html - /((?:ahrefs|amazon|bing|cc|dot|duckduck|exa|facebook|gpt|mj12|mojeek|oai-search|perplexity|semrush)bot)\/([\w\.]+)/i, + // SeznamBot - http://napoveda.seznam.cz/seznambot-intro + /((?:ahrefs|amazon|bing|cc|dot|duckduck|exa|facebook|gpt|mj12|mojeek|oai-search|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i, // Applebot - http://apple.com/go/applebot /(applebot(?:-extended)?)\/([\w\.]+)/i, @@ -62,7 +63,7 @@ const Crawlers = Object.freeze({ /(baiduspider)[-imagevdonsfcpr]{0,6}\/([\w\.]+)/i, // ClaudeBot (Anthropic) - /(claude(?:bot|-web))\/([\w\.]+)/i, + /(claude(?:bot|-web)|anthropic-ai)\/?([\w\.]*)/i, // Coc Coc Bot - https://help.coccoc.com/en/search-engine /(coccocbot-(?:image|web))\/([\w\.]+)/i, @@ -89,8 +90,8 @@ const Crawlers = Object.freeze({ // Yeti (Naver) /(yeti)\/([\w\.]+)/i, - // YisouSpider - /(yisouspider)\/?([\w\.]*)/i + // aiHitBot / Cohere-AI / Diffbot / Magpie-Crawler / Omgilibot / Webzio-Extended / Screaming Frog SEO Spider / Timpibot / VelenPublicWebCrawler / YisouSpider / YouBot + /((?:aihit|diff|timpi|you)bot|cohere-ai|omgili(?:bot)?|(?:magpie-|velenpublicweb)crawler|webzio-extended|(?:screaming frog seo |yisou)spider)\/?([\w\.]*)/i ], [NAME, VERSION, [TYPE, CRAWLER]], @@ -99,13 +100,15 @@ const Crawlers = Object.freeze({ // Google Bots /((?:adsbot|apis|mediapartners)-google(?:-mobile)?|google-?(?:other|cloudvertexbot|extended|safety))/i, + // AI2Bot - https://allenai.org/crawler // Bytespider // DataForSeoBot - https://dataforseo.com/dataforseo-bot // Huawei AspiegelBot / PetalBot https://aspiegel.com/petalbot + // ImagesiftBot - https://imagesift.com/about // Qihoo 360Spider // TurnitinBot - https://www.turnitin.com/robot/crawlerinfo.html // Yahoo! Slurp - http://help.yahoo.com/help/us/ysearch/slurp - /(360spider-?(?:image|video)?|bytespider|(?:aspiegel|dataforseo|petal|turnitin)bot|(?=yahoo! )slurp)/i + /\b(360spider-?(?:image|video)?|bytespider|(?:ai2|aspiegel|dataforseo|imagesift|petal|turnitin)bot|teoma|(?=yahoo! )slurp)/i ], [NAME, [TYPE, CRAWLER]] ] @@ -238,8 +241,8 @@ const Fetchers = Object.freeze({ ], [NAME, VERSION, [TYPE, FETCHER]], - // Google Bots / Snapchat - [/(feedfetcher-google|google(?:-read-aloud|producer)|(?=bot; )snapchat)/i], + // Google Bots / Snapchat / Vercelbot + [/(vercelbot|feedfetcher-google|google(?:-read-aloud|producer)|(?=bot; )snapchat)/i], [NAME, [TYPE, FETCHER]], ] }); diff --git a/test/specs/browser-crawlers.json b/test/specs/browser-crawlers.json index e527740e..04a1a352 100644 --- a/test/specs/browser-crawlers.json +++ b/test/specs/browser-crawlers.json @@ -49,6 +49,26 @@ "type" : "crawler" } }, + { + "desc" : "AI2Bot", + "ua" : "Mozilla/5.0 (compatible) AI2Bot (+https://www.allenai.org/crawler)", + "expect" : + { + "name" : "AI2Bot", + "version" : "undefined", + "type" : "crawler" + } + }, + { + "desc" : "aiHitBot", + "ua" : "Mozilla/5.0 (compatible; aiHitBot/2.9; +https://www.aihitdata.com/about)", + "expect" : + { + "name" : "aiHitBot", + "version" : "2.9", + "type" : "crawler" + } + }, { "desc" : "Applebot", "ua" : "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1;+http://www.apple.com/go/applebot)", @@ -131,7 +151,7 @@ }, { "desc" : "DataForSEO", - "ua" : "Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot) ", + "ua" : "Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)", "expect" : { "name" : "DataForSeoBot", @@ -139,6 +159,16 @@ "type" : "crawler" } }, + { + "desc" : "Diffbot", + "ua" : "Diffbot/0.1", + "expect" : + { + "name" : "Diffbot", + "version" : "0.1", + "type" : "crawler" + } + }, { "desc" : "Dotbot", "ua" : "Mozilla/5.0 (compatible; DotBot/1.2; +https://opensiteexplorer.org/dotbot; help@moz.com)", @@ -329,6 +359,26 @@ "type" : "crawler" } }, + { + "desc" : "ImagesiftBot", + "ua" : "Mozilla/5.0 (compatible; ImagesiftBot; +imagesift.com)", + "expect" : + { + "name" : "ImagesiftBot", + "version" : "undefined", + "type" : "crawler" + } + }, + { + "desc" : "magpie-crawler", + "ua" : "magpie-crawler/1.1 (robots-txt-checker; +http://www.brandwatch.net)", + "expect" : + { + "name" : "magpie-crawler", + "version" : "1.1", + "type" : "crawler" + } + }, { "desc" : "Meta-ExternalAgent", "ua" : "meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)", @@ -360,6 +410,26 @@ "type" : "crawler" } }, + { + "desc" : "Omgili", + "ua" : "omgili/0.5 +https://omgili.com", + "expect" : + { + "name" : "omgili", + "version" : "0.5", + "type" : "crawler" + } + }, + { + "desc" : "Omgilibot", + "ua" : "omgilibot/0.3 +http://www.omgili.com/Crawler.html", + "expect" : + { + "name" : "omgilibot", + "version" : "0.3", + "type" : "crawler" + } + }, { "desc" : "OpenAI Search", "ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot", @@ -410,6 +480,36 @@ "type" : "crawler" } }, + { + "desc" : "SeznamBot", + "ua" : "Mozilla/5.0 (compatible; SeznamBot/4.0-RC1; +http://napoveda.seznam.cz/seznambot-intro/)", + "expect" : + { + "name" : "SeznamBot", + "version" : "4.0-RC1", + "type" : "crawler" + } + }, + { + "desc" : "Teoma", + "ua" : "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)", + "expect" : + { + "name" : "Teoma", + "version" : "undefined", + "type" : "crawler" + } + }, + { + "desc" : "Timpibot", + "ua" : "Timpibot/0.8 (+http://www.timpi.io)", + "expect" : + { + "name" : "Timpibot", + "version" : "0.8", + "type" : "crawler" + } + }, { "desc" : "TurnitinBot", "ua" : "TurnitinBot (https://turnitin.com/robot/crawlerinfo.html)", @@ -420,6 +520,16 @@ "type" : "crawler" } }, + { + "desc" : "VelenPublicWebCrawler", + "ua" : "Mozilla/5.0 (compatible; VelenPublicWebCrawler/1.0; +https://velen.io)", + "expect" : + { + "name" : "VelenPublicWebCrawler", + "version" : "1.0", + "type" : "crawler" + } + }, { "desc" : "Yahoo! Japan", "ua" : "Y!J-BRW/1.0 (https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716)", @@ -469,5 +579,15 @@ "version" : "undefined", "type" : "crawler" } + }, + { + "desc" : "YouBot", + "ua" : "YouBot (+http://www.you.com)", + "expect" : + { + "name" : "YouBot", + "version" : "undefined", + "type" : "crawler" + } } ] diff --git a/test/specs/browser-fetchers.json b/test/specs/browser-fetchers.json index 94bada00..dfd76f16 100644 --- a/test/specs/browser-fetchers.json +++ b/test/specs/browser-fetchers.json @@ -118,5 +118,15 @@ "version" : "2.0", "type" : "fetcher" } + }, + { + "desc" : "Vercelbot", + "ua" : "Vercelbot (+https://vercel.com)", + "expect" : + { + "name" : "Vercelbot", + "version" : "undefined", + "type" : "fetcher" + } } ] \ No newline at end of file