diff --git a/Asssets/screenshot1.jpg b/Asssets/screenshot1.jpg index 254dc50..50f91b6 100644 Binary files a/Asssets/screenshot1.jpg and b/Asssets/screenshot1.jpg differ diff --git a/README.md b/README.md index f40a9d2..f029230 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,22 @@ March 2022 ![HighlightedTextOutput](Asssets/screenshot2.jpg) ## Description: -This is a PoC I wrote (poorly) in one day that spiders websites while listing their response headers and describes the web servers security ONLY based on those headers. +This is a PoC that spiders websites and lists security related information based on their response headers & meta tags and describes the site's security only based on that. -This ignores all privacy related headers such as referrer-policy. This is just something I made because I like spiders and it helps me to learn and rememeber if I codify my knowledge. Note: I build this on what I found on the internet, I didn't look at all of the standards/spec's/RFC's so there's always going to be new/missing/funky stuff (I prefer to see what's the the real world). -ToDo: -- Print description about the Cookie and it's attributes +This ignores all privacy related headers such as referrer-policy. This is just something I made because I like spiders and it helps me to learn and remember details about CSP, CORS, XSS, CSRF, and Cookie Security. Note: Since I prefer to see what's in the real world I didn't look at all of the standards/spec's/RFC's so there's always going to be new/missing/funky stuff. + +## Arguments +``` +SecuritySiteSpider.exe [-h|--help] [-shuffle|-randomize] [url url url...] +``` + +## ToDo: +- Highlight non-standard HTTP Headers allowed in 'Access-Control-Allow-Headers'. A webserver is telling you they allow it... and it's custom so it's probably ripe for abuse +- The current code just prints information about the CSP. 
I should analyze it a bit deeper: "Policies are combined by taking the intersection of the policies; that is to say, each policy after the first can only further restrict the allowed content, not broaden it." https://web.dev/fixing-mixed-content/ +- Take into account crossorigin attribute can be in the script tag - "anonymous" and "use-credentials" (aka cookie) +- Explain that the lack of a sandbox CSP will mean that a loaded iframe can prompt a download (research "csp and socgolish") +- detection for CSRF tokens - Highlight non-standard HTTP Headers allowed in 'Access-Control-Allow-Headers'. A webserver is telling you they allow it... and it's custom so it's probably ripe for abuse - Scrape the page because the meta HTML tag can contain the Content Security Policy (and probably other things). (though not Content-Security-Policy-Report-Only). @@ -30,12 +40,27 @@ ToDo: - Store everything in a DataBase - Somehow Trigger the accept cookie - print non-standard headers: https://en.wikipedia.org/wiki/List_of_HTTP_header_fields +- Codify everything on this site: https://www.geeksforgeeks.org/http-headers/ +- Create better method to find links to FQDN's +- Load url list from file +- Look at https://developer.mozilla.org/en-US/docs/Glossary/CORS-safelisted_response_header +- Store everything in a 'mark as interesting' feature +- Let me google that for you +- Store everything in a DataBase +- Somehow Trigger a generic 'accept cookie' +- print non-standard headers: https://en.wikipedia.org/wiki/List_of_HTTP_header_fields -Ideas: +## Ideas: - Make a list of setFrameOptions not setting their value, and look up on hackerone -- We could build a web of trust +- Build a visual web of trust based on all the CSP's - Submit every URL to an analzer to see if the domain host content for us (like pastebin, or CDN), and see if VT has anything on it -- wget mirror -> Create a Content-Security-Policy that won't break anything +- Generated a (basic) CSP that wouldn't break anything. 
wget mirror -> Create a Content-Security-Policy that won't break anything - Is there an easy way to drive the browser, and get the Console errors? - Note: SVG images seem to require the 'data:' - - Basics: Xss_NoMimeSniffing, reffer policy set to send no data, \ No newline at end of file + - Basics: Xss_NoMimeSniffing, reffer policy set to send no data, + - How would I account for dynamic javascript loading resources? + +## Others who have done something similar: +* https://github.com/researchapps/url-headers +* https://httpschecker.net/how-it-works#httpsChecker +* https://github.com/bramus/mixed-content-scan \ No newline at end of file diff --git a/SecuritySiteSpider/Program.cs b/SecuritySiteSpider/Program.cs index 0a7a6b6..33fe8a3 100644 --- a/SecuritySiteSpider/Program.cs +++ b/SecuritySiteSpider/Program.cs @@ -1,81 +1,23 @@ using System; using System.Collections.Generic; +using System.IO; using System.Linq; using System.Net; -using System.Text; using System.Text.RegularExpressions; -using System.Threading.Tasks; -using System.Collections.Generic; -using System.Text.RegularExpressions; - namespace SecuritySiteSpider { public static class Program { - /* - * Description: - * This is a PoC that spiders websites while analyzing their response headers and describes the webservers security ONLY based on those headers. - * This ignores all privacy related issues such as referrer-policy - * This is just something I made because I like spiders and it helps me to learn and rememeber if I codify my knowledge - * Note: I build this on what I found on the internet, I didn't look at all of the standards/spec's/RFC's so there's always going to be - * new/missing/funky stuff (I prefer to see what's the the real world). 
- * - * ToDo: - * - Explain that the lack of a sandbox CSP will mean that a loaded iframe can prompt a download - * - Print description about the Cookie and it's attributes - * - Highlight non-standard HTTP Headers allowed in 'Access-Control-Allow-Headers'. A webserver is telling you they allow it... and it's custom so it's probably ripe for abuse - * - Scrape the page because - * the meta HTML tag can contain the Content Security Policy (and probably other things). (though not Content-Security-Policy-Report-Only). - * Ex: - * https://web.dev/fixing-mixed-content/ says "Policies are combined by taking the intersection of the policies; that is to say, each policy after the first can only further restrict the allowed content, not broaden it." - * crossorigin attribute can be in the script tag - "anonymous" and "use-credentials" (aka cookie) - * - Store everything in a Log File - * - Create better method to find links - * - If given just a path, just discard it - * - Load url list from file - * - Look at https://developer.mozilla.org/en-US/docs/Glossary/CORS-safelisted_response_header - * - Store everything in a 'mark as interesting' feature - * - Let me google that for you - * - Store everything in a DataBase - * - Somehow Trigger the accept cookie - * - print non-standard headers: https://en.wikipedia.org/wiki/List_of_HTTP_header_fields - * - * Ideas: - * - Make a list of setFrameOptions not setting their value, and look up on hackerone - * - We could build a web of trust - * - Submit every URL to an analzer to see if the domain host content for us (like pastebin, or CDN), and see if VT has anything on it - * - wget mirror -> Create a Content-Security-Policy that won't break anything - * Is there an easy way to drive the browser, and get the Console errors? 
- * Note: SVG images seem to require the 'data:' - * Basics: Xss_NoMimeSniffing, reffer policy set to send no data, - * - * Looks like someone has already done this: - * https://github.com/researchapps/url-headers - * https://httpschecker.net/how-it-works#httpsChecker - * https://github.com/bramus/mixed-content-scan - * - * Interesting security features: - * https://www.openstreetmap.org/?mlat=22.4449&mlon=114.0263#map=15/22.4449/114.0263 - * https://www.mapquest.com/search/results?query=Gas - * https://www.washingtonpost.com/ - * https://medlineplus.gov/ - * https://fun.chicagotribune.com/game/tca-jumble-daily (XSS-protection set to 0) - * https://www.startribune.com/local/ allow-headers & methods & creds - * http://www.mozilla.com/ many content-security-policies set - * https://www.pinterest.com many content-security-policies set - * https://duckduckgo.com/ - * http://www.usatoday.com/sports/fantasy/football/ - * https://www.nytimes.com/column/thomas-l-friedman - * */ public static bool printDefaultFailures = true; + private static bool shuffleFlag = false; // This is just a simple data class to contain the default security state // as dictated as the headers state. I'm basiclly doing this to codify my // own knowledge. This is not meant for other people class SiteSecurityState - { // ToDo: Change class name to WebSiteSecurityState or something more appropreiate + { // HSTS was created to combat SSL Strip: https://www.secplicity.org/2019/11/05/hsts-a-trivial-response-to-sslstrip/#:~:text=HSTS%20tries%20to%20fix%20the,to%20a%20genuine%20HTTP%20website. 
private bool hsts = false; @@ -96,8 +38,8 @@ class SiteSecurityState // CSP - private bool csp = false; - private bool csp_upgrade_insecure_requests = false; + private bool csp = false; + private bool csp_upgrade_insecure_requests = false; private bool csp_img_src = false; private List csp_img_src_List = new List(); private bool csp_script_src = false; @@ -124,7 +66,7 @@ class SiteSecurityState override public string ToString() { - string sumString = ""; // This is dumb. I have no idea why I did it like this. Note: don't code while tired + string sumString = ""; // This is dumb. I have no idea why I did it like this. Note: don't code while sick and tired // Cookie Security if (cookie) @@ -149,12 +91,12 @@ public string ToString() { sumString += " and the browser WILL leak the cookie over an http connection"; } - + } if (cookie_domain) { - + sumString += " and the cookie will always be sent back to the original site and subdomains"; } sumString += ". "; @@ -169,11 +111,11 @@ public string ToString() { cookie_lax = true; } - else - { - // "The browser will not leak the cookie over an http connection" AND... coming from another site will also work - sumString += "The cookie will only be sent back to the original site but NOT when a user clicks a link from another site to here (aka no Cross Site Requests, aka no CSRF)"; - } + //else... idk I'm confused about this + //{ + // // "The browser will not leak the cookie over an http connection" AND... 
coming from another site will also work + // sumString += "The cookie will only be sent back to the original site but NOT when a user clicks a link from another site to here (aka no Cross Site Requests, aka no CSRF)"; + //} } if (cookie_strict) @@ -201,10 +143,11 @@ public string ToString() sumString += "\n\tThe browser will only render JavaScript from: "; foreach (string uri in csp_script_src_List) { - if(uri.ToLower().Trim() == "" || uri.ToLower().Trim() == "'unsafe-inline'" || uri.ToLower().Trim() == "'unsafe-eval'") + if (uri.ToLower().Trim() == "" || uri.ToLower().Trim() == "'unsafe-inline'" || uri.ToLower().Trim() == "'unsafe-eval'") { // Don't print these - } else + } + else { sumString += "\n\t\t"; if (uri.ToLower().Trim() == "https:" || uri.ToLower().Trim() == "*") @@ -212,13 +155,14 @@ public string ToString() else sumString += uri; } - + } if (!Xss_NoMimeSniffing) { sumString += "\n\t\t" + "The browser also might confuse some html or text for javascript"; } - } else + } + else { sumString += "\n\tThe browser will render JavaScript from ANYWHERE on this site"; } @@ -264,8 +208,8 @@ public string ToString() { sumString += "\n\tThe server will process form POSTing to any URL"; } - - + + if (csp_style_src) { sumString += "\n\tThe browser will only allow styles loaded from: "; @@ -293,7 +237,8 @@ public string ToString() { sumString += "\n\tThe server will load stylesheets from any URL on this site"; } - } else + } + else { // if there is no Content Security Policy found sumString += "\n- There site has NO Content Security Policy SO..."; sumString += "\n\tThe browser will render JavaScript from ANYWHERE on this site"; @@ -312,8 +257,9 @@ public string ToString() sumString += " including subdomains"; if (hsts_preload) sumString += " and this will eventually happen without that first connection to the server because it's going on the preload list"; - - } else + + } + else { sumString += "This site can be loaded over HTTP (SslStripping is possible if HTTPS 
exists)"; } @@ -321,10 +267,10 @@ public string ToString() // x-frame-options - + if (canBeEmbedded) { - + if (embeddedList.Count == 1) { if (embeddedList[0].ToLower().Contains("sameorigin")) @@ -336,17 +282,18 @@ public string ToString() { sumString += "- "; sumString += "This site can be embedded in an iframe, so phishing/Clickjacking might be possible"; - if(embeddedList.Count == 0) + if (embeddedList.Count == 0) { sumString += " from ANYWHERE"; - } else + } + else { sumString += " from these sites: "; foreach (string site in embeddedList) { sumString += " " + site + " "; } - + } sumString += ".\n"; } @@ -413,7 +360,7 @@ internal void setCookieParam(string value) if (param.Contains("strict")) cookie_strict = true; break; - case string s when s.StartsWith("max-age"): + case string s when s.StartsWith("max-age"): // TODO: Look for REALLY long lasting cookie's break; case string s when s.StartsWith("domain"): @@ -429,10 +376,14 @@ internal void setCookieParam(string value) break; default: if (printDefaultFailures) - Console.WriteLine("Non-standard Cookie component: " + value); + { + Console.ForegroundColor = ConsoleColor.Gray; + Console.WriteLine("\t(Non-standard Cookie component)"); + Console.ForegroundColor = ConsoleColor.Green; + } break; } - + } internal void setCSPParam(string key, string value) @@ -454,7 +405,6 @@ internal void setCSPParam(string key, string value) // Deprecated. in favor of report-to break; case string s when s.StartsWith("report-to"): - // Deprecated. 
in favor of report-to break; case string s when s.StartsWith("frame-ancestors"): break; @@ -520,7 +470,7 @@ internal void processGenericSecurityHeader(string key, string value) // Can it be embedded in an iframe setFrameOptions(value); break; - case "x-xss-protection": + case "x-xss-protection": // This is in the ignore list for now because it's been retired by modern browsers processXssProtection(value); break; @@ -573,8 +523,13 @@ internal void processGenericSecurityHeader(string key, string value) default: - if(printDefaultFailures) + if (printDefaultFailures) + { + Console.ForegroundColor = ConsoleColor.Gray; Console.WriteLine("What is the default value of " + key.ToLower()); + Console.ForegroundColor = ConsoleColor.DarkYellow; + } + break; } } @@ -588,12 +543,13 @@ private void processReferrerPolicy(string value) private void processMineTypeXssProtection(string value) { - if (value.ToLower().Contains("nosniff")){ + if (value.ToLower().Contains("nosniff")) + { Xss_NoMimeSniffing = true; } else { - Console.WriteLine("Error: x-content-type-options should only be able to have 'nosniff' and it has: " + value); + Console.WriteLine("\t\tError: x-content-type-options should only be able to have 'nosniff' and it has: " + value); } } private void processXssProtection(string values) @@ -628,8 +584,8 @@ private void processXssProtection(string values) } } - - + + } private void processHSTS(string value) @@ -650,7 +606,7 @@ private void processHSTS(string value) hsts_preload = true; } } - + //if (hsts) @@ -675,12 +631,12 @@ internal void setFrameOptions(string value) case string s when s.StartsWith("allow-from"): embeddedList.Add(value); break; - case string s when s.Trim() == "": + case string s when s.Trim() == "": // I don't know how, but some sites just don't supply anything so (Chrome at least) ignores it for now canBeEmbedded = true; break; default: - if(printDefaultFailures) + if (printDefaultFailures) Console.WriteLine("Need to process x-frame-options value: " + 
value); break; } @@ -743,41 +699,197 @@ public static List Find(string file) static int Main(string[] args) { + string metaTagFailLog = "MetaTagFail.log"; + string htmlLogPath = "LastPage.html"; List visited = new List(); Queue links = new Queue(); - //links.Enqueue("https://www.nytimes.com/subscription/dg-cookie-policy/cookie-policy.html"); - //links.Enqueue("https://www.nytimes.com/column/thomas-l-friedman"); - links.Enqueue("https://refdesk.com/"); - //links.Enqueue("https://sdb.tools/"); - //links.Enqueue("https://www.openstreetmap.org/?mlat=22.4449&mlon=114.0263#map=15/22.4449/114.0263"); - //links.Enqueue("https://www.mapquest.com/search/results?query=Gas"); - //links.Enqueue("https://www.washingtonpost.com/"); - //links.Enqueue("https://medlineplus.gov/"); - //links.Enqueue("https://fun.chicagotribune.com/game/tca-jumble-daily"); - //links.Enqueue("https://www.startribune.com/local/"); - //links.Enqueue("http://www.mozilla.com/"); - //links.Enqueue("https://www.pinterest.com"); - //links.Enqueue("https://duckduckgo.com/"); - //links.Enqueue("http://www.usatoday.com/sports/fantasy/football/"); + foreach(string arg in args) + { + if (arg.ToLower() == "-h" || arg.ToLower() == "--help") + { + Console.WriteLine("Only supported arguments are URL's to start the spider and 'shuffle'"); + return 0; + } + else if (arg.ToLower() == "-shuffle" || arg.ToLower() == "-randomize") + { + shuffleFlag = true; + } + else + { + if (arg.StartsWith("http://") || arg.StartsWith("https://")) + links.Enqueue(arg); + else + { + links.Enqueue("http://" + arg); + links.Enqueue("https://" + arg); + } + } + } + links.Enqueue("https://www.nytimes.com/subscription/dg-cookie-policy/cookie-policy.html"); + links.Enqueue("https://www.nytimes.com/column/thomas-l-friedman"); + links.Enqueue("https://refdesk.com/"); + links.Enqueue("https://sdb.tools/"); + links.Enqueue("https://www.openstreetmap.org/?mlat=22.4449&mlon=114.0263#map=15/22.4449/114.0263"); + 
links.Enqueue("https://www.mapquest.com/search/results?query=Gas"); + links.Enqueue("https://www.washingtonpost.com/"); + links.Enqueue("https://medlineplus.gov/"); + links.Enqueue("https://fun.chicagotribune.com/game/tca-jumble-daily"); + links.Enqueue("https://www.startribune.com/local/"); + links.Enqueue("http://www.mozilla.com/"); + links.Enqueue("https://www.pinterest.com"); + links.Enqueue("https://duckduckgo.com/"); + links.Enqueue("http://www.usatoday.com/sports/fantasy/football/"); + + + var securityList = new List +{ + "x-frame-options", + "strict-transport-security", + "permissions-policy", + "cross-origin-opener-policy", + "cross-origin-resource-policy", + "timing-allow-origin", + "x-origin-time", + "report-to", // Whenever a user visits a page on your site, their browser sends JSON-formatted reports regarding anything that violates the content security policy to this URL + "x-redis", + "x-content-type-options", + + // Access Control around the idea of Same Origin Policy (SOP). + // Headers that start with, "access-control" are sent back to the browser to tell it what it should* be doing and what it should* have access to across origins (aka sites) + // So these headers are designed to tell the browser how to access Cross Origin Resources (for Sharing) aka CORS so these are 'CORS' headers + "access-control-allow-credentials", + "access-control-allow-methods", + "access-control-allow-headers", + "access-control-allow-origin", // Access-Control-Allow-Origin response header to tell the browser that the content of this page is accessible to certain origins. 
https://stackoverflow.com/questions/10636611/how-does-access-control-allow-origin-header-work + "content-security-policy", + "content-security-policy-report-only", + "origin" + +}; + + var doNotPrintList = new List +{ + "server", // This might be fun + "x-client-ip", // This might be fun + "x-powered-by", // this might be fun + "x-content-powered-by", // this might be fun + "x-served-by", // this might be fun + "served-by", // this might be fun + "x-servedbyhost", // this might be fun - ::ffff:127.0.0.1 + "x-hosted-by", // this might be fun to map + "x-bbackend", // this might be fun + "x-backend", // this might be fun + "x-backend-server", // this might be fun + "x-datacenter", // this might be fun + "x-url", + "x-host", + "x-pbs-appsvrip", // Bug: leaking internal IP info found on https://www.pbs.org/newshour/ + "x-pbs-", + + // Info about me (aka creepy) + "x-dbg-gt", + "x-true-client-ip", + + // Proxy Related + "x-forwarded-for", // this might be fun + "via", // this might be fun + + // This might change the sites response format + "vary", + "x-ua-compatible", + + // AWS + "x-amz-cf-pop", + "x-amz-cf-id", + "x-amz-version-id", + "x-amz-id-2", + "x-amz-request-id", + "x-amz-meta-uncompressed-size", + + // Fastly + "fastly-original-body-size", + + // Security related but don't really matter + "expect-ct", // Cert transparency + + // CMS's + "x-drupal-cache", // Cert transparency + "wpx", // I think it's a wordpress site + + // Interesting + "nel", // Network Error Logging (404's and such) + "accept-ranges", // resume downloads from a certain byte offset + + "x-origin-time", + "origin-trial", // washingtonpost.com + "x-xss-protection", // this is on the ignore list because it's been retired by modern browsers + "x-permitted-cross-domain-policies", // used to permit cross-domain requests from Flash and PDF documents + "x-download-options", // just an IE-8 thing + "referrer-policy", // just a privacy thing 
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy + "content-type", + "akamai-grn", + "x-cdn", + "x-route", + "x-origin-cache", + "x-varnish", + "pragma", + "x-gt-setter", + "x-route-akamai", + "x-edge-cache-expiry", + "x-edge-cache-duration", + "link", // link to metadata about the site + "x-httpd", // response header information and usage statistics. + "transfer-encoding", + "x-timer", + "x-vcache", + "cache-control", + "connection", + "x-proxy-cache", + "x-response-time", + "x-cdn-rule", + "date", + "etag", + "age", + "x-cache", + "fastly-restarts", + "upgrade", + "x-cache", + "status", + "keep-alive", + "cf-cache-status", + "content-length", + "content-language", + "expires", + "x-runtime", + "x-rateLimit-reset", + "x-ratelimit-limit", + "x-rateLimit-remaining", + "last-modified", + + + // Meta tags that I don't care much about +}; while (links.Count > 0) { - ShuffleQueue(links); // TODO: disable suffling if reading from a file + if(shuffleFlag) + ShuffleQueue(links); string site = links.Dequeue(); if (visited.Contains(site)) { continue; } - System.Console.WriteLine("Scrapping server headers from " + site); + System.Console.WriteLine("Scrapping server headers and meta tags from " + site); WebClient web = new WebClient(); string html = ""; try { html = web.DownloadString(site); - } catch(Exception ex) + } + catch (Exception ex) { Console.ForegroundColor = ConsoleColor.Red; System.Console.WriteLine("Error: " + ex.Message); @@ -785,7 +897,7 @@ static int Main(string[] args) continue; } visited.Add(site); - + writeToFile(html, htmlLogPath); // Extract Headers WebHeaderCollection myWebHeaderCollection = web.ResponseHeaders; @@ -802,279 +914,185 @@ static int Main(string[] args) // Extract Meta tags - // - Regex rx = new Regex("", - RegexOptions.Compiled | RegexOptions.IgnoreCase); - MatchCollection matches = rx.Matches(html); - foreach (Match match in matches) + // I might need to account for single quotes. See MetaTagFail.log. 
https://regex101.com/ is REALLY useful + Regex allMetatags = new Regex("", RegexOptions.Compiled | RegexOptions.IgnoreCase); + // Typical tags: + Regex securityMetaTags = new Regex("meta.*http-equiv.*=\"(.*)\".*content.*=\"(.*)\".*>", RegexOptions.Compiled | RegexOptions.IgnoreCase); + // Tags without content: + Regex securityNoContentMetaTags = new Regex("meta.*http-equiv.*=\"(.*)\".*>", RegexOptions.Compiled | RegexOptions.IgnoreCase); + foreach (string line in html.Split('\n')) { - GroupCollection groups = match.Groups; - string key = groups[1].ToString(); - string value = groups[2].ToString(); - pageAttributesOrignial.Add(Tuple.Create(key, value)); - pageAttributes.Add(Tuple.Create(key.ToLower(), value.ToLower())); + foreach (string tag in line.Split('<')) + { + bool metaTagFlag = false; + if (tag.Contains("http-equiv")) + { + metaTagFlag = true; + //Console.WriteLine("Should Catch: {0}", line.Trim()); + } + + MatchCollection matches = securityMetaTags.Matches(tag); + foreach (Match match in matches) + { + GroupCollection groups = match.Groups; + string key = groups[1].ToString(); + string value = groups[2].ToString(); + pageAttributesOrignial.Add(Tuple.Create(key, value)); + pageAttributes.Add(Tuple.Create(key.ToLower(), value.ToLower())); + metaTagFlag = false; + if (key.ToLower().Contains("content-security")) + { + Console.WriteLine("\tFound CSP in meta tag: '{0}' = '{1}'", key, value); + } + } + if (metaTagFlag && !tag.StartsWith("!--")) + { + // Try again with a more general, no content regex: + matches = securityNoContentMetaTags.Matches(tag); + foreach (Match match in matches) + { + GroupCollection groups = match.Groups; + string key = groups[1].ToString(); + string value = ""; + pageAttributesOrignial.Add(Tuple.Create(key, value)); + pageAttributes.Add(Tuple.Create(key.ToLower(), value.ToLower())); + metaTagFlag = false; + } + + // If I still fail to scrape the meta tag, log it and move on + if (metaTagFlag) + { + string message = String.Format("RegEx 
did not this possible meta tag from site {0}: {1} ", site, line.Trim()); + writeToFile(message, metaTagFailLog); + } + + + + } + } } + SiteSecurityState page = new SiteSecurityState(); foreach ((string key, string value) in pageAttributes) { - - } - - for (int i = 0; i < myWebHeaderCollection.Count; i++) - { - // My "Don't print" list - // ToDo: Make this into a swtich case, list, or something less obnoxious - if ( - myWebHeaderCollection.GetKey(i).ToLower().Contains("server") || // This might be fun - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-client-ip") || // This might be fun - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-powered-by") || // this might be fun - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-content-powered-by") || // this might be fun - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-served-by") || // this might be fun - myWebHeaderCollection.GetKey(i).ToLower().Contains("served-by") || // this might be fun - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-servedbyhost") || // this might be fun - ::ffff:127.0.0.1 - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-hosted-by") || // this might be fun to map - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-bbackend") || // this might be fun - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-backend") || // this might be fun - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-datacenter") || // this might be fun - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-url") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-host") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-pbs-appsvrip") || // Bug: leaking internal IP info found on https://www.pbs.org/newshour/ - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-pbs-") || - - // Info about me (aka creepy) - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-dbg-gt") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-true-client-ip") || - - // Proxy 
Related - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-forwarded-for") || // this might be fun - myWebHeaderCollection.GetKey(i).ToLower().Contains("via") || // this might be fun - - // This might change the sites response format - myWebHeaderCollection.GetKey(i).ToLower().Contains("vary") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-ua-compatible") || - - // CloudFlare - myWebHeaderCollection.GetKey(i).ToLower().StartsWith("cf-") || - myWebHeaderCollection.GetKey(i).ToLower().StartsWith("x-turbo-charged-by") || - - // AWS - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-amz-cf-pop") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-amz-cf-id") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-amz-version-id") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-amz-id-2") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-amz-request-id") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-amz-meta-uncompressed-size") || - - // Fastly - myWebHeaderCollection.GetKey(i).ToLower().Contains("fastly-original-body-size") || - - // Security related but don't really matter - myWebHeaderCollection.GetKey(i).ToLower().Contains("expect-ct") || // Cert transparency - - // CMS's - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-drupal-cache") || // Cert transparency - myWebHeaderCollection.GetKey(i).ToLower().Contains("wpx") || // I think it's a wordpress site - - // Interesting - myWebHeaderCollection.GetKey(i).ToLower().Contains("nel") || // Network Error Logging (404's and such) - myWebHeaderCollection.GetKey(i).ToLower().Contains("accept-ranges") || // resume downloads from a certain byte offset - - // No documentation, don't seem to matter: - // x-gen-mode - // x-hnp-log - // x-ads - - - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-origin-time") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("origin-trial") || // washingtonpost.com - 
myWebHeaderCollection.GetKey(i).ToLower().Contains("x-xss-protection") || // this is on the ignore list because it's been retired by modern browsers - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-permitted-cross-domain-policies") || // used to permit cross-domain requests from Flash and PDF documents - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-download-options") || // just an IE-8 thing - myWebHeaderCollection.GetKey(i).ToLower().Contains("referrer-policy") || // just a privacy thing https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy - myWebHeaderCollection.GetKey(i).ToLower().Contains("content-type") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("akamai-grn") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-cdn") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-route") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-origin-cache") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-varnish") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("pragma") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-gt-setter") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-route-akamai") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-edge-cache-expiry") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-edge-cache-duration") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("link") || // link to metadata about the site - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-httpd") || // response header information and usage statistics. 
- myWebHeaderCollection.GetKey(i).ToLower().Contains("transfer-encoding") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-timer") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-vcache") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("cache-control") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("connection") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-proxy-cache") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-response-time") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-cdn-rule") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("date") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("etag") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("age") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-cache") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("fastly-restarts") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("upgrade") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-cache") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("status") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("keep-alive") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("cf-cache-status") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("content-length") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("content-language") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("expires") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-runtime") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-rateLimit-reset") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-ratelimit-limit") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-rateLimit-remaining") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("last-modified") - ) + if (doNotPrintList.Contains(key)) { continue; } - else - { - - if ( // Cookie related: - myWebHeaderCollection.GetKey(i).ToLower().Contains("cookie") 
|| - myWebHeaderCollection.GetKey(i).ToLower().Contains("p3p") || // Certain browsers require a P3P compact policy for cookies to be sent or received in some cases, including the situation involved in the SUL login check - myWebHeaderCollection.GetKey(i).ToLower().Contains("vary") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("alt-svc") || // alternate service - could be a backup server - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-logged-in") - ) - { - - Console.ForegroundColor = ConsoleColor.Green; + else if (key.Contains("cookie")) + { // Cookie Security + Console.ForegroundColor = ConsoleColor.Green; - if (myWebHeaderCollection.GetKey(i).ToLower().Contains("vary") && !myWebHeaderCollection.Get(i).ToLower().Contains("cookie")) - { - Console.ForegroundColor = ConsoleColor.Gray; - } if (myWebHeaderCollection.GetKey(i).ToLower() == "set-cookie") + if (key.Contains("vary") && !key.Contains("cookie")) + { + Console.ForegroundColor = ConsoleColor.Gray; + } + if (key == "set-cookie") + { + Console.WriteLine("\t" + key); + string[] values = value.Split(';'); + for (int j = 0; j < values.Length; j++) { - Console.WriteLine("\t" + myWebHeaderCollection.GetKey(i)); - string[] values = myWebHeaderCollection.Get(i).Split(';'); - for (int j = 0; j < values.Length; j++) + if (j == 0) + { // Don't highlight the Cookie's value or try to process it + Console.ForegroundColor = ConsoleColor.Gray; + Console.WriteLine("\t\t" + values[j].Trim()); + } + else { - if(j == 0) - { // Don't highlight the Cookie's value or try to process it - Console.ForegroundColor = ConsoleColor.Gray; - Console.WriteLine("\t\t" + values[j].Trim()); - } - else - { - Console.ForegroundColor = ConsoleColor.Green; - Console.WriteLine("\t\t" + values[j].Trim()); - page.setCookieParam(values[j].Trim()); - } - + Console.ForegroundColor = ConsoleColor.Green; + Console.Write("\t\t" + values[j].Trim()); + page.setCookieParam(values[j].Trim()); + Console.WriteLine(); } - } - else - { - 
Console.WriteLine("\t" + myWebHeaderCollection.GetKey(i) + " = " + myWebHeaderCollection.Get(i)); + } - + Console.ForegroundColor = ConsoleColor.Gray; } - else if ( // Security related: - //myWebHeaderCollection.GetKey(i).ToLower().Contains("content-type") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-frame-options") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("strict-transport-security") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("permissions-policy") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("cross-origin-opener-policy") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("cross-origin-resource-policy") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("timing-allow-origin") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-origin-time") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("report-to") || // Whenever a user visits a page on your site, their browser sends JSON-formatted reports regarding anything that violates the content security policy to this URL - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-redis") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("x-content-type-options") || - - // Access Control around the idea of Same Origin Policy (SOP). 
- // Headers that start with, "access-control" are sent back to the browser to tell it what it should* be doing and what it should* have access to across origins (aka sites) - // So these headers are designed to tell the browser how to access Cross Origin Resources (for Sharing) aka CORS so these are 'CORS' headers - myWebHeaderCollection.GetKey(i).ToLower().StartsWith("access-control") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("access-control-allow-credentials") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("access-control-allow-methods") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("access-control-allow-headers") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("access-control-allow-origin") || // Access-Control-Allow-Origin response header to tell the browser that the content of this page is accessible to certain origins. https://stackoverflow.com/questions/10636611/how-does-access-control-allow-origin-header-work - myWebHeaderCollection.GetKey(i).ToLower().Contains("content-security-policy") || - myWebHeaderCollection.GetKey(i).ToLower().Contains("origin")) - { // Security related: - Console.ForegroundColor = ConsoleColor.DarkYellow; - if ( myWebHeaderCollection.GetKey(i).ToLower().Contains("content-security-policy") ) - { // print a new line for each of the values in content-security-policy - Console.ForegroundColor = ConsoleColor.Yellow; - Console.WriteLine("\t" + myWebHeaderCollection.GetKey(i)); - string[] values = myWebHeaderCollection.Get(i).Split(';'); - for(int j = 0; j < values.Length; j++) + else + { + Console.WriteLine("\t" + key + " = " + value); + } + }// end if cookie + else if (key == "http-equiv") + { + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine("\t\t" + value); + Console.ForegroundColor = ConsoleColor.Gray; + } + else if (securityList.Contains(key)) + { + Console.ForegroundColor = ConsoleColor.DarkYellow; + if (key.StartsWith("content-security-policy")) + { // print a new line 
for each of the values in content-security-policy + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine("\t" + key); + string[] values = value.Split(';'); + for (int j = 0; j < values.Length; j++) + { + string policy = values[j].Trim(); + string[] policyArray = policy.Split(' '); + string cspKey = policyArray[0]; + Console.ForegroundColor = ConsoleColor.White; + if (cspKey.Contains("upgrade-insecure-requests")) + Console.ForegroundColor = ConsoleColor.Cyan; + if (cspKey.Contains("block-all-mixed-content")) + Console.ForegroundColor = ConsoleColor.Gray; // Depecated + + Console.Write("\t\t" + cspKey); + + // Break up and isolate the value componentns and highlight them if they are terrible security controls + List list = new List(policyArray); + list.RemoveAt(0); + policyArray = list.ToArray(); + foreach (string param in policyArray) { - string policy = values[j].Trim(); - string[] policyArray = policy.Split(' '); - string key = policyArray[0]; - Console.ForegroundColor = ConsoleColor.White; - if (key.Contains("upgrade-insecure-requests")) - Console.ForegroundColor = ConsoleColor.Cyan; - if (key.Contains("block-all-mixed-content")) - Console.ForegroundColor = ConsoleColor.Gray; // Depecated - - Console.Write("\t\t" + key) ; - - // Break up and isolate the value componentns and highlight them if they are terrible security controls - List list = new List(policyArray); - list.RemoveAt(0); - policyArray = list.ToArray(); - foreach (string param in policyArray) + string normalized = param.ToLower().Trim(); + if ( // Red Color dangerous CSP values + normalized == "'unsafe-inline'" || + normalized == "'unsafe-eval'" || + normalized == "'strict-dynamic'" || + normalized == "*" || + normalized == "https:") { - string normalized = param.ToLower().Trim(); - if ( // Red Color dangerous CSP values - normalized == "'unsafe-inline'" || - normalized == "'unsafe-eval'" || - normalized == "'strict-dynamic'" || - normalized == "*" || - normalized == "https:") - { - 
Console.ForegroundColor = ConsoleColor.Red; - Console.Write(" " + param); - } - else - { - Console.ForegroundColor = ConsoleColor.Cyan; - Console.Write(" " + param); - } + Console.ForegroundColor = ConsoleColor.Red; + Console.Write(" " + param); + } + else + { + Console.ForegroundColor = ConsoleColor.Cyan; + Console.Write(" " + param); } - Console.WriteLine(); - string value = string.Join(" ", policyArray); - page.setCSPParam(key, value); - - Console.ForegroundColor = ConsoleColor.Gray; - - } - } - else - { // print it yellow without formating - Console.WriteLine("\t" + myWebHeaderCollection.GetKey(i) + " = " + myWebHeaderCollection.Get(i)); - page.processGenericSecurityHeader(myWebHeaderCollection.GetKey(i), myWebHeaderCollection.Get(i)); + Console.WriteLine(); + string cspValue = string.Join(" ", policyArray); + page.setCSPParam(cspKey, cspValue); + + Console.ForegroundColor = ConsoleColor.Gray; } - Console.ForegroundColor = ConsoleColor.Gray; } - else // Everything else - { - Console.WriteLine("\t" + myWebHeaderCollection.GetKey(i).ToLower() + " = " + myWebHeaderCollection.Get(i)); + else + { // print it yellow without formating + Console.WriteLine("\t" + key + " = " + value); + page.processGenericSecurityHeader(key, value); } - - Console.ForegroundColor = ConsoleColor.Gray; } + else // just print if nothing else + { + Console.ForegroundColor = ConsoleColor.Gray; + Console.WriteLine("\t{0} - {1}", key, value); + + } + } // end new/better for loop + - } int counter = 0; foreach (LinkItem link in LinkFinder.Find(html)) @@ -1099,7 +1117,7 @@ static int Main(string[] args) System.Console.WriteLine(page); Console.ForegroundColor = ConsoleColor.Gray; - System.Console.WriteLine("Done. Added "+ counter + " sites. Total is " + links.Count + " Press Enter for next"); + System.Console.WriteLine("Done. Added {0} sites. Total is {1}. 
Press Enter for next site", counter, links.Count); System.Console.ReadLine(); } // end while loop @@ -1108,6 +1126,28 @@ static int Main(string[] args) return 0; } + private static void writeToFile(string message, string path = @"GeneralRunLog.log") + { + // This text is added only once to the file. + if (!File.Exists(path)) + { + // Create a file to write to. + using (StreamWriter sw = File.CreateText(path)) + { + sw.WriteLine(message); + } + } + + // This text is always added, making the file longer over time + // if it is not deleted. + using (StreamWriter sw = File.AppendText(path)) + { + sw.WriteLine(message); + } + + + } + public static void ShuffleQueue(this Queue queue) { Random rng = new Random(); @@ -1127,33 +1167,3 @@ public static void ShuffleStack(this Stack stack) } } } -/* - -Interesting: - -Scrapping https://www.timeanddate.com/time/map/ - Via = 1.1 varnish -Scrapping https://www.theadvocate.com/baton_rouge/opinion/letters/ - x-loop = 1 - x-robots-tag = noarchive - x-ua-compatible = IE=edge,chrome=1 - x-tncms = 1.61.5; app8; 0.7s; 8M -Scrapping https://www.usatoday.com/media/latest/videos/news/ - Feature-Policy = camera 'none';display-capture 'none';geolocation 'none';microphone 'none';payment 'none';usb 'none';xr-spatial-tracking 'none' - Gannett-Cam-Experience-Id = control:8 -Scrapping http://www.xinhuanet.com/english/index.htm - EagleId -Scrapping https://www.nytimes.com/ - onion-location = https://www.nytimesn7cgmftshazwhfgzm37qxb44r64ytbb2dj3x62d2lljsciiyd.onion/ - X-API-Version = F-F-VI - x-gdpr = 0 - x-api-version -Scrapping https://www.houstonchronicle.com/opinion/ - X-Gen-Mode = full -Scrapping https://postcalc.usps.com/ - x-ruleset-version = 1.3 - - -What does the starter 'X-' mean? - - * */ \ No newline at end of file