From 853e1e8d249fd1e29d0fcc7280d29b03df3d643d Mon Sep 17 00:00:00 2001 From: Zachary Rice Date: Thu, 30 Jan 2025 12:34:07 -0600 Subject: [PATCH] Support exclude regexes, excludewords, and entropy filters for custom detectors (#3860) * Simple implementation of exclude regexes, stopwords, and entropy checks for customdetectors * better name * readme blurb and example * link --- README.md | 6 + examples/generic_with_filters.yml | 1498 +++++++++++++++++ pkg/custom_detectors/custom_detectors.go | 60 + .../custom_detectorspb/custom_detectors.pb.go | 93 +- .../custom_detectors.pb.validate.go | 2 + proto/custom_detectors.proto | 5 + 6 files changed, 1639 insertions(+), 25 deletions(-) create mode 100644 examples/generic_with_filters.yml diff --git a/README.md b/README.md index aed687d97bb0..1ce1b6beed40 100644 --- a/README.md +++ b/README.md @@ -656,6 +656,10 @@ TruffleHog will send a JSON POST request containing the regex matches to a configured webhook endpoint. If the endpoint responds with a `200 OK` response status code, the secret is considered verified. +Custom Detectors support a few different filtering mechanisms: entropy, regex targeting the entire match, regex targeting the captured secret, +and excluded word lists checked against the secret (the capture group if present, otherwise the entire match). Note that if +your custom detector defines multiple entries under `regex` (in this example, `hogID` and `hogToken`), then the filters are applied to each regex. [Here](examples/generic_with_filters.yml) is an example of a custom detector using these filters. + **NB:** This feature is alpha and subject to change. ## Regex Detector Example @@ -749,6 +753,8 @@ with HTTPServer(('', 8000), Verifier) as server: pass ``` + + ## :mag: Analyze TruffleHog supports running a deeper analysis of a credential to view its permissions and the resources it has access to. 
diff --git a/examples/generic_with_filters.yml b/examples/generic_with_filters.yml new file mode 100644 index 000000000000..be3f9386eea9 --- /dev/null +++ b/examples/generic_with_filters.yml @@ -0,0 +1,1498 @@ +detectors: +- name: generic-password + keywords: + - pass + - access + - auth + - credential + - cred + - secret + - token + regex: + secret: |- + (?i)[\w.-]{0,50}?(?:access|auth|(?-i:[Aa]pi|API)|credential|creds|key|passw(?:or)?d|secret|token)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([\w.=-]{10,150}|[a-z0-9][a-z0-9+/]{11,}={0,3})(?:[\x60'"\s;]|\\[nr]|$) + entropy: 3.5 + # exclude_regexes_capture: + # - |- + # (?i)(?:ignore) + exclude_regexes_match: + - |- + (?i)(?:access(?:ibility|or)|access[_.-]?id|random[_.-]?access|api[_.-]?(?:id|name|version)|rapid|capital|[a-z0-9-]*?api[a-z0-9-]*?:jar:|author|X-MS-Exchange-Organization-Auth|Authentication-Results|(?:credentials?[_.-]?id|withCredentials)|(?:bucket|foreign|hot|idx|natural|primary|pub(?:lic)?|schema|sequence)[_.-]?key|key[_.-]?(?:alias|board|code|frame|id|length|mesh|name|pair|ring|selector|signature|size|stone|storetype|word|up|down|left|right)|key[_.-]?vault[_.-]?(?:id|name)|keyVaultToStoreSecrets|key(?:store|tab)[_.-]?(?:file|path)|issuerkeyhash|(?-i:[DdMm]onkey|[DM]ONKEY)|keying|(?:secret)[_.-]?(?:length|name|size)|UserSecretsId|(?:io\.jsonwebtoken[ \t]?:[ \t]?[\w-]+)|(?:api|credentials|token)[_.-]?(?:endpoint|ur[il])|public[_.-]?token|(?:key|token)[_.-]?file|(?-i:(?:[A-Z_]+=\n[A-Z_]+=|[a-z_]+=\n[a-z_]+=)(?:\n|\z))|(?-i:(?:[A-Z.]+=\n[A-Z.]+=|[a-z.]+=\n[a-z.]+=)(?:\n|\z))) + exclude_words: + - "exclude" + - "000000" + - "aaaaaa" + - "about" + - "abstract" + - "academy" + - "acces" + - "account" + - "act-" + - "act." + - "act_" + - "action" + - "active" + - "actively" + - "activity" + - "adapter" + - "add-" + - "add." + - "add_" + - "add-on" + - "addon" + - "addres" + - "admin" + - "adobe" + - "advanced" + - "adventure" + - "agent" + - "agile" + - "air-" + - "air." 
+ - "air_" + - "ajax" + - "akka" + - "alert" + - "alfred" + - "algorithm" + - "all-" + - "all." + - "all_" + - "alloy" + - "alpha" + - "amazon" + - "amqp" + - "analysi" + - "analytic" + - "analyzer" + - "android" + - "angular" + - "angularj" + - "animate" + - "animation" + - "another" + - "ansible" + - "answer" + - "ant-" + - "ant." + - "ant_" + - "any-" + - "any." + - "any_" + - "apache" + - "app-" + - "app-" + - "app." + - "app." + - "app_" + - "app_" + - "apple" + - "arch" + - "archive" + - "archived" + - "arduino" + - "array" + - "art-" + - "art." + - "art_" + - "article" + - "asp-" + - "asp." + - "asp_" + - "asset" + - "async" + - "atom" + - "attention" + - "audio" + - "audit" + - "aura" + - "auth" + - "author" + - "author" + - "authorize" + - "auto" + - "automated" + - "automatic" + - "awesome" + - "aws_" + - "azure" + - "back" + - "backbone" + - "backend" + - "backup" + - "bar-" + - "bar." + - "bar_" + - "base" + - "based" + - "bash" + - "basic" + - "batch" + - "been" + - "beer" + - "behavior" + - "being" + - "benchmark" + - "best" + - "beta" + - "better" + - "big-" + - "big." + - "big_" + - "binary" + - "binding" + - "bit-" + - "bit." + - "bit_" + - "bitcoin" + - "block" + - "blog" + - "board" + - "book" + - "bookmark" + - "boost" + - "boot" + - "bootstrap" + - "bosh" + - "bot-" + - "bot." + - "bot_" + - "bower" + - "box-" + - "box." + - "box_" + - "boxen" + - "bracket" + - "branch" + - "bridge" + - "browser" + - "brunch" + - "buffer" + - "bug-" + - "bug." + - "bug_" + - "build" + - "builder" + - "building" + - "buildout" + - "buildpack" + - "built" + - "bundle" + - "busines" + - "but-" + - "but." + - "but_" + - "button" + - "cache" + - "caching" + - "cakephp" + - "calendar" + - "call" + - "camera" + - "campfire" + - "can-" + - "can." + - "can_" + - "canva" + - "captcha" + - "capture" + - "card" + - "carousel" + - "case" + - "cassandra" + - "cat-" + - "cat." 
+ - "cat_" + - "category" + - "center" + - "cento" + - "challenge" + - "change" + - "changelog" + - "channel" + - "chart" + - "chat" + - "cheat" + - "check" + - "checker" + - "chef" + - "ches" + - "chinese" + - "chosen" + - "chrome" + - "ckeditor" + - "clas" + - "classe" + - "classic" + - "clean" + - "cli-" + - "cli." + - "cli_" + - "client" + - "client" + - "clojure" + - "clone" + - "closure" + - "cloud" + - "club" + - "cluster" + - "cms-" + - "cms_" + - "coco" + - "code" + - "coding" + - "coffee" + - "color" + - "combination" + - "combo" + - "command" + - "commander" + - "comment" + - "commit" + - "common" + - "community" + - "compas" + - "compiler" + - "complete" + - "component" + - "composer" + - "computer" + - "computing" + - "con-" + - "con." + - "con_" + - "concept" + - "conf" + - "config" + - "config" + - "connect" + - "connector" + - "console" + - "contact" + - "container" + - "contao" + - "content" + - "contest" + - "context" + - "control" + - "convert" + - "converter" + - "conway'" + - "cookbook" + - "cookie" + - "cool" + - "copy" + - "cordova" + - "core" + - "couchbase" + - "couchdb" + - "countdown" + - "counter" + - "course" + - "craft" + - "crawler" + - "create" + - "creating" + - "creator" + - "credential" + - "crm-" + - "crm." + - "crm_" + - "cros" + - "crud" + - "csv-" + - "csv." + - "csv_" + - "cube" + - "cucumber" + - "cuda" + - "current" + - "currently" + - "custom" + - "daemon" + - "dark" + - "dart" + - "dash" + - "dashboard" + - "data" + - "database" + - "date" + - "day-" + - "day." + - "day_" + - "dead" + - "debian" + - "debug" + - "debug" + - "debugger" + - "deck" + - "define" + - "del-" + - "del." + - "del_" + - "delete" + - "demo" + - "deploy" + - "design" + - "designer" + - "desktop" + - "detection" + - "detector" + - "dev-" + - "dev." 
+ - "dev_" + - "develop" + - "developer" + - "device" + - "devise" + - "diff" + - "digital" + - "directive" + - "directory" + - "discovery" + - "display" + - "django" + - "dns-" + - "dns_" + - "doc-" + - "doc-" + - "doc." + - "doc." + - "doc_" + - "doc_" + - "docker" + - "docpad" + - "doctrine" + - "document" + - "doe-" + - "doe." + - "doe_" + - "dojo" + - "dom-" + - "dom." + - "dom_" + - "domain" + - "done" + - "don't" + - "dot-" + - "dot." + - "dot_" + - "dotfile" + - "download" + - "draft" + - "drag" + - "drill" + - "drive" + - "driven" + - "driver" + - "drop" + - "dropbox" + - "drupal" + - "dsl-" + - "dsl." + - "dsl_" + - "dynamic" + - "easy" + - "_ec2_" + - "ecdsa" + - "eclipse" + - "edit" + - "editing" + - "edition" + - "editor" + - "element" + - "emac" + - "email" + - "embed" + - "embedded" + - "ember" + - "emitter" + - "emulator" + - "encoding" + - "endpoint" + - "engine" + - "english" + - "enhanced" + - "entity" + - "entry" + - "env_" + - "episode" + - "erlang" + - "error" + - "espresso" + - "event" + - "evented" + - "example" + - "example" + - "exchange" + - "exercise" + - "experiment" + - "expire" + - "exploit" + - "explorer" + - "export" + - "exporter" + - "expres" + - "ext-" + - "ext." + - "ext_" + - "extended" + - "extension" + - "external" + - "extra" + - "extractor" + - "fabric" + - "facebook" + - "factory" + - "fake" + - "fast" + - "feature" + - "feed" + - "fewfwef" + - "ffmpeg" + - "field" + - "file" + - "filter" + - "find" + - "finder" + - "firefox" + - "firmware" + - "first" + - "fish" + - "fix-" + - "fix_" + - "flash" + - "flask" + - "flat" + - "flex" + - "flexible" + - "flickr" + - "flow" + - "fluent" + - "fluentd" + - "fluid" + - "folder" + - "font" + - "force" + - "foreman" + - "fork" + - "form" + - "format" + - "formatter" + - "forum" + - "foundry" + - "framework" + - "free" + - "friend" + - "friendly" + - "front-end" + - "frontend" + - "ftp-" + - "ftp." + - "ftp_" + - "fuel" + - "full" + - "fun-" + - "fun." 
+ - "fun_" + - "func" + - "future" + - "gaia" + - "gallery" + - "game" + - "gateway" + - "gem-" + - "gem." + - "gem_" + - "gen-" + - "gen." + - "gen_" + - "general" + - "generator" + - "generic" + - "genetic" + - "get-" + - "get." + - "get_" + - "getenv" + - "getting" + - "ghost" + - "gist" + - "git-" + - "git." + - "git_" + - "github" + - "gitignore" + - "gitlab" + - "glas" + - "gmail" + - "gnome" + - "gnu-" + - "gnu." + - "gnu_" + - "goal" + - "golang" + - "gollum" + - "good" + - "google" + - "gpu-" + - "gpu." + - "gpu_" + - "gradle" + - "grail" + - "graph" + - "graphic" + - "great" + - "grid" + - "groovy" + - "group" + - "grunt" + - "guard" + - "gui-" + - "gui." + - "gui_" + - "guide" + - "guideline" + - "gulp" + - "gwt-" + - "gwt." + - "gwt_" + - "hack" + - "hackathon" + - "hacker" + - "hacking" + - "hadoop" + - "haml" + - "handler" + - "hardware" + - "has-" + - "has_" + - "hash" + - "haskell" + - "have" + - "haxe" + - "hello" + - "help" + - "helper" + - "here" + - "hero" + - "heroku" + - "high" + - "hipchat" + - "history" + - "home" + - "homebrew" + - "homepage" + - "hook" + - "host" + - "hosting" + - "hot-" + - "hot." + - "hot_" + - "house" + - "how-" + - "how." + - "how_" + - "html" + - "http" + - "hub-" + - "hub." + - "hub_" + - "hubot" + - "human" + - "icon" + - "ide-" + - "ide." + - "ide_" + - "idea" + - "identity" + - "idiomatic" + - "image" + - "impact" + - "import" + - "important" + - "importer" + - "impres" + - "index" + - "infinite" + - "info" + - "injection" + - "inline" + - "input" + - "inside" + - "inspector" + - "instagram" + - "install" + - "installer" + - "instant" + - "intellij" + - "interface" + - "internet" + - "interview" + - "into" + - "intro" + - "ionic" + - "iphone" + - "ipython" + - "irc-" + - "irc_" + - "iso-" + - "iso." + - "iso_" + - "issue" + - "jade" + - "jasmine" + - "java" + - "jbos" + - "jekyll" + - "jenkin" + - "jetbrains" + - "job-" + - "job." + - "job_" + - "joomla" + - "jpa-" + - "jpa." 
+ - "jpa_" + - "jquery" + - "json" + - "just" + - "kafka" + - "karma" + - "kata" + - "kernel" + - "keyboard" + - "kindle" + - "kit-" + - "kit." + - "kit_" + - "kitchen" + - "knife" + - "koan" + - "kohana" + - "lab-" + - "lab-" + - "lab." + - "lab." + - "lab_" + - "lab_" + - "lambda" + - "lamp" + - "language" + - "laravel" + - "last" + - "latest" + - "latex" + - "launcher" + - "layer" + - "layout" + - "lazy" + - "ldap" + - "leaflet" + - "league" + - "learn" + - "learning" + - "led-" + - "led." + - "led_" + - "leetcode" + - "les-" + - "les." + - "les_" + - "level" + - "leveldb" + - "lib-" + - "lib." + - "lib_" + - "librarie" + - "library" + - "license" + - "life" + - "liferay" + - "light" + - "lightbox" + - "like" + - "line" + - "link" + - "linked" + - "linkedin" + - "linux" + - "lisp" + - "list" + - "lite" + - "little" + - "load" + - "loader" + - "local" + - "location" + - "lock" + - "log-" + - "log." + - "log_" + - "logger" + - "logging" + - "logic" + - "login" + - "logstash" + - "longer" + - "look" + - "love" + - "lua-" + - "lua." + - "lua_" + - "mac-" + - "mac." + - "mac_" + - "machine" + - "made" + - "magento" + - "magic" + - "mail" + - "make" + - "maker" + - "making" + - "man-" + - "man." + - "man_" + - "manage" + - "manager" + - "manifest" + - "manual" + - "map-" + - "map-" + - "map." + - "map." + - "map_" + - "map_" + - "mapper" + - "mapping" + - "markdown" + - "markup" + - "master" + - "math" + - "matrix" + - "maven" + - "md5" + - "mean" + - "media" + - "mediawiki" + - "meetup" + - "memcached" + - "memory" + - "menu" + - "merchant" + - "message" + - "messaging" + - "meta" + - "metadata" + - "meteor" + - "method" + - "metric" + - "micro" + - "middleman" + - "migration" + - "minecraft" + - "miner" + - "mini" + - "minimal" + - "mirror" + - "mit-" + - "mit." + - "mit_" + - "mobile" + - "mocha" + - "mock" + - "mod-" + - "mod." 
+ - "mod_" + - "mode" + - "model" + - "modern" + - "modular" + - "module" + - "modx" + - "money" + - "mongo" + - "mongodb" + - "mongoid" + - "mongoose" + - "monitor" + - "monkey" + - "more" + - "motion" + - "moved" + - "movie" + - "mozilla" + - "mqtt" + - "mule" + - "multi" + - "multiple" + - "music" + - "mustache" + - "mvc-" + - "mvc." + - "mvc_" + - "mysql" + - "nagio" + - "name" + - "native" + - "need" + - "neo-" + - "neo." + - "neo_" + - "nest" + - "nested" + - "net-" + - "net." + - "net_" + - "nette" + - "network" + - "new-" + - "new-" + - "new." + - "new." + - "new_" + - "new_" + - "next" + - "nginx" + - "ninja" + - "nlp-" + - "nlp." + - "nlp_" + - "node" + - "nodej" + - "nosql" + - "not-" + - "not." + - "not_" + - "note" + - "notebook" + - "notepad" + - "notice" + - "notifier" + - "now-" + - "now." + - "now_" + - "number" + - "oauth" + - "object" + - "objective" + - "obsolete" + - "ocaml" + - "octopres" + - "official" + - "old-" + - "old." + - "old_" + - "onboard" + - "online" + - "only" + - "open" + - "opencv" + - "opengl" + - "openshift" + - "openwrt" + - "option" + - "oracle" + - "org-" + - "org." + - "org_" + - "origin" + - "original" + - "orm-" + - "orm." + - "orm_" + - "osx-" + - "osx_" + - "our-" + - "our." + - "our_" + - "out-" + - "out." + - "out_" + - "output" + - "over" + - "overview" + - "own-" + - "own." + - "own_" + - "pack" + - "package" + - "packet" + - "page" + - "page" + - "panel" + - "paper" + - "paperclip" + - "para" + - "parallax" + - "parallel" + - "parse" + - "parser" + - "parsing" + - "particle" + - "party" + - "password" + - "patch" + - "path" + - "pattern" + - "payment" + - "paypal" + - "pdf-" + - "pdf." + - "pdf_" + - "pebble" + - "people" + - "perl" + - "personal" + - "phalcon" + - "phoenix" + - "phone" + - "phonegap" + - "photo" + - "php-" + - "php." + - "php_" + - "physic" + - "picker" + - "pipeline" + - "platform" + - "play" + - "player" + - "please" + - "plu-" + - "plu." 
+ - "plu_" + - "plug-in" + - "plugin" + - "plupload" + - "png-" + - "png." + - "png_" + - "poker" + - "polyfill" + - "polymer" + - "pool" + - "pop-" + - "pop." + - "pop_" + - "popcorn" + - "popup" + - "port" + - "portable" + - "portal" + - "portfolio" + - "post" + - "power" + - "powered" + - "powerful" + - "prelude" + - "pretty" + - "preview" + - "principle" + - "print" + - "pro-" + - "pro." + - "pro_" + - "problem" + - "proc" + - "product" + - "profile" + - "profiler" + - "program" + - "progres" + - "project" + - "protocol" + - "prototype" + - "provider" + - "proxy" + - "public" + - "pull" + - "puppet" + - "pure" + - "purpose" + - "push" + - "pusher" + - "pyramid" + - "python" + - "quality" + - "query" + - "queue" + - "quick" + - "rabbitmq" + - "rack" + - "radio" + - "rail" + - "railscast" + - "random" + - "range" + - "raspberry" + - "rdf-" + - "rdf." + - "rdf_" + - "react" + - "reactive" + - "read" + - "reader" + - "readme" + - "ready" + - "real" + - "reality" + - "real-time" + - "realtime" + - "recipe" + - "recorder" + - "red-" + - "red." + - "red_" + - "reddit" + - "redi" + - "redmine" + - "reference" + - "refinery" + - "refresh" + - "registry" + - "related" + - "release" + - "remote" + - "rendering" + - "repo" + - "report" + - "request" + - "require" + - "required" + - "requirej" + - "research" + - "resource" + - "response" + - "resque" + - "rest" + - "restful" + - "resume" + - "reveal" + - "reverse" + - "review" + - "riak" + - "rich" + - "right" + - "ring" + - "robot" + - "role" + - "room" + - "router" + - "routing" + - "rpc-" + - "rpc." + - "rpc_" + - "rpg-" + - "rpg." + - "rpg_" + - "rspec" + - "ruby-" + - "ruby." + - "ruby_" + - "rule" + - "run-" + - "run." + - "run_" + - "runner" + - "running" + - "runtime" + - "rust" + - "rvm-" + - "rvm." + - "rvm_" + - "salt" + - "sample" + - "sample" + - "sandbox" + - "sas-" + - "sas." + - "sas_" + - "sbt-" + - "sbt." 
+ - "sbt_" + - "scala" + - "scalable" + - "scanner" + - "schema" + - "scheme" + - "school" + - "science" + - "scraper" + - "scratch" + - "screen" + - "script" + - "scroll" + - "scs-" + - "scs." + - "scs_" + - "sdk-" + - "sdk." + - "sdk_" + - "sdl-" + - "sdl." + - "sdl_" + - "search" + - "secure" + - "security" + - "see-" + - "see." + - "see_" + - "seed" + - "select" + - "selector" + - "selenium" + - "semantic" + - "sencha" + - "send" + - "sentiment" + - "serie" + - "server" + - "service" + - "session" + - "set-" + - "set." + - "set_" + - "setting" + - "setting" + - "setup" + - "sha1" + - "sha2" + - "sha256" + - "share" + - "shared" + - "sharing" + - "sheet" + - "shell" + - "shield" + - "shipping" + - "shop" + - "shopify" + - "shortener" + - "should" + - "show" + - "showcase" + - "side" + - "silex" + - "simple" + - "simulator" + - "single" + - "site" + - "skeleton" + - "sketch" + - "skin" + - "slack" + - "slide" + - "slider" + - "slim" + - "small" + - "smart" + - "smtp" + - "snake" + - "snapshot" + - "snippet" + - "soap" + - "social" + - "socket" + - "software" + - "solarized" + - "solr" + - "solution" + - "solver" + - "some" + - "soon" + - "source" + - "space" + - "spark" + - "spatial" + - "spec" + - "sphinx" + - "spine" + - "spotify" + - "spree" + - "spring" + - "sprite" + - "sql-" + - "sql." + - "sql_" + - "sqlite" + - "ssh-" + - "ssh." + - "ssh_" + - "stack" + - "staging" + - "standard" + - "stanford" + - "start" + - "started" + - "starter" + - "startup" + - "stat" + - "statamic" + - "state" + - "static" + - "statistic" + - "statsd" + - "statu" + - "steam" + - "step" + - "still" + - "stm-" + - "stm." + - "stm_" + - "storage" + - "store" + - "storm" + - "story" + - "strategy" + - "stream" + - "streaming" + - "string" + - "stripe" + - "structure" + - "studio" + - "study" + - "stuff" + - "style" + - "sublime" + - "sugar" + - "suite" + - "summary" + - "super" + - "support" + - "supported" + - "svg-" + - "svg." + - "svg_" + - "svn-" + - "svn." 
+ - "svn_" + - "swagger" + - "swift" + - "switch" + - "switcher" + - "symfony" + - "symphony" + - "sync" + - "synopsi" + - "syntax" + - "system" + - "system" + - "tab-" + - "tab-" + - "tab." + - "tab." + - "tab_" + - "tab_" + - "table" + - "tag-" + - "tag-" + - "tag." + - "tag." + - "tag_" + - "tag_" + - "talk" + - "target" + - "task" + - "tcp-" + - "tcp." + - "tcp_" + - "tdd-" + - "tdd." + - "tdd_" + - "team" + - "tech" + - "template" + - "term" + - "terminal" + - "testing" + - "tetri" + - "text" + - "textmate" + - "theme" + - "theory" + - "three" + - "thrift" + - "time" + - "timeline" + - "timer" + - "tiny" + - "tinymce" + - "tip-" + - "tip." + - "tip_" + - "title" + - "todo" + - "todomvc" + - "token" + - "tool" + - "toolbox" + - "toolkit" + - "top-" + - "top." + - "top_" + - "tornado" + - "touch" + - "tower" + - "tracker" + - "tracking" + - "traffic" + - "training" + - "transfer" + - "translate" + - "transport" + - "tree" + - "trello" + - "try-" + - "try." + - "try_" + - "tumblr" + - "tut-" + - "tut." + - "tut_" + - "tutorial" + - "tweet" + - "twig" + - "twitter" + - "type" + - "typo" + - "ubuntu" + - "uiview" + - "ultimate" + - "under" + - "unit" + - "unity" + - "universal" + - "unix" + - "update" + - "updated" + - "upgrade" + - "upload" + - "uploader" + - "uri-" + - "uri." + - "uri_" + - "url-" + - "url." + - "url_" + - "usage" + - "usb-" + - "usb." + - "usb_" + - "use-" + - "use." + - "use_" + - "used" + - "useful" + - "user" + - "using" + - "util" + - "utilitie" + - "utility" + - "vagrant" + - "validator" + - "value" + - "variou" + - "varnish" + - "version" + - "via-" + - "via." + - "via_" + - "video" + - "view" + - "viewer" + - "vim-" + - "vim." + - "vim_" + - "vimrc" + - "virtual" + - "vision" + - "visual" + - "vpn" + - "want" + - "warning" + - "watch" + - "watcher" + - "wave" + - "way-" + - "way." 
+ - "way_" + - "weather" + - "web-" + - "web_" + - "webapp" + - "webgl" + - "webhook" + - "webkit" + - "webrtc" + - "website" + - "websocket" + - "welcome" + - "welcome" + - "what" + - "what'" + - "when" + - "where" + - "which" + - "why-" + - "why." + - "why_" + - "widget" + - "wifi" + - "wiki" + - "win-" + - "win." + - "win_" + - "window" + - "wip-" + - "wip." + - "wip_" + - "within" + - "without" + - "wizard" + - "word" + - "wordpres" + - "work" + - "worker" + - "workflow" + - "working" + - "workshop" + - "world" + - "wrapper" + - "write" + - "writer" + - "writing" + - "written" + - "www-" + - "www." + - "www_" + - "xamarin" + - "xcode" + - "xml-" + - "xml." + - "xml_" + - "xmpp" + - "xxxxxx" + - "yahoo" + - "yaml" + - "yandex" + - "yeoman" + - "yet-" + - "yet." + - "yet_" + - "yii-" + - "yii." + - "yii_" + - "youtube" + - "yui-" + - "yui." + - "yui_" + - "zend" + - "zero" + - "zip-" + - "zip." + - "zip_" + - "zsh-" + - "zsh." + - "zsh_" \ No newline at end of file diff --git a/pkg/custom_detectors/custom_detectors.go b/pkg/custom_detectors/custom_detectors.go index 93f106c38b15..24eb56dafab3 100644 --- a/pkg/custom_detectors/custom_detectors.go +++ b/pkg/custom_detectors/custom_detectors.go @@ -65,6 +65,28 @@ func (c *CustomRegexWebhook) FromData(ctx context.Context, verify bool, data []b dataStr := string(data) regexMatches := make(map[string][][]string, len(c.GetRegex())) + // Compile exclude regexes targeting the capture group + excludeRegexesCapture := make([]*regexp.Regexp, 0, len(c.GetExcludeRegexesCapture())) + for _, exclude := range c.GetExcludeRegexesCapture() { + regex, err := regexp.Compile(exclude) + if err != nil { + // This will only happen if the regex is invalid. 
+ return nil, err + } + excludeRegexesCapture = append(excludeRegexesCapture, regex) + } + + // Compile exclude regexes targeting the entire match + excludeRegexes := make([]*regexp.Regexp, 0, len(c.GetExcludeRegexesMatch())) + for _, exclude := range c.GetExcludeRegexesMatch() { + regex, err := regexp.Compile(exclude) + if err != nil { + // This will only happen if the regex is invalid. + return nil, err + } + excludeRegexes = append(excludeRegexes, regex) + } + // Find all submatches for each regex. for name, regex := range c.GetRegex() { regex, err := regexp.Compile(regex) @@ -91,7 +113,45 @@ func (c *CustomRegexWebhook) FromData(ctx context.Context, verify bool, data []b // Create result object and test for verification. resultsCh := make(chan detectors.Result, maxTotalMatches) + +MatchLoop: for _, match := range matches { + for _, values := range match { + // attempt to use capture group + secret := values[0] + if len(values) > 1 { + secret = values[1] + } + + // check entropy + entropy := c.GetEntropy() + if entropy > 0.0 && detectors.StringShannonEntropy(secret) < float64(entropy) { + continue MatchLoop + } + + // check for exclude words + for _, excludeWord := range c.GetExcludeWords() { + if strings.Contains(strings.ToLower(secret), excludeWord) { + continue MatchLoop + } + } + + // exclude checks + for _, excludeMatch := range excludeRegexes { + if excludeMatch.MatchString(values[0]) { + continue MatchLoop + } + } + + // exclude secret (capture group), or if no capture group is set, + // check against entire match. 
+ for _, excludeSecret := range excludeRegexesCapture { + if excludeSecret.MatchString(secret) { + continue MatchLoop + } + } + } + g.Go(func() error { return c.createResults(ctx, match, verify, resultsCh) }) diff --git a/pkg/pb/custom_detectorspb/custom_detectors.pb.go b/pkg/pb/custom_detectorspb/custom_detectors.pb.go index 3b2fc2e3a929..bc22e937ef50 100644 --- a/pkg/pb/custom_detectorspb/custom_detectors.pb.go +++ b/pkg/pb/custom_detectorspb/custom_detectors.pb.go @@ -73,11 +73,15 @@ type CustomRegex struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` - Keywords []string `protobuf:"bytes,2,rep,name=keywords,proto3" json:"keywords,omitempty"` - Regex map[string]string `protobuf:"bytes,3,rep,name=regex,proto3" json:"regex,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` - Verify []*VerifierConfig `protobuf:"bytes,4,rep,name=verify,proto3" json:"verify,omitempty"` - Description string `protobuf:"bytes,5,opt,name=description,proto3" json:"description,omitempty"` + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + Keywords []string `protobuf:"bytes,2,rep,name=keywords,proto3" json:"keywords,omitempty"` + Regex map[string]string `protobuf:"bytes,3,rep,name=regex,proto3" json:"regex,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + Verify []*VerifierConfig `protobuf:"bytes,4,rep,name=verify,proto3" json:"verify,omitempty"` + Description string `protobuf:"bytes,5,opt,name=description,proto3" json:"description,omitempty"` + ExcludeRegexesCapture []string `protobuf:"bytes,6,rep,name=exclude_regexes_capture,json=excludeRegexesCapture,proto3" json:"exclude_regexes_capture,omitempty"` + ExcludeWords []string `protobuf:"bytes,7,rep,name=exclude_words,json=excludeWords,proto3" json:"exclude_words,omitempty"` + Entropy float32 
`protobuf:"fixed32,8,opt,name=entropy,proto3" json:"entropy,omitempty"` + ExcludeRegexesMatch []string `protobuf:"bytes,9,rep,name=exclude_regexes_match,json=excludeRegexesMatch,proto3" json:"exclude_regexes_match,omitempty"` } func (x *CustomRegex) Reset() { @@ -147,6 +151,34 @@ func (x *CustomRegex) GetDescription() string { return "" } +func (x *CustomRegex) GetExcludeRegexesCapture() []string { + if x != nil { + return x.ExcludeRegexesCapture + } + return nil +} + +func (x *CustomRegex) GetExcludeWords() []string { + if x != nil { + return x.ExcludeWords + } + return nil +} + +func (x *CustomRegex) GetEntropy() float32 { + if x != nil { + return x.Entropy + } + return 0 +} + +func (x *CustomRegex) GetExcludeRegexesMatch() []string { + if x != nil { + return x.ExcludeRegexesMatch + } + return nil +} + type VerifierConfig struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -230,7 +262,7 @@ var file_custom_detectors_proto_rawDesc = []byte{ 0x6f, 0x72, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x5f, 0x64, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x2e, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x52, 0x65, 0x67, 0x65, 0x78, 0x52, 0x09, 0x64, 0x65, 0x74, 0x65, 0x63, 0x74, - 0x6f, 0x72, 0x73, 0x22, 0x93, 0x02, 0x0a, 0x0b, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x52, 0x65, + 0x6f, 0x72, 0x73, 0x22, 0xbe, 0x03, 0x0a, 0x0b, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x52, 0x65, 0x67, 0x65, 0x78, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x6b, 0x65, 0x79, 0x77, 0x6f, 0x72, 0x64, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x6b, 0x65, 0x79, 0x77, 0x6f, @@ -243,25 +275,36 @@ var file_custom_detectors_proto_rawDesc = []byte{ 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x2e, 0x56, 0x65, 0x72, 0x69, 0x66, 0x69, 0x65, 0x72, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x52, 0x06, 0x76, 0x65, 0x72, 0x69, 0x66, 0x79, 0x12, 
0x20, 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x1a, - 0x38, 0x0a, 0x0a, 0x52, 0x65, 0x67, 0x65, 0x78, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, - 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, - 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x8e, 0x01, 0x0a, 0x0e, 0x56, 0x65, - 0x72, 0x69, 0x66, 0x69, 0x65, 0x72, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x24, 0x0a, 0x08, - 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x08, - 0xfa, 0x42, 0x05, 0x72, 0x03, 0x90, 0x01, 0x01, 0x52, 0x08, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, - 0x6e, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x75, 0x6e, 0x73, 0x61, 0x66, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x08, 0x52, 0x06, 0x75, 0x6e, 0x73, 0x61, 0x66, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x68, 0x65, - 0x61, 0x64, 0x65, 0x72, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x68, 0x65, 0x61, - 0x64, 0x65, 0x72, 0x73, 0x12, 0x24, 0x0a, 0x0d, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x52, - 0x61, 0x6e, 0x67, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0d, 0x73, 0x75, 0x63, - 0x63, 0x65, 0x73, 0x73, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x73, 0x42, 0x44, 0x5a, 0x42, 0x67, 0x69, - 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, - 0x73, 0x65, 0x63, 0x75, 0x72, 0x69, 0x74, 0x79, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, - 0x68, 0x6f, 0x67, 0x2f, 0x76, 0x33, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x70, 0x62, 0x2f, 0x63, 0x75, - 0x73, 0x74, 0x6f, 0x6d, 0x5f, 0x64, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x70, 0x62, - 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x28, 0x09, 0x52, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 
0x69, 0x6f, 0x6e, 0x12, + 0x36, 0x0a, 0x17, 0x65, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x5f, 0x72, 0x65, 0x67, 0x65, 0x78, + 0x65, 0x73, 0x5f, 0x63, 0x61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x18, 0x06, 0x20, 0x03, 0x28, 0x09, + 0x52, 0x15, 0x65, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, 0x67, 0x65, 0x78, 0x65, 0x73, + 0x43, 0x61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x78, 0x63, 0x6c, 0x75, + 0x64, 0x65, 0x5f, 0x77, 0x6f, 0x72, 0x64, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, + 0x65, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x57, 0x6f, 0x72, 0x64, 0x73, 0x12, 0x18, 0x0a, 0x07, + 0x65, 0x6e, 0x74, 0x72, 0x6f, 0x70, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x02, 0x52, 0x07, 0x65, + 0x6e, 0x74, 0x72, 0x6f, 0x70, 0x79, 0x12, 0x32, 0x0a, 0x15, 0x65, 0x78, 0x63, 0x6c, 0x75, 0x64, + 0x65, 0x5f, 0x72, 0x65, 0x67, 0x65, 0x78, 0x65, 0x73, 0x5f, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x18, + 0x09, 0x20, 0x03, 0x28, 0x09, 0x52, 0x13, 0x65, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x52, 0x65, + 0x67, 0x65, 0x78, 0x65, 0x73, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x1a, 0x38, 0x0a, 0x0a, 0x52, 0x65, + 0x67, 0x65, 0x78, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, + 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x3a, 0x02, 0x38, 0x01, 0x22, 0x8e, 0x01, 0x0a, 0x0e, 0x56, 0x65, 0x72, 0x69, 0x66, 0x69, 0x65, + 0x72, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x24, 0x0a, 0x08, 0x65, 0x6e, 0x64, 0x70, 0x6f, + 0x69, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x08, 0xfa, 0x42, 0x05, 0x72, 0x03, + 0x90, 0x01, 0x01, 0x52, 0x08, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x12, 0x16, 0x0a, + 0x06, 0x75, 0x6e, 0x73, 0x61, 0x66, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x06, 0x75, + 0x6e, 0x73, 0x61, 0x66, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x73, + 0x18, 0x03, 
0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x73, 0x12, + 0x24, 0x0a, 0x0d, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x73, + 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0d, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x52, + 0x61, 0x6e, 0x67, 0x65, 0x73, 0x42, 0x44, 0x5a, 0x42, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, + 0x63, 0x6f, 0x6d, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x73, 0x65, 0x63, 0x75, 0x72, + 0x69, 0x74, 0x79, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x68, 0x6f, 0x67, 0x2f, 0x76, + 0x33, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x70, 0x62, 0x2f, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x5f, + 0x64, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x33, } var ( diff --git a/pkg/pb/custom_detectorspb/custom_detectors.pb.validate.go b/pkg/pb/custom_detectorspb/custom_detectors.pb.validate.go index 774aa0515da7..9a08130e0dcb 100644 --- a/pkg/pb/custom_detectorspb/custom_detectors.pb.validate.go +++ b/pkg/pb/custom_detectorspb/custom_detectors.pb.validate.go @@ -231,6 +231,8 @@ func (m *CustomRegex) validate(all bool) error { // no validation rules for Description + // no validation rules for Entropy + if len(errors) > 0 { return CustomRegexMultiError(errors) } diff --git a/proto/custom_detectors.proto b/proto/custom_detectors.proto index 943446165e9e..2c01d5c2dba6 100644 --- a/proto/custom_detectors.proto +++ b/proto/custom_detectors.proto @@ -16,8 +16,13 @@ message CustomRegex { map regex = 3; repeated VerifierConfig verify = 4; string description = 5; + repeated string exclude_regexes_capture = 6; + repeated string exclude_words = 7; + float entropy = 8; + repeated string exclude_regexes_match = 9; } + message VerifierConfig { string endpoint = 1 [(validate.rules).string.uri_ref = true]; bool unsafe = 2;