-
Notifications
You must be signed in to change notification settings - Fork 1.8k
/
Copy pathfalsepositives.go
125 lines (100 loc) · 2.87 KB
/
falsepositives.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package detectors
import (
_ "embed"
"math"
"strings"
"unicode"
"unicode/utf8"
ahocorasick "github.com/BobuSumisu/aho-corasick"
)
// DefaultFalsePositives is a stock list of placeholder-looking substrings.
// Its element type matches the falsePositives parameter of
// IsKnownFalsePositive, which reports a match as a false positive when the
// lowercased match contains any of the supplied entries.
var DefaultFalsePositives = []FalsePositive{"example", "xxxxxx", "aaaaaa", "abcde", "00000", "sample", "www"}
// FalsePositive is a substring that IsKnownFalsePositive searches for inside
// the lowercased candidate match.
type FalsePositive string
// badList holds the raw bytes of the embedded badlist.txt; init cleans it with
// bytesToCleanWordList and adds the resulting words to filter.
//go:embed "badlist.txt"
var badList []byte
// wordList holds the raw bytes of the embedded words.txt; init cleans it with
// bytesToCleanWordList and adds the resulting words to filter.
//go:embed "words.txt"
var wordList []byte
// programmingBookWords holds the raw bytes of the embedded
// programmingbooks.txt; init cleans it with bytesToCleanWordList and adds the
// resulting words to filter.
//go:embed "programmingbooks.txt"
var programmingBookWords []byte
// filter is the trie built once by init from the three embedded word lists.
// IsKnownFalsePositive queries it only when its wordCheck argument is true.
var filter *ahocorasick.Trie
// init populates the package-level filter trie at load time. Each embedded
// list is normalized by bytesToCleanWordList and added to the builder in the
// order words.txt, badlist.txt, programmingbooks.txt before the trie is built.
func init() {
	builder := ahocorasick.NewTrieBuilder()
	for _, raw := range [][]byte{wordList, badList, programmingBookWords} {
		builder.AddStrings(bytesToCleanWordList(raw))
	}
	filter = builder.Build()
}
// IsKnownFalsePositive reports whether match should be discarded as a known
// false positive. It returns true when any of the following holds:
//
//   - match is not valid UTF-8;
//   - match contains, case-insensitively, any entry of falsePositives;
//   - wordCheck is true and the package-level word filter finds a listed word
//     inside the lowercased match.
//
// Only the secret key material should be passed into this function. Digit
// checks are not performed here; see HasDigit.
func IsKnownFalsePositive(match string, falsePositives []FalsePositive, wordCheck bool) bool {
	if !utf8.ValidString(match) {
		return true
	}

	lower := strings.ToLower(match)
	for _, fp := range falsePositives {
		// match has been lowercased, so the pattern must be lowercased as
		// well; otherwise an entry containing an uppercase letter could never
		// be found.
		if strings.Contains(lower, strings.ToLower(string(fp))) {
			return true
		}
	}

	// The word filter was built from lowercased words, so query it with the
	// lowercased match.
	return wordCheck && filter.MatchFirstString(lower) != nil
}
// HasDigit reports whether key contains at least one rune for which
// unicode.IsDigit is true. An empty key yields false.
func HasDigit(key string) bool {
	found := false
	for _, r := range key {
		// Stop scanning as soon as the first digit is seen.
		if found = unicode.IsDigit(r); found {
			break
		}
	}
	return found
}
// bytesToCleanWordList turns newline-separated data into a de-duplicated list
// of words. Each line is lowercased and stripped of surrounding whitespace;
// lines that are empty after trimming are skipped. The order of the returned
// slice is unspecified because it comes from map iteration.
func bytesToCleanWordList(data []byte) []string {
	unique := make(map[string]struct{})
	for _, line := range strings.Split(string(data), "\n") {
		if strings.TrimSpace(line) == "" {
			continue
		}
		cleaned := strings.TrimSpace(strings.ToLower(line))
		unique[cleaned] = struct{}{}
	}

	cleanedWords := make([]string, 0, len(unique))
	for entry := range unique {
		cleanedWords = append(cleanedWords, entry)
	}
	return cleanedWords
}
// StringShannonEntropy returns the Shannon entropy of input, in bits per
// character, computed from the relative frequency of each rune.
//
// Characters are counted as runes, as produced by ranging over the string, and
// the total used for the probabilities is that same rune count, so the
// probabilities sum to 1 for multi-byte (non-ASCII) input as well. The empty
// string has entropy 0. The result is never negative zero.
func StringShannonEntropy(input string) float64 {
	if input == "" {
		// No characters: avoid dividing by zero below.
		return 0
	}

	counts := make(map[rune]float64)
	total := 0.0
	for _, r := range input {
		counts[r]++
		total++
	}

	inverseTotal := 1 / total // precompute the inverse
	entropy := 0.0
	for _, count := range counts {
		probability := count * inverseTotal
		// Subtracting each (non-positive) term keeps the sum at +0 for
		// uniform input instead of negating a zero at the end.
		entropy -= probability * math.Log2(probability)
	}
	return entropy
}
// FilterResultsWithEntropy filters out unverified results whose Shannon
// entropy is below the given value.
//
// Verified results are always kept. For an unverified result, the entropy is
// computed with StringShannonEntropy over RawV2 when it is non-nil, otherwise
// over Raw, and the result is kept only when that entropy is >= entropy.
// The input order is preserved; the returned slice is nil when nothing is
// kept.
func FilterResultsWithEntropy(results []Result, entropy float64) []Result {
	var filteredResults []Result
	for _, result := range results {
		// Verified results must pass through untouched; the entropy check
		// only applies to unverified ones.
		if result.Verified {
			filteredResults = append(filteredResults, result)
			continue
		}

		var secret string
		if result.RawV2 != nil {
			secret = string(result.RawV2)
		} else {
			secret = string(result.Raw)
		}
		if StringShannonEntropy(secret) >= entropy {
			filteredResults = append(filteredResults, result)
		}
	}
	return filteredResults
}