Skip to content

Commit

Permalink
Update parser (#43)
Browse files Browse the repository at this point in the history
  • Loading branch information
Peter-Jan Brone authored Jul 28, 2022
1 parent c3c9466 commit b8f2c2e
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 19 deletions.
35 changes: 24 additions & 11 deletions email/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,21 @@ const (
)

var (
// extractSkylink64RE is a regex that is capable of extracting base-64
// encoded skylinks from text
extractSkylink64RE = regexp.MustCompile(".+?://.+?\\..+?/([a-zA-Z0-9-_]{46})")
// extractSkylink64RE and extractSkylink64RE_2 are regexes capable of
// extracting base-64 encoded skylinks from text
extractSkylink64RE = regexp.MustCompile(`.+?://.+?\..+?/([a-zA-Z0-9-_]{46})`)
extractSkylink64RE_2 = regexp.MustCompile(`(http.+|hxxp.+|\..+|://.+|^)([a-zA-Z0-9-_]{46})(\?.*)?$`)

// extractSkylink32RE is a regex that is capable of extracting base-32
// encoded skylinks from text
extractSkylink32RE = regexp.MustCompile(".+?://.*?([a-zA-Z0-9-_]{55})")
// extractSkylink32RE and extractSkylink32RE_2 are regexes capable of
// extracting base-32 encoded skylinks from text
extractSkylink32RE = regexp.MustCompile(`(?i).+?://.*?([a-z0-9]{55})`)
extractSkylink32RE_2 = regexp.MustCompile(`(?i)(http.+|hxxp.+|\..+|://.+|^)([a-z0-9]{55})(\?.*)?$`)

validateSkylink64RE = regexp.MustCompile("^([a-zA-Z0-9-_]{46})$")
validateSkylink32RE = regexp.MustCompile("^([a-zA-Z0-9-_]{55})$")
// space matches any run of one or more whitespace characters
space = regexp.MustCompile(`\s+`)

validateSkylink64RE = regexp.MustCompile(`^([a-zA-Z0-9-_]{46})$`)
validateSkylink32RE = regexp.MustCompile(`(?i)^([a-z0-9]{55})$`)
)

type (
Expand Down Expand Up @@ -305,11 +310,19 @@ func extractSkylinks(input []byte) []string {
for sc.Scan() {
for _, line := range []string{
sc.Text(),
strings.ReplaceAll(sc.Text(), " ", ""),
space.ReplaceAllString(sc.Text(), ""),
} {
for _, matches := range append(
base64matches := append(
extractSkylink64RE.FindAllStringSubmatch(line, -1),
extractSkylink32RE.FindAllStringSubmatch(line, -1)...,
extractSkylink64RE_2.FindAllStringSubmatch(line, -1)...,
)
base32matches := append(
extractSkylink32RE.FindAllStringSubmatch(line, -1),
extractSkylink32RE_2.FindAllStringSubmatch(line, -1)...,
)
for _, matches := range append(
base64matches,
base32matches...,
) {
for _, match := range matches {
if validateSkylink64RE.Match([]byte(match)) {
Expand Down
27 changes: 19 additions & 8 deletions email/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ var (
hxxps:// siasky [.] net/CADEnmNNR6arnyDSH60MlGjQK5O3Sv-ecK1PGt3MNmQUhA#apg@franklinbank [.] com
hxxps:// siasky [.] net/GABJJhT8AlfNh-XS-6YVH8en7O-t377ej9XS2eclnv2yFg
https:// siasky [.]netAAAg4mZrsNcedNPazZ4kSFAYBzf7f8ZgHO1Tu1L-NN8Gjg
BBBg4mZrsNcedNPazZ4kSFAYBzf7f8ZgHO1Tu1L-NN8Gjg
As a reminder, phishing is expressly prohibited by our Universal Terms of Service Agreement, paragraph 7. "Acceptable Use Policy (AUP)"
`)

Expand Down Expand Up @@ -254,17 +257,21 @@ func testExtractSkylinks(t *testing.T) {

// extract skylinks
skylinks = extractSkylinks(exampleBody)
if len(skylinks) != 4 {
t.Fatalf("unexpected amount of skylinks found, %v != 4", len(skylinks))
sort.Strings(skylinks)
if len(skylinks) != 6 {
t.Fatalf("unexpected amount of skylinks found, %v != 6, skylinks %+v", len(skylinks), skylinks)
}

// assert we have extracted the correct skylinks
//
// NOTE: IGzqsAmjjLJjN3Or8ZFb9AGX4Km12EJu5AVmgaX8HWNy7Q was not extracted; as it
// is a valid skylink it could have been picked up as a false positive
sort.Strings(skylinks)
if skylinks[0] != "CADEnmNNR6arnyDSH60MlGjQK5O3Sv-ecK1PGt3MNmQUhA" ||
skylinks[1] != "GABJJhT8AlfNh-XS-6YVH8en7O-t377ej9XS2eclnv2yFg" || skylinks[2] != "GAEE7l0IkIVcVEHDgRCcNkRYS8keZKr9v_ffxf9_614m6g" || skylinks[3] != "nAA_hbtNaOYyR2WrM9UNIc5jRu4WfGy5QK_iTGosDgLmSA" {
if skylinks[0] != "AAAg4mZrsNcedNPazZ4kSFAYBzf7f8ZgHO1Tu1L-NN8Gjg" ||
skylinks[1] != "BBBg4mZrsNcedNPazZ4kSFAYBzf7f8ZgHO1Tu1L-NN8Gjg" ||
skylinks[2] != "CADEnmNNR6arnyDSH60MlGjQK5O3Sv-ecK1PGt3MNmQUhA" ||
skylinks[3] != "GABJJhT8AlfNh-XS-6YVH8en7O-t377ej9XS2eclnv2yFg" ||
skylinks[4] != "GAEE7l0IkIVcVEHDgRCcNkRYS8keZKr9v_ffxf9_614m6g" ||
skylinks[5] != "nAA_hbtNaOYyR2WrM9UNIc5jRu4WfGy5QK_iTGosDgLmSA" {
t.Fatal("unexpected skylinks", skylinks)
}

Expand All @@ -277,9 +284,13 @@ func testExtractSkylinks(t *testing.T) {
hxxps:// 7g01n1fmusamd3k4c5l7ahb39356rfhfs92e9mjshj1vq93vk891m2o [.] siasky [.] net
hxxps:// [.] eu-ger-1 [.] siasky [.] net / 1005m6ki628f5t2o74h1qirph34lcavbn52oj7e2oan533sj3cgbr1o
hxxps:// [.] eu-ger-1 [.] siasky [.] net2005m6KI628f5t2o74h1qirph34lcavbn52oj7e2oan533sj3cgbr2b
3005m6ki628f5t2o74h1qirph34lcavbn52oj7e2oan533sj3cgbr2b
`))
if len(skylinks) != 2 {
t.Fatalf("unexpected amount of skylinks found, %v != 2", len(skylinks))
if len(skylinks) != 4 {
t.Fatalf("unexpected amount of skylinks found, %v != 4, skylinks: %v", len(skylinks), skylinks)
}

// NOTE: it will have loaded the base32 encoded version Skylink and output
Expand Down Expand Up @@ -474,7 +485,7 @@ func testBuildAbuseReport(t *testing.T) {
// since we use the example email body we can rest assured it's correct
// since the unit tests cover that as well
pr := updated.ParseResult
if len(pr.Skylinks) != 4 {
if len(pr.Skylinks) != 6 {
t.Fatal("unexpected amount of skylinks", pr.Skylinks)
}
if len(pr.Tags) != 1 {
Expand Down

0 comments on commit b8f2c2e

Please sign in to comment.