From af674894f5129332459ef2c9715e9034fa67d83c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marius=20Andr=C3=A9=20Elsfjordstrand=20Beck?= Date: Wed, 15 May 2024 16:10:05 +0200 Subject: [PATCH] fix: parse keyvalue.CdxKey parts accurately This commit fixes parsing parts of the CdxKey in the keyvalue package. The problem where: - The Scheme method returned parts of the userinfo. - The presence of the colon character ':' in the userinfo part would lead to undefined behaviour. --- internal/keyvalue/db.go | 124 ++++++++++++++++++++++++----------- internal/keyvalue/db_test.go | 84 +++++++++++++++++++----- 2 files changed, 150 insertions(+), 58 deletions(-) diff --git a/internal/keyvalue/db.go b/internal/keyvalue/db.go index c8434db..d041198 100644 --- a/internal/keyvalue/db.go +++ b/internal/keyvalue/db.go @@ -75,32 +75,80 @@ func MarshalReport(report *schema.Report, prefix string) (key []byte, value []by } // CdxKey is a wrapper around the key used in the cdx index -// The key is a concatenation of the following parts: -// 1. domain and path -// 2. timestamp -// 3. scheme, userinfo and port -// 4. response type -// The parts are separated by a space character. +// The key consists of the following parts separated by a space character: +// 1. surt domain and path (/) +// 2. timestamp (14 digits) +// 3. port, scheme and userinfo (port:scheme@userinfo:) +// 4. response type (response) // // Example: // -// test,example,/path 20200101000000 :http: response +// test,example,/path 20200101000000 8080:http@user:password: response type CdxKey []byte -var spaceCharacter = []byte{32} -var colonCharacter = []byte{58} -var slashCharacter = []byte{47} +// byte constants used in the key +var ( + spaceCharacter = []byte{32} // ' ' + colonCharacter = []byte{58} // ':' + slashCharacter = []byte{47} // '/' + atCharacter = []byte{64} // '@' +) + +func (ck CdxKey) domainPath() []byte { + parts := bytes.Split(ck, spaceCharacter) + return parts[0] +} + +func (ck CdxKey) timestamp() []byte { + parts := bytes.Split(ck, spaceCharacter) + return parts[1] +} + +func (ck CdxKey) portSchemeUserInfo() []byte { + parts := bytes.Split(ck, spaceCharacter) + if len(parts) < 3 { + return nil + } + return parts[2] +} + +func (ck CdxKey) responseType() []byte { + parts := bytes.Split(ck, spaceCharacter) + if len(parts) < 4 { + return nil + } + return parts[3] +} + +func (ck CdxKey) port() []byte { + return bytes.Split(ck.portSchemeUserInfo(), colonCharacter)[0] +} + +func (ck CdxKey) schemeUserInfo() []byte { + portSchemeUserInfo := ck.portSchemeUserInfo() + if portSchemeUserInfo == nil { + return nil + } + + portAndSchemeAndUserInfo := bytes.SplitN(ck.portSchemeUserInfo(), colonCharacter, 2) + if len(portAndSchemeAndUserInfo) < 2 { + return nil + } + schemeUserInfo := portAndSchemeAndUserInfo[1] + return bytes.TrimRight(schemeUserInfo, ":") +} func (ck CdxKey) String() string { return string(ck) } -func (ck CdxKey) DomainAndPath() []byte { - return bytes.Split(ck, spaceCharacter)[0] +func (ck CdxKey) Domain() string { + b := ck.domainPath() + return string(bytes.Split(b, slashCharacter)[0]) } func (ck CdxKey) Path() string { - b := ck.DomainAndPath() + b := ck.domainPath() i := bytes.Index(b, slashCharacter) if i == -1 { return "" @@ -108,14 +156,8 @@ func (ck CdxKey) Path() string { return string(b[i:]) } -func (ck CdxKey) Domain() string { - b := ck.DomainAndPath() - return string(bytes.Split(b, slashCharacter)[0]) -} - func (ck CdxKey) Time() time.Time { - b := bytes.Split(ck, spaceCharacter)[1] - t, _ := timestamp.Parse(string(b)) + t, _ := timestamp.Parse(string(ck.timestamp())) return t } @@ -124,38 +166,40 @@ func (ck CdxKey) Unix() int64 { return ck.Time().Unix() } -func (ck CdxKey) PortSchemeUserInfo() string { - parts := bytes.Split(ck, spaceCharacter) - if len(parts) < 3 { - return "" - } - b := parts[2] - return string(b) +func (ck CdxKey) Port() string { + return string(ck.port()) } -func (ck CdxKey) Port() string { - parts := bytes.Split(ck, spaceCharacter) - if len(parts) < 3 { +func (ck CdxKey) PortSchemeUserInfo() string { + portSchemeUserInfo := ck.portSchemeUserInfo() + if portSchemeUserInfo == nil { return "" } - b := parts[2] - return string(bytes.Split(b, colonCharacter)[0]) + return string(portSchemeUserInfo) } func (ck CdxKey) Scheme() string { - parts := bytes.Split(ck, spaceCharacter) - if len(parts) < 3 { + schemeUserInfo := ck.schemeUserInfo() + if schemeUserInfo == nil { return "" } - b := parts[2] - return string(bytes.Split(b, colonCharacter)[1]) + scheme := bytes.Split(schemeUserInfo, atCharacter)[0] + return string(scheme) } func (ck CdxKey) UserInfo() string { - parts := bytes.Split(ck, spaceCharacter) - if len(parts) < 3 { + schemeUserInfo := ck.schemeUserInfo() + if schemeUserInfo == nil { return "" } - b := parts[2] - return string(bytes.Split(b, colonCharacter)[2]) + schemeAndUserInfo := bytes.Split(schemeUserInfo, atCharacter) + if len(schemeAndUserInfo) < 2 { + return "" + } + userInfo := schemeAndUserInfo[1] + return string(userInfo) } + +func (ck CdxKey) ResponseType() string { + return string(ck.responseType()) +} \ No newline at end of file diff --git a/internal/keyvalue/db_test.go b/internal/keyvalue/db_test.go index fde4be9..3ec931d 100644 --- a/internal/keyvalue/db_test.go +++ b/internal/keyvalue/db_test.go @@ -9,25 +9,64 @@ import ( // Test CdxKey.Domain func TestCdxKey(t *testing.T) { tests := []struct { - key CdxKey - domain string - path string - scheme string - ts string + key CdxKey + domain string + path string + ts string + port string + scheme string + userinfo string + recordType string }{ { - key: []byte("test,example,/path 20200101000000 :http: response"), - domain: "test,example,", - path: "/path", - scheme: "http", - ts: "20200101000000", + key: []byte("test,example,/path 20200101000000 :http: response"), + domain: "test,example,", + path: "/path", + port: "", + scheme: "http", + userinfo: "", + ts: "20200101000000", + recordType: "response", }, { - key: []byte("test,example,/ 20200101000000"), - domain: "test,example,", - path: "/", - scheme: "", - ts: "20200101000000", + key: []byte("test,example,/ 20200101000000"), + domain: "test,example,", + path: "/", + port: "", + scheme: "", + userinfo: "", + ts: "20200101000000", + recordType: "", + }, + { + key: []byte("test,example,/path 20200101000000 8080:http@gammalost: response"), + domain: "test,example,", + path: "/path", + port: "8080", + scheme: "http", + userinfo: "gammalost", + ts: "20200101000000", + recordType: "response", + }, + { + key: []byte("test,example,/path 20200101000000 8080:http@user:password: response"), + domain: "test,example,", + path: "/path", + port: "8080", + scheme: "http", + userinfo: "user:password", + ts: "20200101000000", + recordType: "response", + }, + { + key: []byte("test,example,/path 20200101000000 8080:http@user:pass:word: response"), + domain: "test,example,", + path: "/path", + port: "8080", + scheme: "http", + userinfo: "user:pass:word", + ts: "20200101000000", + recordType: "response", }, } @@ -39,13 +78,22 @@ func TestCdxKey(t *testing.T) { if string(test.key.Path()) != test.path { t.Errorf("Path() = %v, want %v", test.key.Path(), test.path) } - if string(test.key.Scheme()) != test.scheme { - t.Errorf("Scheme() = %v, want %v", test.key.Scheme(), test.scheme) - } ts, _ := timestamp.Parse(test.ts) if test.key.Time() != ts { t.Errorf("Timestamp() = %v, want %v", ts, test.ts) } + if string(test.key.Port()) != test.port { + t.Errorf("Port() = %v, want %v", test.key.Port(), test.port) + } + if string(test.key.Scheme()) != test.scheme { + t.Errorf("Scheme() = %v, want %v", test.key.Scheme(), test.scheme) + } + if string(test.key.UserInfo()) != test.userinfo { + t.Errorf("UserInfo() = %v, want %v", test.key.UserInfo(), test.userinfo) + } + if string(test.key.ResponseType()) != test.recordType { + t.Errorf("ResponseType() = %v, want %v", test.key.ResponseType(), test.recordType) + } }) } }