Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(PXP-6815): update MDS object after file upload. #29

Merged
merged 14 commits into from
Oct 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 22 additions & 33 deletions .secrets.baseline
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"exclude": {
"files": null,
"files": "go.sum|^.secrets.baseline$",
"lines": null
},
"generated_at": "2020-07-20T21:00:34Z",
"generated_at": "2020-10-13T06:55:03Z",
"plugins_used": [
{
"name": "AWSKeyDetector"
Expand Down Expand Up @@ -58,69 +58,58 @@
}
],
"results": {
"Gopkg.lock": [
"handlers/handler_test.go": [
{
"hashed_secret": "d2e2a9aee399f2db93bad1c76f133549b351b5b1",
"hashed_secret": "fea80b71da8a7d0532248ae5f0c19282e29f203c",
"is_secret": false,
"is_verified": false,
"line_number": 44,
"type": "Hex High Entropy String"
"line_number": 138,
"type": "Secret Keyword"
},
{
"hashed_secret": "8ece5ac7f119933e5e48b1b90cc9e73d10743f0c",
"hashed_secret": "1ec252abcbf0dca36f5083d209e3b4e065ad5778",
"is_secret": false,
"is_verified": false,
"line_number": 59,
"type": "Hex High Entropy String"
"line_number": 142,
"type": "Secret Keyword"
},
{
"hashed_secret": "bfbc53db495de2d802f59b91ca0b5e19f323a344",
"is_secret": false,
"is_verified": false,
"line_number": 148,
"type": "Secret Keyword"
}
],
"handlers/hash_utils_test.go": [
{
"hashed_secret": "b334975fba3eee5aab834f0d9cb5ffc1f47aeb56",
"is_secret": false,
"is_verified": false,
"line_number": 18,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "8c4ccd49bbb101ee2509994d4fa36985c3d5d730",
"is_secret": false,
"is_verified": false,
"line_number": 20,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "4fdd606fff86877b494664e737409eb7941a99f0",
"is_secret": false,
"is_verified": false,
"line_number": 21,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "f80c18650c6a1079704d378a25baeed1b5fe1f25",
"is_secret": false,
"is_verified": false,
"line_number": 22,
"type": "Hex High Entropy String"
}
],
"vendor/github.com/aws/aws-sdk-go/aws/credentials/processcreds/provider.go": [
{
"hashed_secret": "84b5be4b49d590b4f58de237bd143a9ab493981e",
"is_verified": false,
"line_number": 128,
"type": "Secret Keyword"
}
],
"vendor/github.com/aws/aws-sdk-go/aws/signer/v4/v4.go": [
{
"hashed_secret": "80b3e9a82b279b0a81e15465e9be19ea4c22db33",
"is_verified": false,
"line_number": 663,
"type": "Secret Keyword"
}
],
"vendor/github.com/jmespath/go-jmespath/astnodetype_string.go": [
{
"hashed_secret": "7d488916eb97aa479b5a45b3c7ddcc5a9cee4b3c",
"is_verified": false,
"line_number": 7,
"type": "Base64 High Entropy String"
}
]
},
"version": "0.13.1",
Expand Down
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
language: go

go:
- "1.10"
- "1.14"

# Restrict to cloning only 1 commit.
git:
Expand All @@ -10,4 +10,4 @@ git:
sudo: false

script:
- go test -v ./...
- go test -v ./...
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ go 1.13
require (
github.com/aws/aws-sdk-go v1.33.11
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
github.com/hashicorp/go-retryablehttp v0.6.7
github.com/jmespath/go-jmespath v0.3.0
github.com/magiconair/properties v1.8.1
)
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
github.com/aws/aws-sdk-go v1.33.11 h1:A7b3mNKbh/0zrhnNN/KxWD0YZJw2RImnjFXWOquYKB4=
github.com/aws/aws-sdk-go v1.33.11/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/hashicorp/go-cleanhttp v0.5.1 h1:dH3aiDG9Jvb5r5+bYHsikaOUIpcM0xvgMXVoDkXMzJM=
github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
github.com/hashicorp/go-retryablehttp v0.6.7 h1:8/CAEZt/+F7kR7GevNHulKkUjLht3CPmn7egmhieNKo=
github.com/hashicorp/go-retryablehttp v0.6.7/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/jmespath/go-jmespath v0.3.0 h1:OS12ieG61fsCg5+qLJ+SsW9NicxNkg3b25OyT2yCeUc=
github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik=
github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
Expand Down
143 changes: 75 additions & 68 deletions handlers/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,53 +2,75 @@ package handlers

import (
"encoding/json"
"errors"
"fmt"
"log"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"time"
)

// MaxRetries maximum number of retries
const MaxRetries = 5

type ConfigInfo struct {
Indexd IndexdInfo `indexd`
MetadataService MetadataServiceInfo `metadata_service`
}

type IndexdInfo struct {
URL string `url`
Username string `username`
Password string `password`
}

func minOf(vars ...int64) int64 {
min := vars[0]
type MetadataServiceInfo struct {
URL string `url`
Username string `username`
Password string `password`
}

// Read Indexd and Metadata Service config info from CONFIG_FILE into
// ConfigInfo struct. Panic if both Indexd config and Metadata Service
// configs can't be unmarshalled
func getConfigInfo() *ConfigInfo {
configInfo := new(ConfigInfo)
configBytes := []byte(os.Getenv("CONFIG_FILE"))

for _, i := range vars {
if min > i {
min = i
}
log.Printf("Attempting to unmarshal Indexd config from JSON in CONFIG_FILE env variable")
if err := json.Unmarshal(configBytes, &configInfo.Indexd); err != nil {
log.Panicf("Could not unmarshal JSON in CONFIG_FILE env variable: %s", err)
}
if configInfo.Indexd == (IndexdInfo{}) {
log.Panicf("Could not find Indexd config in JSON in CONFIG_FILE env variable. Both Indexd and Metadata Service configs are required")
}

return min
}
func getIndexServiceInfo() (*IndexdInfo, error) {
indexdInfo := new(IndexdInfo)
if err := json.Unmarshal([]byte(os.Getenv("CONFIG_FILE")), indexdInfo); err != nil {
return nil, errors.New("Enviroiment variable CONFIG_FILE is not set correctly")
log.Printf("Attempting to unmarshal Metadata Service config from JSON in CONFIG_FILE env variable")
if err := json.Unmarshal(configBytes, configInfo); err != nil {
log.Panicf("Could not unmarshal JSON in CONFIG_FILE env variable: %s", err)
}
return indexdInfo, nil
if configInfo.MetadataService == (MetadataServiceInfo{}) {
log.Panicf("Could not find Metadata Service config in JSON in CONFIG_FILE env variable. Both Indexd and Metadata Service configs are required")
}
log.Printf("Both Indexd and Metadata Service configs were unmarshalled")

return configInfo
}

// IndexS3Object indexes s3 object
// The fuction does several things. It first downloads the object from
// S3, computes size and hashes, and update indexd
// IndexS3Object indexes s3 object The fuction does several things. It
// downloads the object from S3, computes size and hashes, and updates Indexd
// and potentially Metadata Service
func IndexS3Object(s3objectURL string) {
configInfo := getConfigInfo()

s3objectURL, _ = url.QueryUnescape(s3objectURL)
u, err := url.Parse(s3objectURL)
if err != nil {
log.Panicf("Wrong url format %s\n", s3objectURL)
}
bucket, key := u.Host, u.Path
scheme, bucket, key := u.Scheme, u.Host, u.Path
bucketURL := fmt.Sprintf(`%s://%s`, scheme, bucket)

// key looks like one of these:
//
Expand All @@ -63,68 +85,53 @@ func IndexS3Object(s3objectURL string) {
} else {
uuid = strings.Join(split_key[:len(split_key)-1], "/")
}
filename := split_key[len(split_key)-1]
fileExtension := filepath.Ext(filename)
if len(fileExtension) > 0 {
fileExtension = fileExtension[1:]
}

log.Printf("Attempting to get rev for record %s in Indexd", uuid)
rev, err := GetIndexdRecordRev(uuid, configInfo.Indexd.URL)
mdsUploadedBody := fmt.Sprintf(`{"_bucket": "%s", "_filename": "%s", "_file_extension": "%s", "_upload_status": "uploaded"}`, bucketURL, filename, fileExtension)
if err != nil {
log.Panicf("Can not get record %s from Indexd. Error message %s", uuid, err)
} else if rev == "" {
log.Printf("Indexd record with guid %s already has size and hashes", uuid)
updateMetadataObjectWrapper(uuid, configInfo, mdsUploadedBody)
return
}
log.Printf("Got rev %s from Indexd for record %s", rev, uuid)

updateMetadataObjectWrapper(uuid, configInfo, `{"_upload_status": "processing"}`)

var mdsErrorBody string = `{"_upload_status": "error"}`
client, err := CreateNewAwsClient()
if err != nil {
updateMetadataObjectWrapper(uuid, configInfo, mdsErrorBody)
log.Panicf("Can not create AWS client. Detail %s\n\n", err)
}

log.Printf("Start to compute hashes for %s", key)
hashes, objectSize, err := CalculateBasicHashes(client, bucket, key)

if err != nil {
updateMetadataObjectWrapper(uuid, configInfo, mdsErrorBody)
log.Panicf("Can not compute hashes for %s. Detail %s ", key, err)
}
log.Printf("Finish to compute hashes for %s", key)

indexdInfo, _ := getIndexServiceInfo()

var retries = 0
var rev = ""

for {
rev, err = GetIndexdRecordRev(uuid, indexdInfo.URL)
if err != nil {
retries++
log.Printf("Error: %s. Retry: %d", err, retries)
time.Sleep(5 * time.Second)
} else if rev == "" {
log.Println("The file already has size and hashes")
return
} else {
break
}
if retries == MaxRetries {
log.Panicf("Can not get record %s from indexd. Error message %s", uuid, err)
}
}

body := fmt.Sprintf(`{"size": %d, "urls": ["%s"], "hashes": {"md5": "%s", "sha1":"%s", "sha256": "%s", "sha512": "%s", "crc": "%s"}}`,
indexdHashesBody := fmt.Sprintf(`{"size": %d, "urls": ["%s"], "hashes": {"md5": "%s", "sha1":"%s", "sha256": "%s", "sha512": "%s", "crc": "%s"}}`,
objectSize, s3objectURL, hashes.Md5, hashes.Sha1, hashes.Sha256, hashes.Sha512, hashes.Crc32c)

retries = 0
for {
resp, err := UpdateIndexdRecord(uuid, rev, indexdInfo, []byte(body))
if err != nil {
retries++
log.Printf("Error: %s. Retry: %d", err, retries)
time.Sleep(5 * time.Second)
} else if resp.StatusCode != 200 {
log.Printf("StatusCode: %d. Retry: %d", resp.StatusCode, retries)
retries++
time.Sleep(5 * time.Second)
} else {
log.Printf("Finish updating the record %s. Response Status: %d. Body %s", uuid, resp.StatusCode, body)
break
}

if retries == MaxRetries {
if err == nil {
log.Panicf("Can not update %s with hash info. Body %s. Status code %d. Detail %s", uuid, body, resp.StatusCode, err)
} else {
log.Panicf("Can not update %s with hash info. Body %s. Detail %s", uuid, body, err)
}
break
}
log.Printf("Attempting to update Indexd record %s. Request Body: %s", uuid, indexdHashesBody)
resp, err := UpdateIndexdRecord(uuid, rev, &configInfo.Indexd, []byte(indexdHashesBody))
if err != nil {
updateMetadataObjectWrapper(uuid, configInfo, mdsErrorBody)
log.Panicf("Could not update Indexd record %s. Error: %s", uuid, err)
} else if resp.StatusCode != http.StatusOK {
updateMetadataObjectWrapper(uuid, configInfo, mdsErrorBody)
log.Panicf("Could not update Indexd record %s. Response Status Code: %d", uuid, resp.StatusCode)
}
log.Printf("Updated Indexd record %s with hash info. Response Status Code: %d", uuid, resp.StatusCode)

updateMetadataObjectWrapper(uuid, configInfo, mdsUploadedBody)
}
Loading