Skip to content

Commit

Permalink
Merge pull request #29 from uc-cdis/feat/update-mds-object-correspond…
Browse files Browse the repository at this point in the history
…ing-to-did

(PXP-6815): update MDS object after file upload.
  • Loading branch information
johnfrancismccann authored Oct 20, 2020
2 parents fc3e881 + 7c38369 commit e9ca31f
Show file tree
Hide file tree
Showing 9 changed files with 317 additions and 112 deletions.
55 changes: 22 additions & 33 deletions .secrets.baseline
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"exclude": {
"files": null,
"files": "go.sum|^.secrets.baseline$",
"lines": null
},
"generated_at": "2020-07-20T21:00:34Z",
"generated_at": "2020-10-13T06:55:03Z",
"plugins_used": [
{
"name": "AWSKeyDetector"
Expand Down Expand Up @@ -58,69 +58,58 @@
}
],
"results": {
"Gopkg.lock": [
"handlers/handler_test.go": [
{
"hashed_secret": "d2e2a9aee399f2db93bad1c76f133549b351b5b1",
"hashed_secret": "fea80b71da8a7d0532248ae5f0c19282e29f203c",
"is_secret": false,
"is_verified": false,
"line_number": 44,
"type": "Hex High Entropy String"
"line_number": 138,
"type": "Secret Keyword"
},
{
"hashed_secret": "8ece5ac7f119933e5e48b1b90cc9e73d10743f0c",
"hashed_secret": "1ec252abcbf0dca36f5083d209e3b4e065ad5778",
"is_secret": false,
"is_verified": false,
"line_number": 59,
"type": "Hex High Entropy String"
"line_number": 142,
"type": "Secret Keyword"
},
{
"hashed_secret": "bfbc53db495de2d802f59b91ca0b5e19f323a344",
"is_secret": false,
"is_verified": false,
"line_number": 148,
"type": "Secret Keyword"
}
],
"handlers/hash_utils_test.go": [
{
"hashed_secret": "b334975fba3eee5aab834f0d9cb5ffc1f47aeb56",
"is_secret": false,
"is_verified": false,
"line_number": 18,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "8c4ccd49bbb101ee2509994d4fa36985c3d5d730",
"is_secret": false,
"is_verified": false,
"line_number": 20,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "4fdd606fff86877b494664e737409eb7941a99f0",
"is_secret": false,
"is_verified": false,
"line_number": 21,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "f80c18650c6a1079704d378a25baeed1b5fe1f25",
"is_secret": false,
"is_verified": false,
"line_number": 22,
"type": "Hex High Entropy String"
}
],
"vendor/github.com/aws/aws-sdk-go/aws/credentials/processcreds/provider.go": [
{
"hashed_secret": "84b5be4b49d590b4f58de237bd143a9ab493981e",
"is_verified": false,
"line_number": 128,
"type": "Secret Keyword"
}
],
"vendor/github.com/aws/aws-sdk-go/aws/signer/v4/v4.go": [
{
"hashed_secret": "80b3e9a82b279b0a81e15465e9be19ea4c22db33",
"is_verified": false,
"line_number": 663,
"type": "Secret Keyword"
}
],
"vendor/github.com/jmespath/go-jmespath/astnodetype_string.go": [
{
"hashed_secret": "7d488916eb97aa479b5a45b3c7ddcc5a9cee4b3c",
"is_verified": false,
"line_number": 7,
"type": "Base64 High Entropy String"
}
]
},
"version": "0.13.1",
Expand Down
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
language: go

go:
- "1.10"
- "1.14"

# Restrict to cloning only 1 commit.
git:
Expand All @@ -10,4 +10,4 @@ git:
sudo: false

script:
- go test -v ./...
- go test -v ./...
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ go 1.13
require (
github.com/aws/aws-sdk-go v1.33.11
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
github.com/hashicorp/go-retryablehttp v0.6.7
github.com/jmespath/go-jmespath v0.3.0
github.com/magiconair/properties v1.8.1
)
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
github.com/aws/aws-sdk-go v1.33.11 h1:A7b3mNKbh/0zrhnNN/KxWD0YZJw2RImnjFXWOquYKB4=
github.com/aws/aws-sdk-go v1.33.11/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/hashicorp/go-cleanhttp v0.5.1 h1:dH3aiDG9Jvb5r5+bYHsikaOUIpcM0xvgMXVoDkXMzJM=
github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
github.com/hashicorp/go-retryablehttp v0.6.7 h1:8/CAEZt/+F7kR7GevNHulKkUjLht3CPmn7egmhieNKo=
github.com/hashicorp/go-retryablehttp v0.6.7/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/jmespath/go-jmespath v0.3.0 h1:OS12ieG61fsCg5+qLJ+SsW9NicxNkg3b25OyT2yCeUc=
github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik=
github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
Expand Down
143 changes: 75 additions & 68 deletions handlers/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,53 +2,75 @@ package handlers

import (
"encoding/json"
"errors"
"fmt"
"log"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"time"
)

// MaxRetries maximum number of retries
const MaxRetries = 5

type ConfigInfo struct {
Indexd IndexdInfo `indexd`
MetadataService MetadataServiceInfo `metadata_service`
}

type IndexdInfo struct {
URL string `url`
Username string `username`
Password string `password`
}

func minOf(vars ...int64) int64 {
min := vars[0]
type MetadataServiceInfo struct {
URL string `url`
Username string `username`
Password string `password`
}

// Read Indexd and Metadata Service config info from CONFIG_FILE into
// ConfigInfo struct. Panic if both Indexd config and Metadata Service
// configs can't be unmarshalled
func getConfigInfo() *ConfigInfo {
configInfo := new(ConfigInfo)
configBytes := []byte(os.Getenv("CONFIG_FILE"))

for _, i := range vars {
if min > i {
min = i
}
log.Printf("Attempting to unmarshal Indexd config from JSON in CONFIG_FILE env variable")
if err := json.Unmarshal(configBytes, &configInfo.Indexd); err != nil {
log.Panicf("Could not unmarshal JSON in CONFIG_FILE env variable: %s", err)
}
if configInfo.Indexd == (IndexdInfo{}) {
log.Panicf("Could not find Indexd config in JSON in CONFIG_FILE env variable. Both Indexd and Metadata Service configs are required")
}

return min
}
func getIndexServiceInfo() (*IndexdInfo, error) {
indexdInfo := new(IndexdInfo)
if err := json.Unmarshal([]byte(os.Getenv("CONFIG_FILE")), indexdInfo); err != nil {
return nil, errors.New("Enviroiment variable CONFIG_FILE is not set correctly")
log.Printf("Attempting to unmarshal Metadata Service config from JSON in CONFIG_FILE env variable")
if err := json.Unmarshal(configBytes, configInfo); err != nil {
log.Panicf("Could not unmarshal JSON in CONFIG_FILE env variable: %s", err)
}
return indexdInfo, nil
if configInfo.MetadataService == (MetadataServiceInfo{}) {
log.Panicf("Could not find Metadata Service config in JSON in CONFIG_FILE env variable. Both Indexd and Metadata Service configs are required")
}
log.Printf("Both Indexd and Metadata Service configs were unmarshalled")

return configInfo
}

// IndexS3Object indexes s3 object
// The fuction does several things. It first downloads the object from
// S3, computes size and hashes, and update indexd
// IndexS3Object indexes s3 object The fuction does several things. It
// downloads the object from S3, computes size and hashes, and updates Indexd
// and potentially Metadata Service
func IndexS3Object(s3objectURL string) {
configInfo := getConfigInfo()

s3objectURL, _ = url.QueryUnescape(s3objectURL)
u, err := url.Parse(s3objectURL)
if err != nil {
log.Panicf("Wrong url format %s\n", s3objectURL)
}
bucket, key := u.Host, u.Path
scheme, bucket, key := u.Scheme, u.Host, u.Path
bucketURL := fmt.Sprintf(`%s://%s`, scheme, bucket)

// key looks like one of these:
//
Expand All @@ -63,68 +85,53 @@ func IndexS3Object(s3objectURL string) {
} else {
uuid = strings.Join(split_key[:len(split_key)-1], "/")
}
filename := split_key[len(split_key)-1]
fileExtension := filepath.Ext(filename)
if len(fileExtension) > 0 {
fileExtension = fileExtension[1:]
}

log.Printf("Attempting to get rev for record %s in Indexd", uuid)
rev, err := GetIndexdRecordRev(uuid, configInfo.Indexd.URL)
mdsUploadedBody := fmt.Sprintf(`{"_bucket": "%s", "_filename": "%s", "_file_extension": "%s", "_upload_status": "uploaded"}`, bucketURL, filename, fileExtension)
if err != nil {
log.Panicf("Can not get record %s from Indexd. Error message %s", uuid, err)
} else if rev == "" {
log.Printf("Indexd record with guid %s already has size and hashes", uuid)
updateMetadataObjectWrapper(uuid, configInfo, mdsUploadedBody)
return
}
log.Printf("Got rev %s from Indexd for record %s", rev, uuid)

updateMetadataObjectWrapper(uuid, configInfo, `{"_upload_status": "processing"}`)

var mdsErrorBody string = `{"_upload_status": "error"}`
client, err := CreateNewAwsClient()
if err != nil {
updateMetadataObjectWrapper(uuid, configInfo, mdsErrorBody)
log.Panicf("Can not create AWS client. Detail %s\n\n", err)
}

log.Printf("Start to compute hashes for %s", key)
hashes, objectSize, err := CalculateBasicHashes(client, bucket, key)

if err != nil {
updateMetadataObjectWrapper(uuid, configInfo, mdsErrorBody)
log.Panicf("Can not compute hashes for %s. Detail %s ", key, err)
}
log.Printf("Finish to compute hashes for %s", key)

indexdInfo, _ := getIndexServiceInfo()

var retries = 0
var rev = ""

for {
rev, err = GetIndexdRecordRev(uuid, indexdInfo.URL)
if err != nil {
retries++
log.Printf("Error: %s. Retry: %d", err, retries)
time.Sleep(5 * time.Second)
} else if rev == "" {
log.Println("The file already has size and hashes")
return
} else {
break
}
if retries == MaxRetries {
log.Panicf("Can not get record %s from indexd. Error message %s", uuid, err)
}
}

body := fmt.Sprintf(`{"size": %d, "urls": ["%s"], "hashes": {"md5": "%s", "sha1":"%s", "sha256": "%s", "sha512": "%s", "crc": "%s"}}`,
indexdHashesBody := fmt.Sprintf(`{"size": %d, "urls": ["%s"], "hashes": {"md5": "%s", "sha1":"%s", "sha256": "%s", "sha512": "%s", "crc": "%s"}}`,
objectSize, s3objectURL, hashes.Md5, hashes.Sha1, hashes.Sha256, hashes.Sha512, hashes.Crc32c)

retries = 0
for {
resp, err := UpdateIndexdRecord(uuid, rev, indexdInfo, []byte(body))
if err != nil {
retries++
log.Printf("Error: %s. Retry: %d", err, retries)
time.Sleep(5 * time.Second)
} else if resp.StatusCode != 200 {
log.Printf("StatusCode: %d. Retry: %d", resp.StatusCode, retries)
retries++
time.Sleep(5 * time.Second)
} else {
log.Printf("Finish updating the record %s. Response Status: %d. Body %s", uuid, resp.StatusCode, body)
break
}

if retries == MaxRetries {
if err == nil {
log.Panicf("Can not update %s with hash info. Body %s. Status code %d. Detail %s", uuid, body, resp.StatusCode, err)
} else {
log.Panicf("Can not update %s with hash info. Body %s. Detail %s", uuid, body, err)
}
break
}
log.Printf("Attempting to update Indexd record %s. Request Body: %s", uuid, indexdHashesBody)
resp, err := UpdateIndexdRecord(uuid, rev, &configInfo.Indexd, []byte(indexdHashesBody))
if err != nil {
updateMetadataObjectWrapper(uuid, configInfo, mdsErrorBody)
log.Panicf("Could not update Indexd record %s. Error: %s", uuid, err)
} else if resp.StatusCode != http.StatusOK {
updateMetadataObjectWrapper(uuid, configInfo, mdsErrorBody)
log.Panicf("Could not update Indexd record %s. Response Status Code: %d", uuid, resp.StatusCode)
}
log.Printf("Updated Indexd record %s with hash info. Response Status Code: %d", uuid, resp.StatusCode)

updateMetadataObjectWrapper(uuid, configInfo, mdsUploadedBody)
}
Loading

0 comments on commit e9ca31f

Please sign in to comment.