Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(PXP-6815): update MDS object after file upload. #29

Merged
merged 14 commits into from
Oct 20, 2020
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 22 additions & 33 deletions .secrets.baseline
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"exclude": {
"files": null,
"files": "go.sum|^.secrets.baseline$",
"lines": null
},
"generated_at": "2020-07-20T21:00:34Z",
"generated_at": "2020-10-02T21:40:51Z",
"plugins_used": [
{
"name": "AWSKeyDetector"
Expand Down Expand Up @@ -58,69 +58,58 @@
}
],
"results": {
"Gopkg.lock": [
"handlers/handler_test.go": [
{
"hashed_secret": "d2e2a9aee399f2db93bad1c76f133549b351b5b1",
"hashed_secret": "fea80b71da8a7d0532248ae5f0c19282e29f203c",
"is_secret": false,
"is_verified": false,
"line_number": 44,
"type": "Hex High Entropy String"
"line_number": 143,
"type": "Secret Keyword"
},
{
"hashed_secret": "8ece5ac7f119933e5e48b1b90cc9e73d10743f0c",
"hashed_secret": "1ec252abcbf0dca36f5083d209e3b4e065ad5778",
"is_secret": false,
"is_verified": false,
"line_number": 59,
"type": "Hex High Entropy String"
"line_number": 148,
"type": "Secret Keyword"
},
{
"hashed_secret": "bfbc53db495de2d802f59b91ca0b5e19f323a344",
"is_secret": false,
"is_verified": false,
"line_number": 154,
"type": "Secret Keyword"
}
],
"handlers/hash_utils_test.go": [
{
"hashed_secret": "b334975fba3eee5aab834f0d9cb5ffc1f47aeb56",
"is_secret": false,
"is_verified": false,
"line_number": 18,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "8c4ccd49bbb101ee2509994d4fa36985c3d5d730",
"is_secret": false,
"is_verified": false,
"line_number": 20,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "4fdd606fff86877b494664e737409eb7941a99f0",
"is_secret": false,
"is_verified": false,
"line_number": 21,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "f80c18650c6a1079704d378a25baeed1b5fe1f25",
"is_secret": false,
"is_verified": false,
"line_number": 22,
"type": "Hex High Entropy String"
}
],
"vendor/github.com/aws/aws-sdk-go/aws/credentials/processcreds/provider.go": [
{
"hashed_secret": "84b5be4b49d590b4f58de237bd143a9ab493981e",
"is_verified": false,
"line_number": 128,
"type": "Secret Keyword"
}
],
"vendor/github.com/aws/aws-sdk-go/aws/signer/v4/v4.go": [
{
"hashed_secret": "80b3e9a82b279b0a81e15465e9be19ea4c22db33",
"is_verified": false,
"line_number": 663,
"type": "Secret Keyword"
}
],
"vendor/github.com/jmespath/go-jmespath/astnodetype_string.go": [
{
"hashed_secret": "7d488916eb97aa479b5a45b3c7ddcc5a9cee4b3c",
"is_verified": false,
"line_number": 7,
"type": "Base64 High Entropy String"
}
]
},
"version": "0.13.1",
Expand Down
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
language: go

go:
- "1.10"
- "1.14"

# Restrict to cloning only 1 commit.
git:
Expand All @@ -10,4 +10,4 @@ git:
sudo: false

script:
- go test -v ./...
- go test -v ./...
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ go 1.13
require (
github.com/aws/aws-sdk-go v1.33.11
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
github.com/hashicorp/go-retryablehttp v0.6.7
github.com/jmespath/go-jmespath v0.3.0
github.com/magiconair/properties v1.8.1
)
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
github.com/aws/aws-sdk-go v1.33.11 h1:A7b3mNKbh/0zrhnNN/KxWD0YZJw2RImnjFXWOquYKB4=
github.com/aws/aws-sdk-go v1.33.11/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/hashicorp/go-cleanhttp v0.5.1 h1:dH3aiDG9Jvb5r5+bYHsikaOUIpcM0xvgMXVoDkXMzJM=
github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
github.com/hashicorp/go-retryablehttp v0.6.7 h1:8/CAEZt/+F7kR7GevNHulKkUjLht3CPmn7egmhieNKo=
github.com/hashicorp/go-retryablehttp v0.6.7/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/jmespath/go-jmespath v0.3.0 h1:OS12ieG61fsCg5+qLJ+SsW9NicxNkg3b25OyT2yCeUc=
github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik=
github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
Expand Down
130 changes: 64 additions & 66 deletions handlers/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,68 @@ package handlers

import (
"encoding/json"
"errors"
"fmt"
"log"
"net/http"
"net/url"
"os"
"strings"
"time"
)

// MaxRetries maximum number of retries
const MaxRetries = 5

type ConfigInfo struct {
Indexd IndexdInfo `indexd`
MetadataService MetadataServiceInfo `metadata_service`
}

type IndexdInfo struct {
URL string `url`
Username string `username`
Password string `password`
}

func minOf(vars ...int64) int64 {
min := vars[0]
type MetadataServiceInfo struct {
URL string `url`
Username string `username`
Password string `password`
}

for _, i := range vars {
if min > i {
min = i
// Read Indexd and Metadata Service config info from CONFIG_FILE into
// ConfigInfo struct. Panic if Indexd creds could not be found
func getConfigInfo() *ConfigInfo {
configInfo := new(ConfigInfo)
configBytes := []byte(os.Getenv("CONFIG_FILE"))
log.Printf("Attempting to unmarshal both Indexd and Metadata Service configs from JSON in CONFIG_FILE env variable")
if err := json.Unmarshal(configBytes, configInfo); err != nil {
log.Panicf("Could not unmarshal JSON in CONFIG_FILE env variable: %s", err)
}
if configInfo.Indexd == (IndexdInfo{}) {
log.Printf("Could not find required Indexd config when unmarshalling both Indexd and Metadata Service configs. Trying again to only unmarshal Indexd config")
if err := json.Unmarshal(configBytes, &configInfo.Indexd); err != nil {
log.Panicf("Could not unmarshal JSON in CONFIG_FILE env variable: %s", err)
}
if configInfo.Indexd == (IndexdInfo{}) {
log.Panicf("Could not find required Indexd config in JSON in CONFIG_FILE env variable")
}
}

return min
}
func getIndexServiceInfo() (*IndexdInfo, error) {
indexdInfo := new(IndexdInfo)
if err := json.Unmarshal([]byte(os.Getenv("CONFIG_FILE")), indexdInfo); err != nil {
return nil, errors.New("Enviroiment variable CONFIG_FILE is not set correctly")
log.Printf("Indexd config was unmarshalled")
if configInfo.MetadataService != (MetadataServiceInfo{}) {
log.Printf("Metadata Service config was unmarshalled")
} else {
log.Printf("Metadata Service config was not unmarshalled")
}
return indexdInfo, nil

return configInfo
}

// IndexS3Object indexes s3 object
// The fuction does several things. It first downloads the object from
// S3, computes size and hashes, and update indexd
// IndexS3Object indexes s3 object The fuction does several things. It
// downloads the object from S3, computes size and hashes, and updates Indexd
// and potentially Metadata Service
func IndexS3Object(s3objectURL string) {
configInfo := getConfigInfo()

s3objectURL, _ = url.QueryUnescape(s3objectURL)
u, err := url.Parse(s3objectURL)
if err != nil {
Expand All @@ -63,6 +84,21 @@ func IndexS3Object(s3objectURL string) {
} else {
uuid = strings.Join(split_key[:len(split_key)-1], "/")
}
filename := split_key[len(split_key)-1]

log.Printf("Attempting to get rev for record %s in Indexd", uuid)
rev, err := GetIndexdRecordRev(uuid, configInfo.Indexd.URL)
mdsUploadedBody := fmt.Sprintf(`{"_upload_status": "uploaded", "_filename": "%s"}`, filename)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the ticket and design doc mention _file_type but not _filename

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I talked with @Avantol13 about this. We thought _filename could be preferable. I just updated the design doc to reflect this.

if err != nil {
log.Panicf("Can not get record %s from Indexd. Error message %s", uuid, err)
} else if rev == "" {
log.Printf("Indexd record with guid %s already has size and hashes", uuid)
updateMetadataObjectWrapper(uuid, configInfo, mdsUploadedBody)
return
}
log.Printf("Got rev %s from Indexd for record %s", rev, uuid)

updateMetadataObjectWrapper(uuid, configInfo, `{"_upload_status": "indexs3client job calculating hashes and size"}`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all the others statuses in the design doc are a single word (or underscore-separated words)

you might want to check with @Avantol13 if this is an acceptable status. if not, maybe uploaded is fine, or maybe something like processing if you do want to differentiate it from uploaded?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah just one thing to keep in mind is what _upload_status should be in the case that this job fails to download and calculate hashes for the file object (e.g. maybe the file is too big and can't be downloaded)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, that's not detailed in the design doc, i'll let you and Alex make that call


client, err := CreateNewAwsClient()
if err != nil {
Expand All @@ -77,54 +113,16 @@ func IndexS3Object(s3objectURL string) {
}
log.Printf("Finish to compute hashes for %s", key)

indexdInfo, _ := getIndexServiceInfo()

var retries = 0
var rev = ""

for {
rev, err = GetIndexdRecordRev(uuid, indexdInfo.URL)
if err != nil {
retries++
log.Printf("Error: %s. Retry: %d", err, retries)
time.Sleep(5 * time.Second)
} else if rev == "" {
log.Println("The file already has size and hashes")
return
} else {
break
}
if retries == MaxRetries {
log.Panicf("Can not get record %s from indexd. Error message %s", uuid, err)
}
}

body := fmt.Sprintf(`{"size": %d, "urls": ["%s"], "hashes": {"md5": "%s", "sha1":"%s", "sha256": "%s", "sha512": "%s", "crc": "%s"}}`,
indexdHashesBody := fmt.Sprintf(`{"size": %d, "urls": ["%s"], "hashes": {"md5": "%s", "sha1":"%s", "sha256": "%s", "sha512": "%s", "crc": "%s"}}`,
objectSize, s3objectURL, hashes.Md5, hashes.Sha1, hashes.Sha256, hashes.Sha512, hashes.Crc32c)

retries = 0
for {
resp, err := UpdateIndexdRecord(uuid, rev, indexdInfo, []byte(body))
if err != nil {
retries++
log.Printf("Error: %s. Retry: %d", err, retries)
time.Sleep(5 * time.Second)
} else if resp.StatusCode != 200 {
log.Printf("StatusCode: %d. Retry: %d", resp.StatusCode, retries)
retries++
time.Sleep(5 * time.Second)
} else {
log.Printf("Finish updating the record %s. Response Status: %d. Body %s", uuid, resp.StatusCode, body)
break
}

if retries == MaxRetries {
if err == nil {
log.Panicf("Can not update %s with hash info. Body %s. Status code %d. Detail %s", uuid, body, resp.StatusCode, err)
} else {
log.Panicf("Can not update %s with hash info. Body %s. Detail %s", uuid, body, err)
}
break
}
log.Printf("Attempting to update Indexd record %s. Request Body: %s", uuid, indexdHashesBody)
resp, err := UpdateIndexdRecord(uuid, rev, &configInfo.Indexd, []byte(indexdHashesBody))
if err != nil {
log.Panicf("Could not update Indexd record %s. Error: %s", uuid, err)
} else if resp.StatusCode != http.StatusOK {
log.Panicf("Could not update Indexd record %s. Response Status Code: %d", uuid, resp.StatusCode)
}
log.Printf("Updated Indexd record %s with hash info. Response Status Code: %d", uuid, resp.StatusCode)

updateMetadataObjectWrapper(uuid, configInfo, mdsUploadedBody)
}
Loading