Skip to content

Commit

Permalink
Add Full Object Checksums and CRC64-NVME
Browse files Browse the repository at this point in the history
Backport of AIStor PR 247.

Add support for full object checksums as described here:

https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html

New checksum types are fully supported. Mint tests from minio/minio-go#2026 are now passing.

Includes fixes from minio#20743 for mint tests.

Use checksums to validate object content. Fixes minio#20845 and minio#20849.

Fixes checksum replication (downstream PR 250)
  • Loading branch information
klauspost committed Jan 20, 2025
1 parent 779ec8f commit 176ac02
Show file tree
Hide file tree
Showing 16 changed files with 376 additions and 168 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/run-mint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ export ACCESS_KEY="$2"
export SECRET_KEY="$3"
export JOB_NAME="$4"
export MINT_MODE="full"
export MINT_NO_FULL_OBJECT="true"

docker system prune -f || true
docker volume prune -f || true
Expand Down Expand Up @@ -39,7 +38,6 @@ docker run --rm --net=mint_default \
-e ACCESS_KEY="${ACCESS_KEY}" \
-e SECRET_KEY="${SECRET_KEY}" \
-e ENABLE_HTTPS=0 \
-e MINT_NO_FULL_OBJECT="${MINT_NO_FULL_OBJECT}" \
-e MINT_MODE="${MINT_MODE}" \
docker.io/minio/mint:edge

Expand Down
2 changes: 1 addition & 1 deletion cmd/api-errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@ var errorCodes = errorCodeMap{
},
ErrMissingContentMD5: {
Code: "MissingContentMD5",
Description: "Missing required header for this request: Content-Md5.",
Description: "Missing or invalid required header for this request: Content-Md5 or Amz-Content-Checksum",
HTTPStatusCode: http.StatusBadRequest,
},
ErrMissingSecurityHeader: {
Expand Down
33 changes: 20 additions & 13 deletions cmd/api-response.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,11 @@ type Part struct {
Size int64

// Checksum values
ChecksumCRC32 string `xml:"ChecksumCRC32,omitempty"`
ChecksumCRC32C string `xml:"ChecksumCRC32C,omitempty"`
ChecksumSHA1 string `xml:"ChecksumSHA1,omitempty"`
ChecksumSHA256 string `xml:"ChecksumSHA256,omitempty"`
ChecksumCRC32 string `xml:"ChecksumCRC32,omitempty"`
ChecksumCRC32C string `xml:"ChecksumCRC32C,omitempty"`
ChecksumSHA1 string `xml:"ChecksumSHA1,omitempty"`
ChecksumSHA256 string `xml:"ChecksumSHA256,omitempty"`
ChecksumCRC64NVME string `xml:",omitempty"`
}

// ListPartsResponse - format for list parts response.
Expand All @@ -192,6 +193,8 @@ type ListPartsResponse struct {
IsTruncated bool

ChecksumAlgorithm string
ChecksumType string

// List of parts.
Parts []Part `xml:"Part"`
}
Expand Down Expand Up @@ -413,10 +416,11 @@ type CompleteMultipartUploadResponse struct {
Key string
ETag string

ChecksumCRC32 string `xml:"ChecksumCRC32,omitempty"`
ChecksumCRC32C string `xml:"ChecksumCRC32C,omitempty"`
ChecksumSHA1 string `xml:"ChecksumSHA1,omitempty"`
ChecksumSHA256 string `xml:"ChecksumSHA256,omitempty"`
ChecksumCRC32 string `xml:"ChecksumCRC32,omitempty"`
ChecksumCRC32C string `xml:"ChecksumCRC32C,omitempty"`
ChecksumSHA1 string `xml:"ChecksumSHA1,omitempty"`
ChecksumSHA256 string `xml:"ChecksumSHA256,omitempty"`
ChecksumCRC64NVME string `xml:",omitempty"`
}

// DeleteError structure.
Expand Down Expand Up @@ -793,11 +797,12 @@ func generateCompleteMultipartUploadResponse(bucket, key, location string, oi Ob
Bucket: bucket,
Key: key,
// AWS S3 quotes the ETag in XML, make sure we are compatible here.
ETag: "\"" + oi.ETag + "\"",
ChecksumSHA1: cs[hash.ChecksumSHA1.String()],
ChecksumSHA256: cs[hash.ChecksumSHA256.String()],
ChecksumCRC32: cs[hash.ChecksumCRC32.String()],
ChecksumCRC32C: cs[hash.ChecksumCRC32C.String()],
ETag: "\"" + oi.ETag + "\"",
ChecksumSHA1: cs[hash.ChecksumSHA1.String()],
ChecksumSHA256: cs[hash.ChecksumSHA256.String()],
ChecksumCRC32: cs[hash.ChecksumCRC32.String()],
ChecksumCRC32C: cs[hash.ChecksumCRC32C.String()],
ChecksumCRC64NVME: cs[hash.ChecksumCRC64NVME.String()],
}
return c
}
Expand Down Expand Up @@ -825,6 +830,7 @@ func generateListPartsResponse(partsInfo ListPartsInfo, encodingType string) Lis
listPartsResponse.IsTruncated = partsInfo.IsTruncated
listPartsResponse.NextPartNumberMarker = partsInfo.NextPartNumberMarker
listPartsResponse.ChecksumAlgorithm = partsInfo.ChecksumAlgorithm
listPartsResponse.ChecksumType = partsInfo.ChecksumType

listPartsResponse.Parts = make([]Part, len(partsInfo.Parts))
for index, part := range partsInfo.Parts {
Expand All @@ -837,6 +843,7 @@ func generateListPartsResponse(partsInfo ListPartsInfo, encodingType string) Lis
newPart.ChecksumCRC32C = part.ChecksumCRC32C
newPart.ChecksumSHA1 = part.ChecksumSHA1
newPart.ChecksumSHA256 = part.ChecksumSHA256
newPart.ChecksumCRC64NVME = part.ChecksumCRC64NVME
listPartsResponse.Parts[index] = newPart
}
return listPartsResponse
Expand Down
2 changes: 1 addition & 1 deletion cmd/bucket-handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ func (api objectAPIHandlers) DeleteMultipleObjectsHandler(w http.ResponseWriter,

// Content-Md5 is required and should be set
// http://docs.aws.amazon.com/AmazonS3/latest/API/multiobjectdeleteapi.html
if _, ok := r.Header[xhttp.ContentMD5]; !ok {
if !validateLengthAndChecksum(r) {
writeErrorResponse(ctx, w, errorCodes.ToAPIErr(ErrMissingContentMD5), r.URL)
return
}
Expand Down
5 changes: 2 additions & 3 deletions cmd/bucket-lifecycle-handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package cmd

import (
"encoding/xml"
"io"
"net/http"
"strconv"
"time"
Expand Down Expand Up @@ -53,7 +52,7 @@ func (api objectAPIHandlers) PutBucketLifecycleHandler(w http.ResponseWriter, r
bucket := vars["bucket"]

// PutBucketLifecycle always needs a Content-Md5
if _, ok := r.Header[xhttp.ContentMD5]; !ok {
if !validateLengthAndChecksum(r) {
writeErrorResponse(ctx, w, errorCodes.ToAPIErr(ErrMissingContentMD5), r.URL)
return
}
Expand All @@ -70,7 +69,7 @@ func (api objectAPIHandlers) PutBucketLifecycleHandler(w http.ResponseWriter, r
return
}

bucketLifecycle, err := lifecycle.ParseLifecycleConfigWithID(io.LimitReader(r.Body, r.ContentLength))
bucketLifecycle, err := lifecycle.ParseLifecycleConfigWithID(r.Body)
if err != nil {
writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
return
Expand Down
52 changes: 40 additions & 12 deletions cmd/bucket-replication.go
Original file line number Diff line number Diff line change
Expand Up @@ -794,18 +794,23 @@ func putReplicationOpts(ctx context.Context, sc string, objInfo ObjectInfo, part
meta[k] = v
}
}

if len(objInfo.Checksum) > 0 {
// Add encrypted CRC to metadata for SSE-C objects.
if isSSEC {
meta[ReplicationSsecChecksumHeader] = base64.StdEncoding.EncodeToString(objInfo.Checksum)
} else {
for _, pi := range objInfo.Parts {
if pi.Number == partNum {
for k, v := range pi.Checksums {
meta[k] = v
if objInfo.isMultipart() && partNum > 0 {
for _, pi := range objInfo.Parts {
if pi.Number == partNum {
for k, v := range pi.Checksums { // for PutObjectPart
meta[k] = v
}
}
}
} else {
for k, v := range getCRCMeta(objInfo, 0, nil) { // for PutObject/NewMultipartUpload
meta[k] = v
}
}
}
}
Expand Down Expand Up @@ -1666,7 +1671,7 @@ func replicateObjectWithMultipart(ctx context.Context, c *minio.Core, bucket, ob
cHeader := http.Header{}
cHeader.Add(xhttp.MinIOSourceReplicationRequest, "true")
if !isSSEC {
for k, v := range partInfo.Checksums {
for k, v := range getCRCMeta(objInfo, partInfo.Number, nil) {
cHeader.Add(k, v)
}
}
Expand All @@ -1690,12 +1695,13 @@ func replicateObjectWithMultipart(ctx context.Context, c *minio.Core, bucket, ob
return fmt.Errorf("ssec(%t): Part size mismatch: got %d, want %d", isSSEC, pInfo.Size, size)
}
uploadedParts = append(uploadedParts, minio.CompletePart{
PartNumber: pInfo.PartNumber,
ETag: pInfo.ETag,
ChecksumCRC32: pInfo.ChecksumCRC32,
ChecksumCRC32C: pInfo.ChecksumCRC32C,
ChecksumSHA1: pInfo.ChecksumSHA1,
ChecksumSHA256: pInfo.ChecksumSHA256,
PartNumber: pInfo.PartNumber,
ETag: pInfo.ETag,
ChecksumCRC32: pInfo.ChecksumCRC32,
ChecksumCRC32C: pInfo.ChecksumCRC32C,
ChecksumSHA1: pInfo.ChecksumSHA1,
ChecksumSHA256: pInfo.ChecksumSHA256,
ChecksumCRC64NVME: pInfo.ChecksumCRC64NVME,
})
}
userMeta := map[string]string{
Expand All @@ -1708,6 +1714,12 @@ func replicateObjectWithMultipart(ctx context.Context, c *minio.Core, bucket, ob
// really big value but its okay on heavily loaded systems. This is just tail end timeout.
cctx, ccancel := context.WithTimeout(ctx, 10*time.Minute)
defer ccancel()

if len(objInfo.Checksum) > 0 {
for k, v := range getCRCMeta(objInfo, 0, nil) {
userMeta[k] = v
}
}
_, err = c.CompleteMultipartUpload(cctx, bucket, object, uploadID, uploadedParts, minio.PutObjectOptions{
UserMetadata: userMeta,
Internal: minio.AdvancedPutOptions{
Expand Down Expand Up @@ -3753,3 +3765,19 @@ type validateReplicationDestinationOptions struct {

checkReadyErr sync.Map
}

// getCRCMeta builds the checksum metadata entries for oi.
//
// It reads the checksums via oi.decryptChecksums(partNum, h) and, for every
// entry that parses into a valid checksum, stores the encoded value under the
// checksum type's key together with the matching xhttp.AmzChecksumType entry.
// Entries that cannot be parsed, or that parse into an invalid checksum, are
// ignored entirely.
//
// Callers in this file pass partNum 0 and a nil header for whole-object
// checksums, and the part number for per-part checksums.
//
// The returned map is never nil; it is empty when no valid checksum is found.
func getCRCMeta(oi ObjectInfo, partNum int, h http.Header) map[string]string {
	meta := make(map[string]string)
	for k, v := range oi.decryptChecksums(partNum, h) {
		cksum := hash.NewChecksumString(k, v)
		if cksum == nil || !cksum.Valid() {
			continue
		}
		meta[cksum.Type.Key()] = v
		// Only advertise the checksum type alongside a valid checksum value,
		// so an invalid entry can neither emit a dangling type nor overwrite
		// the type recorded for a valid one.
		meta[xhttp.AmzChecksumType] = cksum.Type.ObjType()
	}
	return meta
}
Loading

0 comments on commit 176ac02

Please sign in to comment.