Skip to content

Commit

Permalink
Ignore records with IDENTITY CID in IndexSorted
Browse files Browse the repository at this point in the history
Skip `IDENTITY` CIDs in `IndexSorted`, as required by CARv2
specification.

Re-generate `sample-wrapped-v2.car` testdata, since its previous index
contained CIDs with `IDENTITY` multihash.

Gracefully handle CIDs with `IDENTITY` multihash code in the
`blockstore` API. Since `Get`, `GetSize` and `Has` interfaces rely on
the index, decode given keys directly to satisfy calls for such CIDs.

See:
- https://ipld.io/specs/transport/car/carv2/#index-format

Fixes #215
  • Loading branch information
masih committed Sep 8, 2021
1 parent 2bff9fa commit acd3ead
Show file tree
Hide file tree
Showing 8 changed files with 139 additions and 12 deletions.
12 changes: 11 additions & 1 deletion v2/blockstore/doc.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// package blockstore implements the IPFS blockstore interface backed by a CAR file.
// Package blockstore implements the IPFS blockstore interface backed by a CAR file.
// This package provides two flavours of blockstore: ReadOnly and ReadWrite.
//
// The ReadOnly blockstore provides a read-only random access from a given data payload either in
Expand All @@ -15,4 +15,14 @@
// instantiated from the same file path using OpenReadOnly.
// A user may resume reading/writing from files produced by an instance of ReadWrite blockstore. The
// resumption is attempted automatically, if the path passed to OpenReadWrite exists.
//
// Note that the blockstore implementations in this package behave similarly to IPFS IdStore wrapper
// when given CIDs with multihash.IDENTITY code.
// More specifically, for CIDs with multhash.IDENTITY code:
// * blockstore.Has will always return true.
// * blockstore.Get will always succeed, returning the multihash digest of the given CID.
// * blockstore.GetSize will always succeed, returning the multihash digest length of the given CID.
// * blockstore.Put and blockstore.PutMany will always succeed without performing any operation.
//
// See: https://pkg.go.dev/github.com/ipfs/go-ipfs-blockstore#NewIdStore
package blockstore
40 changes: 38 additions & 2 deletions v2/blockstore/readonly.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ import (

"golang.org/x/exp/mmap"

"github.com/multiformats/go-varint"

blocks "github.com/ipfs/go-block-format"
"github.com/ipfs/go-cid"
blockstore "github.com/ipfs/go-ipfs-blockstore"
Expand All @@ -20,6 +18,8 @@ import (
"github.com/ipld/go-car/v2/internal/carv1"
"github.com/ipld/go-car/v2/internal/carv1/util"
internalio "github.com/ipld/go-car/v2/internal/io"
"github.com/multiformats/go-multihash"
"github.com/multiformats/go-varint"
)

var _ blockstore.Blockstore = (*ReadOnly)(nil)
Expand Down Expand Up @@ -187,7 +187,16 @@ func (b *ReadOnly) DeleteBlock(_ cid.Cid) error {
}

// Has indicates if the store contains a block that corresponds to the given key.
// This function always returns true for any given key with multihash.IDENTITY code.
func (b *ReadOnly) Has(key cid.Cid) (bool, error) {
// Check if the given CID has multihash.IDENTITY code
// Note, we do this without locking, since there is no shared information to lock for in order to perform the check.
if _, ok, err := isIdentity(key); err != nil {
return false, err
} else if ok {
return true, nil
}

b.mu.RLock()
defer b.mu.RUnlock()

Expand Down Expand Up @@ -227,7 +236,16 @@ func (b *ReadOnly) Has(key cid.Cid) (bool, error) {
}

// Get gets a block corresponding to the given key.
// This API will always return true if the given key has multihash.IDENTITY code.
func (b *ReadOnly) Get(key cid.Cid) (blocks.Block, error) {
// Check if the given CID has multihash.IDENTITY code
// Note, we do this without locking, since there is no shared information to lock for in order to perform the check.
if digest, ok, err := isIdentity(key); err != nil {
return nil, err
} else if ok {
return blocks.NewBlockWithCid(digest, key)
}

b.mu.RLock()
defer b.mu.RUnlock()

Expand Down Expand Up @@ -272,6 +290,14 @@ func (b *ReadOnly) Get(key cid.Cid) (blocks.Block, error) {

// GetSize gets the size of an item corresponding to the given key.
func (b *ReadOnly) GetSize(key cid.Cid) (int, error) {
// Check if the given CID has multihash.IDENTITY code
// Note, we do this without locking, since there is no shared information to lock for in order to perform the check.
if digest, ok, err := isIdentity(key); err != nil {
return 0, err
} else if ok {
return len(digest), nil
}

b.mu.RLock()
defer b.mu.RUnlock()

Expand Down Expand Up @@ -320,6 +346,16 @@ func (b *ReadOnly) GetSize(key cid.Cid) (int, error) {
return fnSize, nil
}

func isIdentity(key cid.Cid) (digest []byte, ok bool, err error) {
dmh, err := multihash.Decode(key.Hash())
if err != nil {
return nil, false, err
}
ok = dmh.Code == multihash.IDENTITY
digest = dmh.Digest
return digest, ok, nil
}

// Put is not supported and always returns an error.
func (b *ReadOnly) Put(blocks.Block) error {
return errReadOnly
Expand Down
7 changes: 7 additions & 0 deletions v2/blockstore/readwrite.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,13 @@ func (b *ReadWrite) PutMany(blks []blocks.Block) error {
for _, bl := range blks {
c := bl.Cid()

// Check for IDENTITY CID. If IDENTITY, ignore and move to the next block.
if _, ok, err := isIdentity(c); err != nil {
return err
} else if ok {
continue
}

if !b.wopts.BlockstoreAllowDuplicatePuts {
if b.ronly.ropts.BlockstoreUseWholeCIDs && b.idx.hasExactCID(c) {
continue // deduplicated by CID
Expand Down
37 changes: 29 additions & 8 deletions v2/blockstore/readwrite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ func TestBlockstore(t *testing.T) {
t.Cleanup(func() { ingester.Finalize() })

cids := make([]cid.Cid, 0)
var idCidCount int
for {
b, err := r.Next()
if err == io.EOF {
Expand All @@ -80,6 +81,12 @@ func TestBlockstore(t *testing.T) {
if has, err := ingester.Has(candidate); !has || err != nil {
t.Fatalf("expected to find %s but didn't: %s", candidate, err)
}

dmh, err := multihash.Decode(b.Cid().Hash())
require.NoError(t, err)
if dmh.Code == multihash.IDENTITY {
idCidCount++
}
}

for _, c := range cids {
Expand Down Expand Up @@ -107,9 +114,8 @@ func TestBlockstore(t *testing.T) {
}
numKeysCh++
}
if numKeysCh != len(cids) {
t.Fatalf("AllKeysChan returned an unexpected amount of keys; expected %v but got %v", len(cids), numKeysCh)
}
expectedCidCount := len(cids) - idCidCount
require.Equal(t, expectedCidCount, numKeysCh, "AllKeysChan returned an unexpected amount of keys; expected %v but got %v", expectedCidCount, numKeysCh)

for _, c := range cids {
b, err := robs.Get(c)
Expand Down Expand Up @@ -347,7 +353,7 @@ func TestBlockstoreResumption(t *testing.T) {
require.NoError(t, err)

// For each block resume on the same file, putting blocks one at a time.
var wantBlockCountSoFar int
var wantBlockCountSoFar, idCidCount int
wantBlocks := make(map[cid.Cid]blocks.Block)
for {
b, err := r.Next()
Expand All @@ -358,6 +364,12 @@ func TestBlockstoreResumption(t *testing.T) {
wantBlockCountSoFar++
wantBlocks[b.Cid()] = b

dmh, err := multihash.Decode(b.Cid().Hash())
require.NoError(t, err)
if dmh.Code == multihash.IDENTITY {
idCidCount++
}

// 30% chance of subject failing; more concretely: re-instantiating blockstore with the same
// file without calling Finalize. The higher this percentage the slower the test runs
// considering the number of blocks in the original CARv1 test payload.
Expand Down Expand Up @@ -402,7 +414,7 @@ func TestBlockstoreResumption(t *testing.T) {
gotBlockCountSoFar++
}
// Assert the number of blocks in file are as expected calculated via AllKeysChan
require.Equal(t, wantBlockCountSoFar, gotBlockCountSoFar)
require.Equal(t, wantBlockCountSoFar-idCidCount, gotBlockCountSoFar)
}
}
subject.Discard()
Expand Down Expand Up @@ -433,22 +445,31 @@ func TestBlockstoreResumption(t *testing.T) {
require.Equal(t, wantPayloadReader.Header, gotPayloadReader.Header)
for {
wantNextBlock, wantErr := wantPayloadReader.Next()
gotNextBlock, gotErr := gotPayloadReader.Next()
if wantErr == io.EOF {
gotNextBlock, gotErr := gotPayloadReader.Next()
require.Equal(t, wantErr, gotErr)
require.Nil(t, gotNextBlock)
break
}
require.NoError(t, wantErr)

dmh, err := multihash.Decode(wantNextBlock.Cid().Hash())
require.NoError(t, err)
if dmh.Code == multihash.IDENTITY {
continue
}

gotNextBlock, gotErr := gotPayloadReader.Next()
require.NoError(t, gotErr)
require.Equal(t, wantNextBlock, gotNextBlock)
}

// Assert index in resumed from file is identical to index generated directly from original CARv1 payload.
// Assert index in resumed from file is identical to index generated from the data payload portion of the generated CARv2 file.
_, err = v1f.Seek(0, io.SeekStart)
require.NoError(t, err)
gotIdx, err := index.ReadFrom(v2r.IndexReader())
require.NoError(t, err)
wantIdx, err := carv2.GenerateIndex(v1f)
wantIdx, err := carv2.GenerateIndex(v2r.DataReader())
require.NoError(t, err)
require.Equal(t, wantIdx, gotIdx)
}
Expand Down
4 changes: 4 additions & 0 deletions v2/index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ type (
// blocks even if the CID's encoding multicodec differs. Other index
// implementations might index the entire CID, the entire multihash, or
// just part of a multihash's digest.
//
// In accordance with the CARv2 specification, Index will never contain information about CIDs
// with multihash.IDENTITY code.
// See: https://ipld.io/specs/transport/car/carv2/#index-format
Index interface {
// Codec provides the multicodec code that the index implements.
//
Expand Down
7 changes: 7 additions & 0 deletions v2/index/indexsorted.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,13 @@ func (m *multiWidthIndex) Load(items []Record) error {
if err != nil {
return err
}

// Ignore records with IDENTITY as required by CARv2 spec.
// See: https://ipld.io/specs/transport/car/carv2/#index-format
if decHash.Code == multihash.IDENTITY {
continue
}

digest := decHash.Digest
idx, ok := idxs[len(digest)]
if !ok {
Expand Down
44 changes: 43 additions & 1 deletion v2/index/indexsorted_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import (
"encoding/binary"
"testing"

"github.com/ipfs/go-cid"
"github.com/multiformats/go-multihash"

"github.com/ipfs/go-merkledag"
"github.com/multiformats/go-multicodec"
"github.com/stretchr/testify/require"
Expand All @@ -13,7 +16,7 @@ func TestSortedIndexCodec(t *testing.T) {
require.Equal(t, multicodec.CarIndexSorted, newSorted().Codec())
}

func TestSortedIndex_GetReturnsNotFoundWhenCidDoesNotExist(t *testing.T) {
func TestIndexSorted_GetReturnsNotFoundWhenCidDoesNotExist(t *testing.T) {
nonExistingKey := merkledag.NewRawNode([]byte("lobstermuncher")).Block.Cid()
tests := []struct {
name string
Expand Down Expand Up @@ -62,3 +65,42 @@ func TestSingleWidthIndex_GetAll(t *testing.T) {
require.NoError(t, err)
require.Equal(t, 3, foundCount)
}

func TestIndexSorted_IgnoresIdentityCids(t *testing.T) {
data := []byte("🐟 in da 🌊d")
// Generate a record with IDENTITY multihash
idMh, err := multihash.Sum(data, multihash.IDENTITY, -1)
require.NoError(t, err)
idRec := Record{
Cid: cid.NewCidV1(cid.Raw, idMh),
Offset: 1,
}
// Generate a record with non-IDENTITY multihash
nonIdMh, err := multihash.Sum(data, multihash.SHA2_256, -1)
require.NoError(t, err)
noIdRec := Record{
Cid: cid.NewCidV1(cid.Raw, nonIdMh),
Offset: 2,
}

subject := newSorted()
err = subject.Load([]Record{idRec, noIdRec})
require.NoError(t, err)

// Assert record with IDENTITY CID is not present.
err = subject.GetAll(idRec.Cid, func(u uint64) bool {
require.Fail(t, "no IDENTITY record shoul be found")
return false
})
require.Equal(t, ErrNotFound, err)

// Assert record with non-IDENTITY CID is indeed present.
var found bool
err = subject.GetAll(noIdRec.Cid, func(gotOffset uint64) bool {
found = true
require.Equal(t, noIdRec.Offset, gotOffset)
return false
})
require.NoError(t, err)
require.True(t, found)
}
Binary file modified v2/testdata/sample-wrapped-v2.car
Binary file not shown.

0 comments on commit acd3ead

Please sign in to comment.