From 72ca5651d5ed76abf260378737cccfb410af9d4e Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Mon, 27 Feb 2023 11:56:19 +0100 Subject: [PATCH 1/4] Add NextInsecure() method Attempt to make CAR traversal a little bit faster by not forcing users to hash every single in a CAR. --- v2/block_reader.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/v2/block_reader.go b/v2/block_reader.go index 27d134b0..356e3300 100644 --- a/v2/block_reader.go +++ b/v2/block_reader.go @@ -134,6 +134,20 @@ func (br *BlockReader) Next() (blocks.Block, error) { return blocks.NewBlockWithCid(data, c) } +// NextInsecure works in the same way as Next(), except it does not re-hash +// every block to verify that the CID corresponds to the block. It should not +// be used with untrusted or potentially corrupted CAR files. On the plus side, it is much ligher on the CPU. +func (br *BlockReader) NextInsecure() (blocks.Block, error) { + c, data, err := util.ReadNode(br.r, br.opts.ZeroLengthSectionAsEOF, br.opts.MaxAllowedSectionSize) + if err != nil { + return nil, err + } + + ss := uint64(c.ByteLen()) + uint64(len(data)) + br.offset += uint64(varint.UvarintSize(ss)) + ss + return blocks.NewBlockWithCid(data, c) +} + type BlockMetadata struct { cid.Cid Offset uint64 From b0c4055f31684edb5f19bf5fcc40e6b548214271 Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Tue, 28 Feb 2023 18:05:23 +0100 Subject: [PATCH 2/4] Add TrustedCAR option to BlockReader (remove NextInsecure()) --- v2/block_reader.go | 28 ++++++++-------------------- v2/options.go | 9 +++++++++ 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/v2/block_reader.go b/v2/block_reader.go index 356e3300..fff477a6 100644 --- a/v2/block_reader.go +++ b/v2/block_reader.go @@ -120,27 +120,15 @@ func (br *BlockReader) Next() (blocks.Block, error) { return nil, err } - hashed, err := c.Prefix().Sum(data) - if err != nil { - return nil, err - } - - if !hashed.Equals(c) { - return nil, fmt.Errorf("mismatch in content integrity, expected: %s, got: %s", c, hashed) - } - - ss := uint64(c.ByteLen()) + uint64(len(data)) - br.offset += uint64(varint.UvarintSize(ss)) + ss - return blocks.NewBlockWithCid(data, c) -} + if !br.opts.TrustedCAR { + hashed, err := c.Prefix().Sum(data) + if err != nil { + return nil, err + } -// NextInsecure works in the same way as Next(), except it does not re-hash -// every block to verify that the CID corresponds to the block. It should not -// be used with untrusted or potentially corrupted CAR files. On the plus side, it is much ligher on the CPU. -func (br *BlockReader) NextInsecure() (blocks.Block, error) { - c, data, err := util.ReadNode(br.r, br.opts.ZeroLengthSectionAsEOF, br.opts.MaxAllowedSectionSize) - if err != nil { - return nil, err + if !hashed.Equals(c) { + return nil, fmt.Errorf("mismatch in content integrity, expected: %s, got: %s", c, hashed) + } } ss := uint64(c.ByteLen()) + uint64(len(data)) diff --git a/v2/options.go b/v2/options.go index dbde3f3e..e28589e8 100644 --- a/v2/options.go +++ b/v2/options.go @@ -60,6 +60,7 @@ type Options struct { MaxTraversalLinks uint64 WriteAsCarV1 bool TraversalPrototypeChooser traversal.LinkTargetNodePrototypeChooser + TrustedCAR bool MaxAllowedHeaderSize uint64 MaxAllowedSectionSize uint64 @@ -160,6 +161,14 @@ func WithTraversalPrototypeChooser(t traversal.LinkTargetNodePrototypeChooser) O } } +// WithTrustedCAR specifies whether CIDs match the block data as they are read +// from the CAR files. +func WithTrustedCAR(t bool) Option { + return func(o *Options) { + o.TrustedCAR = t + } +} + // MaxAllowedHeaderSize overrides the default maximum size (of 32 MiB) that a // CARv1 decode (including within a CARv2 container) will allow a header to be // without erroring. From 0f53819dc69376d5e8aa3f109e93db3f2952caa9 Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Mon, 6 Mar 2023 10:49:25 +0100 Subject: [PATCH 3/4] Add test for TrustedCAR option --- v2/block_reader_test.go | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/v2/block_reader_test.go b/v2/block_reader_test.go index b5958c7f..434f3538 100644 --- a/v2/block_reader_test.go +++ b/v2/block_reader_test.go @@ -179,6 +179,41 @@ func TestMaxSectionLength(t *testing.T) { require.True(t, bytes.Equal(block, readBlock.RawData())) } +func TestTrustedCAR(t *testing.T) { + // headerHex is the zero-roots CARv1 header + const headerHex = "11a265726f6f7473806776657273696f6e01" + headerBytes, _ := hex.DecodeString(headerHex) + // block of zeros + block := make([]byte, 5) + // CID for that block + pfx := cid.NewPrefixV1(cid.Raw, mh.SHA2_256) + cid, err := pfx.Sum(block) + require.NoError(t, err) + + // Modity the block so it won't match CID anymore + block[2] = 0xFF + // construct CAR + var buf bytes.Buffer + buf.Write(headerBytes) + buf.Write(varint.ToUvarint(uint64(len(cid.Bytes()) + len(block)))) + buf.Write(cid.Bytes()) + buf.Write(block) + + // try to read it as trusted + car, err := carv2.NewBlockReader(bytes.NewReader(buf.Bytes()), carv2.WithTrustedCAR(true)) + require.NoError(t, err) + // error should occur on first section read + _, err = car.Next() + require.NoError(t, err) + + // Try to read it as untrusted - should fail + car, err = carv2.NewBlockReader(bytes.NewReader(buf.Bytes()), carv2.WithTrustedCAR(false)) + require.NoError(t, err) + // error should occur on first section read + _, err = car.Next() + require.EqualError(t, err, "mismatch in content integrity, expected: bafkreieikviivlpbn3cxhuq6njef37ikoysaqxa2cs26zxleqxpay2bzuq, got: bafkreidgklrppelx4fxcsna7cxvo3g7ayedfojkqeuus6kz6e4hy7gukmy") +} + func TestMaxHeaderLength(t *testing.T) { // headerHex is the is a 5 root CARv1 header const headerHex = "de01a265726f6f747385d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b501d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b501d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b501d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b501d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b5016776657273696f6e01" From 74ad50bd76d9e831c671a8b7740e089ac0bb5801 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Tue, 7 Mar 2023 10:13:06 +1100 Subject: [PATCH 4/4] fix: apply suggestions from code review --- v2/block_reader_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/v2/block_reader_test.go b/v2/block_reader_test.go index 434f3538..ca6c2404 100644 --- a/v2/block_reader_test.go +++ b/v2/block_reader_test.go @@ -190,7 +190,7 @@ func TestTrustedCAR(t *testing.T) { cid, err := pfx.Sum(block) require.NoError(t, err) - // Modity the block so it won't match CID anymore + // Modify the block so it won't match CID anymore block[2] = 0xFF // construct CAR var buf bytes.Buffer @@ -202,7 +202,6 @@ func TestTrustedCAR(t *testing.T) { // try to read it as trusted car, err := carv2.NewBlockReader(bytes.NewReader(buf.Bytes()), carv2.WithTrustedCAR(true)) require.NoError(t, err) - // error should occur on first section read _, err = car.Next() require.NoError(t, err)