Skip to content
This repository has been archived by the owner on Jun 27, 2023. It is now read-only.

Tests for unsharding PR #99

Merged
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 76 additions & 17 deletions hamt/hamt.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,37 +25,55 @@ import (
"fmt"
"os"

format "github.com/ipfs/go-unixfs"
"github.com/ipfs/go-unixfs/internal"

bitfield "github.com/ipfs/go-bitfield"
cid "github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"
dag "github.com/ipfs/go-merkledag"
format "github.com/ipfs/go-unixfs"
)

const (
// HashMurmur3 is the multiformats identifier for Murmur3
HashMurmur3 uint64 = 0x22
)

func (ds *Shard) isValueNode() bool {
func init() {
internal.HAMTHashFunction = murmur3Hash
}

func (ds *Shard) IsValueNode() bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can be unexported again, it's not used anywhere outside this file

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@schomatis I pushed a commit for this

return ds.key != "" && ds.val != nil
}

// A Shard represents the HAMT. It should be initialized with NewShard().
type Shard struct {
childer *childer

tableSize int
// Entries per node (number of possible childs indexed by the partial key).
schomatis marked this conversation as resolved.
Show resolved Hide resolved
tableSize int
// Bits needed to encode child indexes (log2 of number of entries). This is
// the number of bits taken from the hash key on each level of the tree.
tableSizeLg2 int

builder cid.Builder
hashFunc uint64

// String format with number of zeros that will be present in the hexadecimal
// encoding of the child index to always reach the fixed maxpadlen chars.
// Example: maxpadlen = 4 => prefixPadStr: "%04X" (print number in hexadecimal
// format padding with zeros to always reach 4 characters).
prefixPadStr string
maxpadlen int
// Length in chars of string that encodes child indexes. We encode indexes
// as hexadecimal strings to this is log4 of number of entries.
maxpadlen int

dserv ipld.DAGService

// FIXME: Remove. We don't actually store "value nodes". This confusing
// abstraction just removes the maxpadlen from the link names to extract
// the actual value link the trie is storing.
Comment on lines +74 to +76
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding of your comment here is that val.Name and maxpadlen can be used to compute key and so we shouldn't bother caching it here and just recompute it when needed. Is that correct?

Also, can you clarify if this a FIXME intended for this set of PRs?

// leaf node
key string
val *ipld.Link
Expand All @@ -68,12 +86,13 @@ func NewShard(dserv ipld.DAGService, size int) (*Shard, error) {
return nil, err
}

// FIXME: Make this at least a static configuration for testing.
ds.hashFunc = HashMurmur3
return ds, nil
}

func makeShard(ds ipld.DAGService, size int) (*Shard, error) {
lg2s, err := logtwo(size)
lg2s, err := Logtwo(size)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -211,7 +230,7 @@ func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error {
// name key in this Shard or its children. It also returns the previous link
// under that name key (if any).
func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node) (*ipld.Link, error) {
hv := &hashBits{b: hash([]byte(name))}
hv := newHashBits(name)
err := ds.dserv.Add(ctx, node)
if err != nil {
return nil, err
Expand All @@ -221,6 +240,9 @@ func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node
if err != nil {
return nil, err
}

// FIXME: We don't need to set the name here, it will get overwritten.
// This is confusing, confirm and remove this line.
lnk.Name = ds.linkNamePrefix(0) + name

return ds.setValue(ctx, hv, name, lnk)
Expand All @@ -236,13 +258,13 @@ func (ds *Shard) Remove(ctx context.Context, name string) error {
// RemoveAndPrevious is similar to the public Remove but also returns the
// old removed link (if it exists).
func (ds *Shard) RemoveAndPrevious(ctx context.Context, name string) (*ipld.Link, error) {
hv := &hashBits{b: hash([]byte(name))}
hv := newHashBits(name)
return ds.setValue(ctx, hv, name, nil)
}

// Find searches for a child node by 'name' within this hamt
func (ds *Shard) Find(ctx context.Context, name string) (*ipld.Link, error) {
hv := &hashBits{b: hash([]byte(name))}
hv := newHashBits(name)

var out *ipld.Link
err := ds.getValue(ctx, hv, name, func(sv *Shard) error {
Expand Down Expand Up @@ -276,7 +298,7 @@ func (ds *Shard) childLinkType(lnk *ipld.Link) (linkType, error) {

// Link returns a merklelink to this shard node
func (ds *Shard) Link() (*ipld.Link, error) {
if ds.isValueNode() {
if ds.IsValueNode() {
return ds.val, nil
}

Expand Down Expand Up @@ -305,7 +327,7 @@ func (ds *Shard) getValue(ctx context.Context, hv *hashBits, key string, cb func
return err
}

if child.isValueNode() {
if child.IsValueNode() {
if child.key == key {
return cb(child)
}
Expand All @@ -332,6 +354,21 @@ func (ds *Shard) EnumLinks(ctx context.Context) ([]*ipld.Link, error) {
return links, nil
}

// FIXME: Check which functions do we need to actually expose.
func (ds *Shard) EnumAll(ctx context.Context) ([]*ipld.Link, error) {
var links []*ipld.Link

linkResults := ds.EnumAllAsync(ctx)

for linkResult := range linkResults {
if linkResult.Err != nil {
return links, linkResult.Err
}
links = append(links, linkResult.Link)
}
return links, nil
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this function is a duplicate of EnumLinks

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@schomatis I pushed a commit removing this

// ForEachLink walks the Shard and calls the given function.
func (ds *Shard) ForEachLink(ctx context.Context, f func(*ipld.Link) error) error {
return ds.walkTrie(ctx, func(sv *Shard) error {
Expand All @@ -345,6 +382,31 @@ func (ds *Shard) ForEachLink(ctx context.Context, f func(*ipld.Link) error) erro
// EnumLinksAsync returns a channel which will receive Links in the directory
// as they are enumerated, where order is not guaranteed
func (ds *Shard) EnumLinksAsync(ctx context.Context) <-chan format.LinkResult {
linkResults := make(chan format.LinkResult)
ctx, cancel := context.WithCancel(ctx)
go func() {
defer close(linkResults)
defer cancel()
getLinks := makeAsyncTrieGetLinks(ds.dserv, linkResults)
cset := cid.NewSet()
rootNode, err := ds.Node()
if err != nil {
emitResult(ctx, linkResults, format.LinkResult{Link: nil, Err: err})
return
}
// FIXME: Make concurrency an option for testing.
err = dag.Walk(ctx, getLinks, rootNode.Cid(), cset.Visit, dag.Concurrent())
//err = dag.Walk(ctx, getLinks, rootNode.Cid(), cset.Visit)
if err != nil {
emitResult(ctx, linkResults, format.LinkResult{Link: nil, Err: err})
}
}()
return linkResults
}

// EnumLinksAsync returns a channel which will receive Links in the directory
// as they are enumerated, where order is not guaranteed
func (ds *Shard) EnumAllAsync(ctx context.Context) <-chan format.LinkResult {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't needed anymore given EnumLinksAsync should be concurrent, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems not, and also seems to be unused. Removing

linkResults := make(chan format.LinkResult)
ctx, cancel := context.WithCancel(ctx)
go func() {
Expand Down Expand Up @@ -423,7 +485,7 @@ func emitResult(ctx context.Context, linkResults chan<- format.LinkResult, r for

func (ds *Shard) walkTrie(ctx context.Context, cb func(*Shard) error) error {
return ds.childer.each(ctx, func(s *Shard) error {
if s.isValueNode() {
if s.IsValueNode() {
if err := cb(s); err != nil {
return err
}
Expand Down Expand Up @@ -455,7 +517,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *
return
}

if child.isValueNode() {
if child.IsValueNode() {
// Leaf node. This is the base case of this recursive function.
if child.key == key {
// We are in the correct shard (tree level) so we modify this child
Expand Down Expand Up @@ -489,10 +551,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *
return nil, err
}
child.builder = ds.builder
chhv := &hashBits{
b: hash([]byte(grandChild.key)),
consumed: hv.consumed,
}
chhv := newConsumedHashBits(grandChild.key, hv.consumed)

// We explicitly ignore the oldValue returned by the next two insertions
// (which will be nil) to highlight there is no overwrite here: they are
Expand Down Expand Up @@ -536,7 +595,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *
// Have we loaded the child? Prefer that.
schild := child.childer.child(0)
if schild != nil {
if schild.isValueNode() {
if schild.IsValueNode() {
ds.childer.set(schild, i)
}
return
Expand Down
18 changes: 15 additions & 3 deletions hamt/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@ package hamt

import (
"fmt"
"math/bits"

"github.com/ipfs/go-unixfs/internal"

"github.com/spaolacci/murmur3"
"math/bits"
)

// hashBits is a helper that allows the reading of the 'next n bits' as an integer.
Expand All @@ -13,6 +15,16 @@ type hashBits struct {
consumed int
}

func newHashBits(val string) *hashBits {
return &hashBits{b: internal.HAMTHashFunction([]byte(val))}
}

func newConsumedHashBits(val string, consumed int) *hashBits {
hv := &hashBits{b: internal.HAMTHashFunction([]byte(val))}
hv.consumed = consumed
return hv
}

func mkmask(n int) byte {
return (1 << uint(n)) - 1
}
Expand Down Expand Up @@ -50,7 +62,7 @@ func (hb *hashBits) next(i int) int {
}
}

func logtwo(v int) (int, error) {
func Logtwo(v int) (int, error) {
if v <= 0 {
return 0, fmt.Errorf("hamt size should be a power of two")
}
Expand All @@ -61,7 +73,7 @@ func logtwo(v int) (int, error) {
return lg2, nil
}

func hash(val []byte) []byte {
func murmur3Hash(val []byte) []byte {
h := murmur3.New64()
h.Write(val)
return h.Sum(nil)
Expand Down
3 changes: 3 additions & 0 deletions internal/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
package internal

var HAMTHashFunction func(val []byte) []byte
Loading