Add draft of throttled copy #258

Open · wants to merge 11 commits into master
1 change: 1 addition & 0 deletions cmd/desync/cache.go
@@ -14,6 +14,7 @@ type cacheOptions struct {
	cache              string
	ignoreIndexes      []string
	ignoreChunks       []string
	throttleRateMillis int
}

func newCacheCommand(ctx context.Context) *cobra.Command {
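The new throttleRateMillis field presumably gets populated from a command-line flag elsewhere in the command setup, which this diff doesn't show. A minimal sketch of such wiring, assuming a hypothetical --throttle-rate-ms flag name and using the cmd and opt values already built in newCacheCommand():

	// Hypothetical flag registration, not part of this diff.
	flags := cmd.Flags()
	flags.IntVar(&opt.throttleRateMillis, "throttle-rate-ms", 0, "throttle rate for copy operations, 0 disables throttling")
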
8 changes: 8 additions & 0 deletions copy.go
@@ -11,9 +11,11 @@ import (
// store to populate a cache. If progress is provided, it'll be called when a
// chunk has been processed. Used to draw a progress bar, can be nil.
func Copy(ctx context.Context, ids []ChunkID, src Store, dst WriteStore, n int, pb ProgressBar) error {
	in := make(chan ChunkID)
	g, ctx := errgroup.WithContext(ctx)

	// Setup and start the progressbar if any
	pb.SetTotal(len(ids))
	pb.Start()
@@ -22,6 +24,9 @@ func Copy(ctx context.Context, ids []ChunkID, src Store, dst WriteStore, n int,
	// Start the workers
	for i := 0; i < n; i++ {
		g.Go(func() error {
			for id := range in {
				pb.Increment()
				hasChunk, err := dst.HasChunk(id)
@@ -31,7 +36,10 @@ func Copy(ctx context.Context, ids []ChunkID, src Store, dst WriteStore, n int,
				if hasChunk {
					continue
				}
				chunk, err := src.GetChunk(id)
				if err != nil {
					return err
				}
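This hunk doesn't show where the throttling hooks in; one way a caller inside the package could combine Copy with the new rate-limited store is sketched below (wrapping the destination store, the rate values, and the variable names are all assumptions, not part of this diff):

	// Hypothetical usage: wrap the destination store before handing it to Copy.
	throttled := NewRateLimitedStore(dst, ThrottleOptions{eventRate: 100, burstRate: 10})
	if err := Copy(ctx, ids, src, throttled, n, NewProgressBar("")); err != nil {
		return err
	}
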
36 changes: 36 additions & 0 deletions copy_test.go
@@ -0,0 +1,36 @@
package desync

import (
	"context"
	"testing"

	"github.com/stretchr/testify/require"
)

func TestCopy(t *testing.T) {
	src_store_dir := t.TempDir()
	dst_store_dir := t.TempDir()

	src_store, err := NewLocalStore(src_store_dir, StoreOptions{})
	require.NoError(t, err)

	dst_store, err := NewLocalStore(dst_store_dir, StoreOptions{})
	require.NoError(t, err)

	first_chunk_data := []byte("some data")
	first_chunk := NewChunk(first_chunk_data)
	first_chunk_id := first_chunk.ID()

	require.NoError(t, src_store.StoreChunk(first_chunk))
	has_the_stored_chunk, _ := src_store.HasChunk(first_chunk_id)
	require.True(t, has_the_stored_chunk)

	chunks := []ChunkID{first_chunk_id}

	err = Copy(context.Background(), chunks, src_store, dst_store, 1, NewProgressBar(""))
	require.NoError(t, err)

	has_the_chunk, _ := dst_store.HasChunk(first_chunk_id)
	require.True(t, has_the_chunk)
}
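
A follow-up test for the wrapper itself could look roughly like the sketch below; it only checks that StoreChunk, HasChunk and GetChunk delegate correctly, and the rate and burst values are arbitrary placeholders, not values from this PR:

func TestRateLimitedStoreRoundTrip(t *testing.T) {
	store_dir := t.TempDir()
	local, err := NewLocalStore(store_dir, StoreOptions{})
	require.NoError(t, err)

	// Generous limits so the test never blocks; placeholder values.
	throttled := NewRateLimitedStore(local, ThrottleOptions{eventRate: 100, burstRate: 10})

	chunk := NewChunk([]byte("some data"))
	require.NoError(t, throttled.StoreChunk(chunk))

	has, err := throttled.HasChunk(chunk.ID())
	require.NoError(t, err)
	require.True(t, has)

	got, err := throttled.GetChunk(chunk.ID())
	require.NoError(t, err)
	require.Equal(t, chunk.ID(), got.ID())
}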
1 change: 1 addition & 0 deletions go.mod
@@ -59,6 +59,7 @@ require (
golang.org/x/oauth2 v0.7.0 // indirect
golang.org/x/term v0.15.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect
2 changes: 2 additions & 0 deletions go.sum
@@ -202,6 +202,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
94 changes: 94 additions & 0 deletions ratelimitstore.go
@@ -0,0 +1,94 @@
package desync

import (
	"context"
	"fmt"

	"github.com/pkg/errors"
	"golang.org/x/time/rate"
)

// ThrottleOptions controls the rate limit applied to a wrapped store.
type ThrottleOptions struct {
	eventRate float64
	burstRate int
}

// RateLimitedStore wraps a WriteStore and throttles its operations with a token-bucket limiter.
type RateLimitedStore struct {
	wrappedStore WriteStore
	limiter      *rate.Limiter
	options      ThrottleOptions
}

var RateLimitedExceeded = errors.New("Rate Limit Exceeded")

// NewRateLimitedStore wraps a store with a limiter built from the given options.
func NewRateLimitedStore(s WriteStore, options ThrottleOptions) *RateLimitedStore {
	limiter := rate.NewLimiter(rate.Limit(options.eventRate), options.burstRate)
	return &RateLimitedStore{wrappedStore: s, limiter: limiter, options: options}
}

func (s RateLimitedStore) GetChunk(id ChunkID) (*Chunk, error) {
	chunk, err := s.wrappedStore.GetChunk(id)
	if err != nil {
		return chunk, err
	}
	ctx := context.Background()
	err = s.limiter.WaitN(ctx, 1)
	return chunk, err
}

func (s RateLimitedStore) HasChunk(id ChunkID) (bool, error) {
	has, err := s.wrappedStore.HasChunk(id)
	if err != nil {
		return has, err
	}
	ctx := context.Background()
	err = s.limiter.WaitN(ctx, 1)
	return has, err
}


func (s RateLimitedStore) StoreChunk(chunk *Chunk) error {
	// This isn't ideal, because what I'm really interested in is the size over the wire.

Review comment from the repository owner:

I think you can actually achieve this by using a limit of "bytes per second" (must be equal or larger than max-chunk-size). Then here in StoreChunk() you can:

	b, err := chunk.Data()
	if err != nil {
		return err
	}
	err = s.limiter.WaitN(ctx, len(b))
	if err != nil {
		return RateLimitedExceeded
	}
	return s.wrappedStore.StoreChunk(chunk)

It's a little more interesting in GetChunk() since you don't know how large it is. One way would be to WaitN(ctx, <avg-chunk-size>) before calling the wrapped store, or WaitN(ctx, <size of data>) after actually pulling the chunk from the wrapped store. The latter can cause spikes early on when concurrency is used, but should average to the rate you set over time.

As for HasChunk(), not sure if you really need to limit it, it's very small. You can just use 1 like you have now if you want to limit it too, could do that before calling the wrappedStore.

	_, err := chunk.Data()
	if err != nil {
		return err
	}

	ctx := context.Background()

	err = s.limiter.WaitN(ctx, 1)
	if err != nil {
		fmt.Println("Rate limit context error:", err)
		return RateLimitedExceeded
	}

	return s.wrappedStore.StoreChunk(chunk)
}

func (s RateLimitedStore) String() string {
	return s.wrappedStore.String()
}

func (s RateLimitedStore) Close() error { return nil }
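
Picking up the owner's suggestion above, a byte-based variant of StoreChunk() and GetChunk() could look roughly like the sketch below; it assumes the limiter is configured in bytes per second with a burst of at least the maximum chunk size, and charging GetChunk() after the read is only one of the two options mentioned in the review:

func (s RateLimitedStore) StoreChunk(chunk *Chunk) error {
	b, err := chunk.Data()
	if err != nil {
		return err
	}
	// Wait for as many tokens as there are bytes to write.
	if err := s.limiter.WaitN(context.Background(), len(b)); err != nil {
		return RateLimitedExceeded
	}
	return s.wrappedStore.StoreChunk(chunk)
}

func (s RateLimitedStore) GetChunk(id ChunkID) (*Chunk, error) {
	chunk, err := s.wrappedStore.GetChunk(id)
	if err != nil {
		return chunk, err
	}
	b, err := chunk.Data()
	if err != nil {
		return chunk, err
	}
	// Charge the limiter after the fact; early reads can spike with
	// concurrency, but the long-run average converges to the configured rate.
	if err := s.limiter.WaitN(context.Background(), len(b)); err != nil {
		return chunk, RateLimitedExceeded
	}
	return chunk, nil
}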