Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve heap iterators. #4731

Merged
merged 1 commit into from
Nov 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions pkg/iter/entry_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,9 @@ func NewHeapIterator(ctx context.Context, is []EntryIterator, direction logproto
result := &heapIterator{is: is, stats: stats.GetChunkData(ctx)}
switch direction {
case logproto.BACKWARD:
result.heap = &iteratorMaxHeap{}
result.heap = &iteratorMaxHeap{iteratorHeap: make([]EntryIterator, 0, len(is))}
case logproto.FORWARD:
result.heap = &iteratorMinHeap{}
result.heap = &iteratorMinHeap{iteratorHeap: make([]EntryIterator, 0, len(is))}
default:
panic("bad direction")
}
Expand Down Expand Up @@ -220,6 +220,16 @@ func (i *heapIterator) Next() bool {
return false
}

// Shortcut: when only one iterator is left in the heap, serve its entries
// directly and skip the same-timestamp merge logic below.
if i.heap.Len() == 1 {
i.currEntry = i.heap.Peek().Entry()
i.currLabels = i.heap.Peek().Labels()
if !i.heap.Peek().Next() {
i.heap.Pop()
}
return true
}

// We support multiple entries with the same timestamp, and we want to
// preserve their original order. We look at all the top entries in the
// heap with the same timestamp, and pop the ones whose common value
Expand Down
58 changes: 51 additions & 7 deletions pkg/iter/entry_iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package iter
import (
"context"
"fmt"
"math/rand"
"testing"
"time"

Expand All @@ -14,8 +15,10 @@ import (
"github.com/grafana/loki/pkg/logqlmodel/stats"
)

const testSize = 10
const defaultLabels = "{foo=\"baz\"}"
const (
testSize = 10
defaultLabels = "{foo=\"baz\"}"
)

func TestIterator(t *testing.T) {
for i, tc := range []struct {
Expand Down Expand Up @@ -486,7 +489,8 @@ func Test_DuplicateCount(t *testing.T) {
Timestamp: time.Unix(0, 4),
Line: "bar",
},
}}),
},
}),
},
logproto.FORWARD,
6,
Expand All @@ -503,7 +507,8 @@ func Test_DuplicateCount(t *testing.T) {
Timestamp: time.Unix(0, 4),
Line: "bar",
},
}}),
},
}),
},
logproto.BACKWARD,
6,
Expand All @@ -517,7 +522,8 @@ func Test_DuplicateCount(t *testing.T) {
Timestamp: time.Unix(0, 4),
Line: "bar",
},
}}),
},
}),
},
logproto.FORWARD,
0,
Expand All @@ -531,7 +537,8 @@ func Test_DuplicateCount(t *testing.T) {
Timestamp: time.Unix(0, 4),
Line: "bar",
},
}}),
},
}),
},
logproto.BACKWARD,
0,
Expand All @@ -550,7 +557,6 @@ func Test_DuplicateCount(t *testing.T) {
}

func Test_timeRangedIterator_Next(t *testing.T) {

tests := []struct {
mint time.Time
maxt time.Time
Expand Down Expand Up @@ -630,3 +636,41 @@ func TestNonOverlappingClose(t *testing.T) {
require.Equal(t, true, a.closed.Load())
require.Equal(t, true, b.closed.Load())
}

// BenchmarkHeapIterator measures how long it takes to drain a BACKWARD heap
// iterator that merges streamsCount stream iterators holding entriesCount
// entries in total.
//
// Entries are dealt round-robin across the streams with strictly decreasing
// timestamps, so the entries inside each stream are in descending timestamp
// order. The order of the streams themselves is shuffled before the
// iterators are built.
func BenchmarkHeapIterator(b *testing.B) {
	const (
		entriesCount = 10000
		streamsCount = 100
	)
	ctx := context.Background()

	streams := make([]logproto.Stream, 0, streamsCount)
	for i := 0; i < streamsCount; i++ {
		streams = append(streams, logproto.Stream{
			Labels: fmt.Sprintf(`{i="%d"}`, i),
		})
	}
	for i := 0; i < entriesCount; i++ {
		s := &streams[i%streamsCount]
		s.Entries = append(s.Entries, logproto.Entry{
			Timestamp: time.Unix(0, int64(streamsCount-i)),
			Line:      fmt.Sprintf("%d", i),
		})
	}
	// Shuffle with a fixed seed so every run benchmarks the same stream
	// order; the package-level math/rand source is randomly seeded on newer
	// Go releases, which would make runs incomparable.
	rand.New(rand.NewSource(1)).Shuffle(len(streams), func(i, j int) {
		streams[i], streams[j] = streams[j], streams[i]
	})

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Building the per-stream iterators is setup, not the code under
		// test, so keep it out of the timed region.
		b.StopTimer()
		itrs := make([]EntryIterator, 0, len(streams))
		for _, s := range streams {
			itrs = append(itrs, NewStreamIterator(s))
		}
		b.StartTimer()

		it := NewHeapIterator(ctx, itrs, logproto.BACKWARD)
		for it.Next() {
			it.Entry()
		}
		// A failing Close means the measured run was not a clean one.
		if err := it.Close(); err != nil {
			b.Fatal(err)
		}
	}
}
14 changes: 12 additions & 2 deletions pkg/iter/sample_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,11 @@ type heapSampleIterator struct {
// NewHeapSampleIterator returns a new iterator which uses a heap to merge together
// entries for multiple iterators.
//
// ctx is only used to look up the chunk statistics stored on the returned
// iterator; is holds the iterators to merge and is kept as-is on the result.
//
// NOTE(review): this rendering of the diff lists both the removed
// `heap: &sampleIteratorHeap{}` line and the added `heap: &h` line; presumably
// only `heap: &h` remains in the merged file — confirm against the source.
func NewHeapSampleIterator(ctx context.Context, is []SampleIterator) SampleIterator {
// The heap's backing slice starts empty with capacity len(is), the same
// sizing used for tuples below.
h := sampleIteratorHeap(make([]SampleIterator, 0, len(is)))
return &heapSampleIterator{
stats: stats.GetChunkData(ctx),
is: is,
heap: &sampleIteratorHeap{},
heap: &h,
tuples: make([]sampletuple, 0, len(is)),
}
}
Expand Down Expand Up @@ -210,6 +211,16 @@ func (i *heapSampleIterator) Next() bool {
return false
}

// Shortcut: when only one iterator is left in the heap, serve its samples
// directly and skip the same-timestamp merge logic below.
if i.heap.Len() == 1 {
i.curr = i.heap.Peek().Sample()
i.currLabels = i.heap.Peek().Labels()
if !i.heap.Peek().Next() {
i.heap.Pop()
}
return true
}

// We support multiple entries with the same timestamp, and we want to
// preserve their original order. We look at all the top entries in the
// heap with the same timestamp, and pop the ones whose common value
Expand Down Expand Up @@ -359,7 +370,6 @@ func (w *withCloseSampleIterator) Close() error {
if err := w.closeFn(); err != nil {
w.errs = append(w.errs, err)
}

})
if len(w.errs) == 0 {
return nil
Expand Down
42 changes: 41 additions & 1 deletion pkg/iter/sample_iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package iter

import (
"context"
"fmt"
"io"
"math/rand"
"testing"
"time"

Expand Down Expand Up @@ -94,6 +96,7 @@ var varSeries = logproto.Series{
sample(1), sample(2), sample(3),
},
}

var carSeries = logproto.Series{
Labels: `{foo="car"}`,
Samples: []logproto.Sample{
Expand Down Expand Up @@ -145,7 +148,6 @@ func (f *fakeSampleClient) Recv() (*logproto.SampleQueryResponse, error) {
func (fakeSampleClient) Context() context.Context { return context.Background() }
func (fakeSampleClient) CloseSend() error { return nil }
func TestNewSampleQueryClientIterator(t *testing.T) {

it := NewSampleQueryClientIterator(&fakeSampleClient{
series: [][]logproto.Series{
{varSeries},
Expand Down Expand Up @@ -274,3 +276,41 @@ func TestSampleIteratorWithClose_ReturnsError(t *testing.T) {
err2 := it.Close()
assert.Equal(t, err, err2)
}

// BenchmarkHeapSampleIterator measures how long it takes to drain a heap
// sample iterator that merges seriesCount series iterators holding
// entriesCount samples in total.
//
// Samples are dealt round-robin across the series with strictly decreasing
// timestamps, so the samples inside each series are in descending timestamp
// order. The order of the series themselves is shuffled before the iterators
// are built.
func BenchmarkHeapSampleIterator(b *testing.B) {
	const (
		entriesCount = 10000
		seriesCount  = 100
	)
	ctx := context.Background()

	series := make([]logproto.Series, 0, seriesCount)
	for i := 0; i < seriesCount; i++ {
		series = append(series, logproto.Series{
			Labels: fmt.Sprintf(`{i="%d"}`, i),
		})
	}
	for i := 0; i < entriesCount; i++ {
		s := &series[i%seriesCount]
		s.Samples = append(s.Samples, logproto.Sample{
			Timestamp: int64(seriesCount - i),
			Value:     float64(i),
		})
	}
	// Shuffle with a fixed seed so every run benchmarks the same series
	// order; the package-level math/rand source is randomly seeded on newer
	// Go releases, which would make runs incomparable.
	rand.New(rand.NewSource(1)).Shuffle(len(series), func(i, j int) {
		series[i], series[j] = series[j], series[i]
	})

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Building the per-series iterators is setup, not the code under
		// test, so keep it out of the timed region.
		b.StopTimer()
		itrs := make([]SampleIterator, 0, len(series))
		for _, s := range series {
			itrs = append(itrs, NewSeriesIterator(s))
		}
		b.StartTimer()

		it := NewHeapSampleIterator(ctx, itrs)
		for it.Next() {
			it.Sample()
		}
		// A failing Close means the measured run was not a clean one.
		if err := it.Close(); err != nil {
			b.Fatal(err)
		}
	}
}