Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OTEL-2348] Improve DDSketch to Sketch conversion #464

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions pkg/otlp/metrics/sketches_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ func TestKnownDistributionsQuantile(t *testing.T) {
startTime := timeNow.Add(-10 * time.Second)
name := "example.histo"
const (
N uint64 = 1_000
N uint64 = 2_000
M uint64 = 100
)

Expand Down Expand Up @@ -545,10 +545,6 @@ func TestKnownDistributionsQuantile(t *testing.T) {
{
name: "U-quadratic distribution (a=-N,b=0)",
quantile: sketchtest.UQuadraticQ(-fN, 0),
// Similar to the pkg/quantile tests, the p99 for this test fails, likely due to the shift of leftover bucket counts the right that is performed
// during the DDSketch -> quantile.Sketch conversion, causing the p99 of the output sketch to fall on 0
// (which means the InEpsilon check returns 1).
excludedQuantiles: map[int]struct{}{99: {}},
},
{
name: "U-quadratic distribution (a=-N,b=N)",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@
1344
],
"Counts": [
5,
4,
1,
3,
3,
10,
0,
2,
1,
3,
4
2,
2,
4,
1
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
4,
1,
3,
3,
10,
0,
2,
1,
3,
4
2,
2,
4,
1
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
4,
1,
3,
3,
10,
0,
2,
1,
3,
4
2,
2,
4,
1
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
4,
1,
3,
3,
10,
0,
2,
1,
3,
4
2,
2,
4,
1
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@
1344
],
"Counts": [
5,
4,
1,
3,
3,
10,
0,
2,
1,
3,
4
2,
2,
4,
1
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
4,
1,
3,
3,
10,
0,
2,
1,
3,
4
2,
2,
4,
1
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
4,
1,
3,
3,
10,
0,
2,
1,
3,
4
2,
2,
4,
1
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
4,
1,
3,
3,
10,
0,
2,
1,
3,
4
2,
2,
4,
1
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@
1344
],
"Counts": [
5,
4,
1,
3,
3,
10,
0,
2,
1,
3,
4
2,
2,
4,
1
]
}
],
Expand Down
47 changes: 14 additions & 33 deletions pkg/quantile/ddsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@ func createDDSketchWithSketchMapping(c *Config, inputSketch *ddsketch.DDSketch)

// Take parameters that match the Sketch mapping, and create a LogarithmicMapping out of them
gamma := c.gamma.v
// Note: there's a 0.5 shift here because we take the floor value on DDSketch, vs. rounding to
// integer in the Agent sketch.
offset := float64(c.norm.bias) + 0.5
offset := float64(c.norm.bias)
newMapping, err := mapping.NewLogarithmicMappingWithGamma(gamma, offset)
if err != nil {
return nil, fmt.Errorf("couldn't create LogarithmicMapping for DDSketch: %w", err)
Expand All @@ -51,39 +49,27 @@ type floatKeyCount struct {

// convertFloatCountsToIntCounts converts a list of float counts to integer counts,
// preserving the total count of the list by tracking leftover decimal counts.
// TODO: this tends to shift sketches towards the right (since the leftover counts
// get added to the rightmost bucket). This could be improved by adding leftover
// counts to the key that's the weighted average of keys that contribute to that leftover count.
func convertFloatCountsToIntCounts(floatKeyCounts []floatKeyCount) []KeyCount {
// It additionally returns the total count as an integer.
func convertFloatCountsToIntCounts(floatKeyCounts []floatKeyCount) ([]KeyCount, uint) {
keyCounts := make([]KeyCount, 0, len(floatKeyCounts))

sort.Slice(floatKeyCounts, func(i, j int) bool {
return floatKeyCounts[i].k < floatKeyCounts[j].k
})

leftoverCount := 0.0
floatTotal := 0.0
intTotal := uint(0)
for _, fkc := range floatKeyCounts {
key := fkc.k
count := fkc.c

// Add leftovers from previous bucket, and compute leftovers
// for the next bucket
count += leftoverCount
uintCount := uint(count)
leftoverCount = count - float64(uintCount)

keyCounts = append(keyCounts, KeyCount{k: Key(key), n: uintCount})
floatTotal += fkc.c
// This is mathematically equivalent to Round(floatTotal - intTotal)
rounded := uint(math.Round(floatTotal)) - intTotal
intTotal += rounded
// At this point, |floatTotal - intTotal| <= 0.5
keyCounts = append(keyCounts, KeyCount{k: Key(fkc.k), n: rounded})
}
// intTotal == uint(math.Round(floatTotal))

// Edge case where there may be some leftover count because the total count
// isn't an int (or due to float64 precision errors). In this case, round to
// nearest.
if leftoverCount >= 0.5 {
lastIndex := len(keyCounts) - 1
keyCounts[lastIndex] = KeyCount{k: keyCounts[lastIndex].k, n: keyCounts[lastIndex].n + 1}
}

return keyCounts
return keyCounts, intTotal
}

// convertDDSketchIntoSketch takes a DDSketch and moves its data to a Sketch.
Expand Down Expand Up @@ -155,19 +141,14 @@ func convertDDSketchIntoSketch(c *Config, inputSketch *ddsketch.DDSketch) (*Sket
floatKeyCounts = append(floatKeyCounts, floatKeyCount{k: 0, c: zeroes})

// Generate the integer KeyCount objects from the counts we retrieved
keyCounts := convertFloatCountsToIntCounts(floatKeyCounts)
keyCounts, cnt := convertFloatCountsToIntCounts(floatKeyCounts)

// Populate sparseStore object with the collected keyCounts
// insertCounts will take care of creating multiple uint16 bins for a
// single key if the count overflows uint16
sparseStore.insertCounts(c, keyCounts)

// Create summary object
// Calculate the total count that was inserted in the Sketch
var cnt uint
for _, v := range keyCounts {
cnt += v.n
}
sum := inputSketch.GetSum()
avg := sum / float64(cnt)
max, err := inputSketch.GetMaxValue()
Expand Down
57 changes: 39 additions & 18 deletions pkg/quantile/ddsketch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,13 +196,9 @@ func TestConvertDDSketchIntoSketch(t *testing.T) {
name: "Uniform distribution (a=0,b=N)",
quantile: sketchtest.UniformQ(0, N),
},
// The p99 for this test fails, likely due to the shift of leftover bucket counts the right that is performed
// during the DDSketch -> Sketch conversion, causing the p99 of the output sketch to fall on 0
// (which means the InEpsilon check returns 1).
{
name: "Uniform distribution (a=-N,b=0)",
quantile: sketchtest.UniformQ(-N, 0),
excludedQuantiles: map[int]bool{99: true},
name: "Uniform distribution (a=-N,b=0)",
quantile: sketchtest.UniformQ(-N, 0),
},
{
name: "Uniform distribution (a=-N,b=N)",
Expand All @@ -213,18 +209,16 @@ func TestConvertDDSketchIntoSketch(t *testing.T) {
quantile: sketchtest.UQuadraticQ(0, N),
},
{
name: "U-quadratic distribution (a=-N,b=N)",
name: "U-quadratic distribution (a=-N/2,b=N/2)",
quantile: sketchtest.UQuadraticQ(-N/2, N/2),
},
{
name: "U-quadratic distribution (a=-N,b=N)",
name: "U-quadratic distribution (a=-N,b=0)",
quantile: sketchtest.UQuadraticQ(-N, 0),
},
// Same as above, p99 fails.
{
name: "Truncated Exponential distribution (a=0,b=N,lambda=1/100)",
quantile: sketchtest.TruncateQ(0, N, sketchtest.ExponentialQ(1.0/100), sketchtest.ExponentialCDF(1.0/100)),
excludedQuantiles: map[int]bool{99: true},
name: "Truncated Exponential distribution (a=0,b=N,lambda=1/100)",
quantile: sketchtest.TruncateQ(0, N, sketchtest.ExponentialQ(1.0/100), sketchtest.ExponentialCDF(1.0/100)),
},
{
name: "Truncated Normal distribution (a=0,b=8,mu=0, sigma=1e-3)",
Expand Down Expand Up @@ -280,12 +274,39 @@ func TestConvertDDSketchIntoSketch(t *testing.T) {
outputSketch, err := convertDDSketchIntoSketch(sketchConfig, convertedSketch)
require.NoError(t, err)

// Conversion accuracy formula taken from:
// https://github.com/DataDog/logs-backend/blob/895e56c9eefa1c28a3affbdd0027f58a4c6f4322/domains/event-store/libs/event-store-aggregate/src/test/java/com/dd/event/store/api/query/sketch/SketchTest.java#L409-L422
inputGamma := (1.0 + convertedSketch.RelativeAccuracy()) / (1.0 - convertedSketch.RelativeAccuracy())
outputGamma := sketchConfig.gamma.v
conversionGamma := inputGamma * outputGamma * outputGamma
conversionRelativeAccuracy := (conversionGamma - 1) / (conversionGamma + 1)
/* We compute the expected bound on the relative error between percentile values before
* and after the conversion.
*
* The error on accumulated bin counts caused by the rounding process is bounded by 0.5
* by design. This means that the quantiles should shift by one bin at most in most
* circumstances, including the scenarios under test.
*
* Note that there are some rare edge cases:
*
* - If the input DDSketch has a mapping much coarser than the default sketchConfig,
* and it contains a bin with a low count (say, 1), that bin could get remapped to
* multiple output bins with a count < 0.5. In that case, a 0.5 error on the
* accumulated count could translate to quantiles shifting by multiple bins.
*
* - Bins with zero counts are ignored during the conversion, so a "shift by one bin"
* can actually greatly affect the result.
* For instance, a DDSketch with bins like:
* [1.2 1.2 0.0 ... 0.0 1.2]
* will be rounded as:
* [1 1 0 ... 0 2 ]
* with a fractional count of 0.4 being shifted by arbitrarily many bins.
* However, this should not happen for DDSketches with integer counts, or those
* remapped from one: the accumulated count up to the zero bins would have to be an
* integer, preventing "long-distance" carry over like this.
*
* An additional source of error is that while `DDSketch.GetValueAtQuantile` returns the
* result bin's center value, `Sketch.Quantile` interpolates linearly between its two
* bounds.
*
* This means the expected worst case is a comparison between the upper bound of bin N+1
* and the center of bin N, giving the following accuracy formula:
*/
conversionRelativeAccuracy := (sketchConfig.gamma.v*sketchConfig.gamma.v)/(1+convertedSketch.RelativeAccuracy()) - 1

// Check the count of the output sketch
assert.InDelta(
Expand Down
3 changes: 3 additions & 0 deletions pkg/quantile/sparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ func (s *Sketch) Quantile(c *Config, q float64) float64 {

vLow := c.f64(b.k)
vHigh := vLow * c.gamma.v
if b.k < 0 { // bounds need to be swapped for negative bins
vLow, vHigh = vHigh, vLow
}

switch i {
case s.bins.Len():
Expand Down
Loading