Skip to content

Commit

Permalink
[pkg/pdatautil] Optimize the hashing function for pcommon.Map (open…
Browse files Browse the repository at this point in the history
…-telemetry#27840)

**Description:**

Improve the performance of the `MapHash` function, mostly by using the
architecture-optimized version of xxhash.

`hash.Sum` is a pure-Go implementation.
`xxhash.Sum64` has optimized versions for different architectures.
Both produce exactly the same hash, though.


For the given benchmarks, the gain is > 10%

From `main`:
```
goos: linux
goarch: amd64
pkg: github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil
cpu: 11th Gen Intel(R) Core(TM) i7-11800H @ 2.30GHz
BenchmarkMapHashFourItems-16                  	47676003	       236.0 ns/op	      24 B/op	       1 allocs/op
BenchmarkMapHashEightItems-16                 	22551222	       532.3 ns/op	      32 B/op	       2 allocs/op
BenchmarkMapHashWithEmbeddedSliceAndMap-16    	14098969	       893.1 ns/op	      56 B/op	       3 allocs/op
```

The PR:
```
goos: linux
goarch: amd64
pkg: github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil
cpu: 11th Gen Intel(R) Core(TM) i7-11800H @ 2.30GHz
BenchmarkMapHashFourItems-16                  	59854737	       203.4 ns/op	      24 B/op	       1 allocs/op
BenchmarkMapHashEightItems-16                 	25609375	       475.0 ns/op	      32 B/op	       2 allocs/op
BenchmarkMapHashWithEmbeddedSliceAndMap-16    	15950144	       753.8 ns/op	      56 B/op	       3 allocs/op
```

**Testing:**
(Re-)using the same tests and benchmarks to prove semantics didn't
change.
  • Loading branch information
mdonkers authored Oct 19, 2023
1 parent c44ad3c commit 762f843
Showing 1 changed file with 44 additions and 48 deletions.
92 changes: 44 additions & 48 deletions pkg/pdatautil/hash.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ package pdatautil // import "github.com/open-telemetry/opentelemetry-collector-c

import (
"encoding/binary"
"hash"
"math"
"sort"
"sync"
Expand All @@ -28,23 +27,19 @@ var (
valMapSuffix = []byte{'\xfd'}
valSlicePrefix = []byte{'\xfe'}
valSliceSuffix = []byte{'\xff'}

emptyHash = [16]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
)

// hashWriter holds the hasher and the reusable buffers used while hashing a map or value.
// NOTE(review): this listing is a rendered diff without +/- markers, so fields from
// before and after the change appear side by side — confirm against the raw diff.
type hashWriter struct {
// h is the hasher that the hw.h.Write / hw.h.Sum calls operate on.
h hash.Hash
// strBuf is scratch space where a prefix and a string are joined before being written to h.
strBuf []byte
// byteBuf accumulates the serialized bytes that hashSum128 passes to xxhash.Sum64.
byteBuf []byte
// keysBuf stores map keys collected by writeMapHash; it is truncated back at the end of each call.
keysBuf []string
// sumHash is the destination buffer handed to h.Sum in hashSum128.
sumHash []byte
// numBuf is an 8-byte scratch buffer for little-endian encoding of ints and doubles.
numBuf []byte
}

// newHashWriter allocates a hashWriter with an xxhash hasher and pre-sized buffers.
// NOTE(review): rendered diff without +/- markers; initializers from before and
// after the change appear together — confirm against the raw diff.
func newHashWriter() *hashWriter {
return &hashWriter{
h: xxhash.New(),
// Length 0 with spare capacity, so later appends can reuse the backing array.
strBuf: make([]byte, 0, 128),
byteBuf: make([]byte, 0, 512),
keysBuf: make([]string, 0, 16),
sumHash: make([]byte, 0, 16),
// Length 8 (not 0): filled in place by binary.LittleEndian.PutUint64.
numBuf: make([]byte, 8),
}
}

Expand All @@ -55,19 +50,27 @@ var hashWriterPool = &sync.Pool{
// MapHash returns a hash for the provided map.
// Maps with the same underlying key/value pairs in different order produce the same deterministic hash value.
// An empty map returns emptyHash without touching the writer pool.
func MapHash(m pcommon.Map) [16]byte {
if m.Len() == 0 {
return emptyHash
}

// Borrow a pooled writer; the deferred Put hands it back when the function returns.
hw := hashWriterPool.Get().(*hashWriter)
defer hashWriterPool.Put(hw)
// Clear whatever state the writer carries from its previous use.
// NOTE(review): rendered diff without +/- markers — the h.Reset and byteBuf
// truncation look like the before/after variants of the same reset step.
hw.h.Reset()
hw.byteBuf = hw.byteBuf[:0]

hw.writeMapHash(m)

return hw.hashSum128()
}

// ValueHash returns a hash for the provided pcommon.Value.
func ValueHash(v pcommon.Value) [16]byte {
// Borrow a pooled writer; the deferred Put hands it back when the function returns.
hw := hashWriterPool.Get().(*hashWriter)
defer hashWriterPool.Put(hw)
// Clear whatever state the writer carries from its previous use.
// NOTE(review): rendered diff without +/- markers — the h.Reset and byteBuf
// truncation look like the before/after variants of the same reset step.
hw.h.Reset()
hw.byteBuf = hw.byteBuf[:0]

hw.writeValueHash(v)

return hw.hashSum128()
}

Expand All @@ -90,70 +93,63 @@ func (hw *hashWriter) writeMapHash(m pcommon.Map) {
sort.Strings(workingKeySet)
for _, k := range workingKeySet {
v, _ := m.Get(k)
hw.strBuf = hw.strBuf[:0]
hw.strBuf = append(hw.strBuf, keyPrefix...)
hw.strBuf = append(hw.strBuf, k...)
hw.h.Write(hw.strBuf)
hw.byteBuf = append(hw.byteBuf, keyPrefix...)
hw.byteBuf = append(hw.byteBuf, k...)
hw.writeValueHash(v)
}

// Remove all keys that were added to the buffer during this call of the function
hw.keysBuf = hw.keysBuf[:nextIndex]
}

// writeSliceHash passes every element of sl, in index order, to writeValueHash.
// NOTE(review): in this rendered diff the same loop also appears inline in the
// slice case of writeValueHash; this helper may be the removed side — confirm.
func (hw *hashWriter) writeSliceHash(sl pcommon.Slice) {
for i := 0; i < sl.Len(); i++ {
hw.writeValueHash(sl.At(i))
}
}

// writeValueHash serializes v according to its type: a type-specific prefix
// (or a fixed marker for booleans and empty values) followed by the payload.
// Maps and slices are additionally closed with a suffix marker, and their
// contents are serialized recursively.
// NOTE(review): this is a rendered diff without +/- markers — in each case the
// hw.h.Write lines and the hw.byteBuf appends look like the before/after
// variants of the same step; confirm against the raw diff.
func (hw *hashWriter) writeValueHash(v pcommon.Value) {
switch v.Type() {
case pcommon.ValueTypeStr:
hw.strBuf = hw.strBuf[:0]
hw.strBuf = append(hw.strBuf, valStrPrefix...)
hw.strBuf = append(hw.strBuf, v.Str()...)
hw.h.Write(hw.strBuf)
hw.byteBuf = append(hw.byteBuf, valStrPrefix...)
hw.byteBuf = append(hw.byteBuf, v.Str()...)
case pcommon.ValueTypeBool:
if v.Bool() {
hw.h.Write(valBoolTrue)
hw.byteBuf = append(hw.byteBuf, valBoolTrue...)
} else {
hw.h.Write(valBoolFalse)
hw.byteBuf = append(hw.byteBuf, valBoolFalse...)
}
case pcommon.ValueTypeInt:
// Integers are encoded as the 8 little-endian bytes of their uint64 conversion.
hw.h.Write(valIntPrefix)
binary.LittleEndian.PutUint64(hw.numBuf, uint64(v.Int()))
hw.h.Write(hw.numBuf)
hw.byteBuf = append(hw.byteBuf, valIntPrefix...)
hw.byteBuf = binary.LittleEndian.AppendUint64(hw.byteBuf, uint64(v.Int()))
case pcommon.ValueTypeDouble:
// Doubles are encoded as the 8 little-endian bytes of math.Float64bits.
hw.h.Write(valDoublePrefix)
binary.LittleEndian.PutUint64(hw.numBuf, math.Float64bits(v.Double()))
hw.h.Write(hw.numBuf)
hw.byteBuf = append(hw.byteBuf, valDoublePrefix...)
hw.byteBuf = binary.LittleEndian.AppendUint64(hw.byteBuf, math.Float64bits(v.Double()))
case pcommon.ValueTypeMap:
// Nested map: prefix, the map's own serialization, then suffix.
hw.h.Write(valMapPrefix)
hw.byteBuf = append(hw.byteBuf, valMapPrefix...)
hw.writeMapHash(v.Map())
hw.h.Write(valMapSuffix)
hw.byteBuf = append(hw.byteBuf, valMapSuffix...)
case pcommon.ValueTypeSlice:
// Nested slice: prefix, each element in index order, then suffix.
hw.h.Write(valSlicePrefix)
hw.writeSliceHash(v.Slice())
hw.h.Write(valSliceSuffix)
sl := v.Slice()
hw.byteBuf = append(hw.byteBuf, valSlicePrefix...)
for i := 0; i < sl.Len(); i++ {
hw.writeValueHash(sl.At(i))
}
hw.byteBuf = append(hw.byteBuf, valSliceSuffix...)
case pcommon.ValueTypeBytes:
hw.h.Write(valBytesPrefix)
hw.h.Write(v.Bytes().AsRaw())
hw.byteBuf = append(hw.byteBuf, valBytesPrefix...)
hw.byteBuf = append(hw.byteBuf, v.Bytes().AsRaw()...)
case pcommon.ValueTypeEmpty:
hw.h.Write(valEmpty)
hw.byteBuf = append(hw.byteBuf, valEmpty...)
}
}

// hashSum128 returns a [16]byte hash sum.
// The 16 bytes are built from two 64-bit sums: one over the data written so
// far, and one over the same data with extraByte appended.
// NOTE(review): this is a rendered diff without +/- markers — the hw.h.Sum path
// (b / copy / return res) and the xxhash.Sum64 path (r / res / return r) look
// like the before/after variants; as shown, `res` is declared twice and there
// are two returns, so this listing is not compilable as-is. Confirm against the raw diff.
func (hw *hashWriter) hashSum128() [16]byte {
b := hw.sumHash[:0]
b = hw.h.Sum(b)
r := [16]byte{}
res := r[:]

// First half: res[:0] has capacity 16, so the append writes into r[0:8] in place.
h := xxhash.Sum64(hw.byteBuf)
res = binary.LittleEndian.AppendUint64(res[:0], h)

// Append an extra byte to generate another part of the hash sum
_, _ = hw.h.Write(extraByte)
b = hw.h.Sum(b)
hw.byteBuf = append(hw.byteBuf, extraByte...)
h = xxhash.Sum64(hw.byteBuf)
// Second half: res[8:] is empty with 8 bytes of capacity left, so the append
// writes into r[8:16] in place and the returned slice can be discarded.
_ = binary.LittleEndian.AppendUint64(res[8:], h)

res := [16]byte{}
copy(res[:], b)
return res
return r
}

0 comments on commit 762f843

Please sign in to comment.