Skip to content
This repository has been archived by the owner on Aug 13, 2019. It is now read-only.

Commit

Permalink
Merge pull request #262 from cstyan/callum-249
Browse files Browse the repository at this point in the history
Reduce index file size caused by symbol references by using indices rather than offsets
  • Loading branch information
gouthamve authored Jan 18, 2018
2 parents c7e30f9 + cd67584 commit 467948f
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 10 deletions.
2 changes: 1 addition & 1 deletion docs/format/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ Most of the sections described below start with a `len` field. It always specifi
The symbol table holds a sorted list of deduplicated strings that occurred in label pairs of the stored series. They can be referenced from subsequent sections and significantly reduce the total index size.

The section contains a sequence of the string entries, each prefixed with the string's length in raw bytes. All strings are UTF-8 encoded.
Strings are referenced by pointing to the beginning of their length field. The strings are sorted in lexicographically ascending order.
Strings are referenced by their sequential, zero-based index in the table (the first string is 0, the second is 1, and so on). The strings are sorted in lexicographically ascending order.

```
┌────────────────────┬─────────────────────┐
Expand Down
29 changes: 20 additions & 9 deletions index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,17 +273,18 @@ func (w *Writer) AddSeries(ref uint64, lset labels.Labels, chunks ...chunks.Meta
w.buf2.putUvarint(len(lset))

for _, l := range lset {
offset, ok := w.symbols[l.Name]
// here we have an index for the symbol file if v2, otherwise it's an offset
index, ok := w.symbols[l.Name]
if !ok {
return errors.Errorf("symbol entry for %q does not exist", l.Name)
}
w.buf2.putUvarint32(offset)
w.buf2.putUvarint32(index)

offset, ok = w.symbols[l.Value]
index, ok = w.symbols[l.Value]
if !ok {
return errors.Errorf("symbol entry for %q does not exist", l.Value)
}
w.buf2.putUvarint32(offset)
w.buf2.putUvarint32(index)
}

w.buf2.putUvarint(len(chunks))
Expand Down Expand Up @@ -341,8 +342,8 @@ func (w *Writer) AddSymbols(sym map[string]struct{}) error {

w.symbols = make(map[string]uint32, len(symbols))

for _, s := range symbols {
w.symbols[s] = uint32(w.pos) + headerSize + uint32(w.buf2.len())
for index, s := range symbols {
w.symbols[s] = uint32(index)
w.buf2.putUvarintStr(s)
}

Expand Down Expand Up @@ -381,12 +382,13 @@ func (w *Writer) WriteLabelIndex(names []string, values []string) error {
w.buf2.putBE32int(len(names))
w.buf2.putBE32int(valt.Len())

// here we have an index for the symbol file if v2, otherwise it's an offset
for _, v := range valt.s {
offset, ok := w.symbols[v]
index, ok := w.symbols[v]
if !ok {
return errors.Errorf("symbol entry for %q does not exist", v)
}
w.buf2.putBE32(offset)
w.buf2.putBE32(index)
}

w.buf1.reset()
Expand Down Expand Up @@ -756,11 +758,20 @@ func (r *Reader) readSymbols(off int) error {
basePos = uint32(off) + 4
nextPos = basePos + uint32(origLen-d.len())
)

if r.version == 2 {
nextPos = 0
}

for d.err() == nil && d.len() > 0 && cnt > 0 {
s := d.uvarintStr()
r.symbols[uint32(nextPos)] = s

nextPos = basePos + uint32(origLen-d.len())
if r.version == 2 {
nextPos++
} else {
nextPos = basePos + uint32(origLen-d.len())
}
cnt--
}
return errors.Wrap(d.err(), "read symbols")
Expand Down

0 comments on commit 467948f

Please sign in to comment.