Skip to content
This repository has been archived by the owner on Aug 13, 2019. It is now read-only.

Commit

Permalink
Fix reader for large index files.
Browse files Browse the repository at this point in the history
Currently the offsets are cast into uint32 even though the index can
grow larger than 4GiB.

Signed-off-by: Goutham Veeramachaneni <cs14btech11014@iith.ac.in>
  • Loading branch information
gouthamve committed Feb 5, 2018
1 parent 44dd5e1 commit bb0e74b
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,11 @@ func NewWriter(fn string) (*Writer, error) {
if err != nil {
return nil, err
}
defer df.Close() // close for flatform windows
defer df.Close() // Close for platform windows.

if err := os.RemoveAll(fn); err != nil {
return nil, errors.Wrap(err, "remove any existing index at path")
}

f, err := os.OpenFile(fn, os.O_CREATE|os.O_WRONLY, 0666)
if err != nil {
Expand Down Expand Up @@ -530,8 +534,8 @@ type Reader struct {
c io.Closer

// Cached hashmaps of section offsets.
labels map[string]uint32
postings map[labels.Label]uint32
labels map[string]uint64
postings map[labels.Label]uint64
// Cache of read symbols. Strings that are returned when reading from the
// block are always backed by true strings held in here rather than
// strings that are backed by byte slices from the mmap'd index file. This
Expand Down Expand Up @@ -595,8 +599,8 @@ func newReader(b ByteSlice, c io.Closer, version int) (*Reader, error) {
b: b,
c: c,
symbols: map[uint32]string{},
labels: map[string]uint32{},
postings: map[labels.Label]uint32{},
labels: map[string]uint64{},
postings: map[labels.Label]uint64{},
crc32: newCRC32(),
version: version,
}
Expand All @@ -617,7 +621,7 @@ func newReader(b ByteSlice, c io.Closer, version int) (*Reader, error) {
}
var err error

err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint32) error {
err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint64) error {
if len(key) != 1 {
return errors.Errorf("unexpected key length %d", len(key))
}
Expand All @@ -627,7 +631,7 @@ func newReader(b ByteSlice, c io.Closer, version int) (*Reader, error) {
if err != nil {
return nil, errors.Wrap(err, "read label index table")
}
err = r.readOffsetTable(r.toc.postingsTable, func(key []string, off uint32) error {
err = r.readOffsetTable(r.toc.postingsTable, func(key []string, off uint64) error {
if len(key) != 2 {
return errors.Errorf("unexpected key length %d", len(key))
}
Expand Down Expand Up @@ -780,7 +784,7 @@ func (r *Reader) readSymbols(off int) error {
// readOffsetTable reads an offset table at the given position and calls f for each
// found entry.
// If f returns an error it stops decoding and returns the received error.
func (r *Reader) readOffsetTable(off uint64, f func([]string, uint32) error) error {
func (r *Reader) readOffsetTable(off uint64, f func([]string, uint64) error) error {
d := r.decbufAt(int(off))
cnt := d.be32()

Expand All @@ -791,7 +795,7 @@ func (r *Reader) readOffsetTable(off uint64, f func([]string, uint32) error) err
for i := 0; i < keyCount; i++ {
keys = append(keys, d.uvarintStr())
}
o := uint32(d.uvarint())
o := d.uvarint64()
if d.err() != nil {
break
}
Expand Down

0 comments on commit bb0e74b

Please sign in to comment.