Skip to content

Commit

Permalink
Fix bug: Do not add empty values to the term dictionary
Browse files: browse the repository at this point in the history
  • Loading branch information
alldroll committed Dec 3, 2019
1 parent 057a136 commit 571a36c
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
10 changes: 8 additions & 2 deletions pkg/lm/binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,16 @@ func (dr *dictionaryReader) Iterate(iterator dictionary.Iterator) error {
return err
}

- _, _ = tree.Insert(&dictItem{
+ if len(line[:tabIndex]) == 0 {
+ continue
+ }
+
+ item := &dictItem{
  word: line[:tabIndex],
  count: WordCount(count),
- })
+ }
+
+ _, _ = tree.Insert(item)
}

if err := lineScanner.Err(); err != nil {
Expand Down
3 changes: 1 addition & 2 deletions pkg/lm/ngram_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ func (gr *googleNGramFormatReader) readNGramVector(builder NGramVectorBuilder, o
for scanner.Scan() {
line := scanner.Text()
tabIndex := strings.Index(line, "\t")
+ nGrams = nGrams[:0]

for _, word := range strings.Split(line[:tabIndex], " ") {
index, err := gr.indexer.Get(word)
Expand All @@ -86,8 +87,6 @@ func (gr *googleNGramFormatReader) readNGramVector(builder NGramVectorBuilder, o
if err := builder.Put(nGrams, WordCount(count)); err != nil {
return fmt.Errorf("failed to add nGrams to a builder: %v", err)
}
-
- nGrams = nGrams[:0]
}

if err := scanner.Err(); err != nil {
Expand Down

0 comments on commit 571a36c

Please sign in to comment.