-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathngram_scorer.go
95 lines (84 loc) · 1.86 KB
/
ngram_scorer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package main
import (
"bufio"
"fmt"
"log"
"math"
"os"
"sync"
)
// NgramScore is a log-probability table for n-grams, together with the
// parameters needed to score text against it.
type NgramScore struct {
	// ngrams maps each known n-gram to the log10 of its relative frequency.
	ngrams map[string]float64
	// L is the n-gram length in bytes used as the scoring window width;
	// N is the sum of all loaded counts.
	L, N int
	// floor is the log10 score charged for an n-gram missing from ngrams.
	floor float64
}
// NewNgramScore loads n-gram counts from filename and converts them into
// log10 relative frequencies.
//
// Every non-empty line must contain an n-gram followed by an integer count,
// separated by whitespace (for example "TION 13168375"). Empty lines are
// skipped. sep is accepted for backward compatibility but is not consulted:
// fields are always split on whitespace.
//
// On success the returned scorer has N set to the sum of the loaded counts,
// L set to the byte length of the last n-gram read (the file is presumed to
// hold n-grams of a single length), and floor set to log10(0.01/N), the score
// used for n-grams that are not in the table.
//
// An error is returned when the file cannot be opened or read, when a line
// cannot be parsed, or when the counts do not add up to a positive total
// (which includes an empty file).
func NewNgramScore(filename string, sep string) (*NgramScore, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	ngrams := make(map[string]float64)
	var (
		key   string
		count int
	)
	scanner := bufio.NewScanner(file)
	for lineNo := 1; scanner.Scan(); lineNo++ {
		line := scanner.Text()
		if line == "" {
			// Tolerate stray empty lines (e.g. at the end of the file).
			continue
		}
		if _, err := fmt.Sscanf(line, "%s %d", &key, &count); err != nil {
			return nil, fmt.Errorf("parsing %s line %d: %w", filename, lineNo, err)
		}
		ngrams[key] = float64(count)
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("reading %s: %w", filename, err)
	}

	// Sum over the map rather than while reading so that a repeated n-gram
	// contributes only its last recorded count.
	total := 0
	for _, v := range ngrams {
		total += int(v)
	}
	// Without a positive total the division below would yield Inf/NaN scores.
	if total <= 0 {
		return nil, fmt.Errorf("loading %s: n-gram counts sum to %d, want a positive total", filename, total)
	}

	denom := float64(total)
	for k, v := range ngrams {
		ngrams[k] = math.Log10(v / denom)
	}

	return &NgramScore{
		ngrams: ngrams,
		L:      len(key),
		N:      total,
		floor:  math.Log10(0.01 / denom),
	}, nil
}
// score returns the sum of the log10 scores of every window of s.L bytes in
// text, moving one byte at a time. A window found in the table contributes
// its stored value; any other window contributes s.floor. Text shorter than
// s.L has no windows and scores 0.
func (s *NgramScore) score(text string) float64 {
	width := s.L
	total := 0.0
	// end is the exclusive upper bound of the current window.
	for end := width; end <= len(text); end++ {
		logProb, known := s.ngrams[text[end-width:end]]
		if !known {
			logProb = s.floor
		}
		total += logProb
	}
	return total
}
// NgramScorer bundles one NgramScore table per n-gram size.
type NgramScorer struct {
	// bigrams, trigrams and quadgrams are the tables for 2-, 3- and
	// 4-letter n-grams respectively, as suggested by their names.
	bigrams, trigrams, quadgrams *NgramScore
}
var (
	// ngramScoreInstance is the shared NgramScorer; it stays nil until
	// GetNgramScorerInstance has run.
	ngramScoreInstance *NgramScorer
	// ngramScoreOnce ensures ngramScoreInstance is built a single time.
	ngramScoreOnce sync.Once
)
// GetNgramScorerInstance returns the shared NgramScorer, building it on the
// first call by loading the bigram, trigram and quadgram tables from the
// ./resources directory (resolved against the working directory). Later calls
// return the same pointer without reloading. If any table fails to load, the
// error is logged and the program exits via log.Fatal.
func GetNgramScorerInstance() *NgramScorer {
	ngramScoreOnce.Do(func() {
		ngramScoreInstance = &NgramScorer{}

		// Tables are loaded in this order; the first failure is fatal.
		sources := []struct {
			target **NgramScore
			path   string
		}{
			{&ngramScoreInstance.bigrams, "./resources/english_bigrams.txt"},
			{&ngramScoreInstance.trigrams, "./resources/english_trigrams.txt"},
			{&ngramScoreInstance.quadgrams, "./resources/english_quadgrams.txt"},
		}
		for _, src := range sources {
			table, err := NewNgramScore(src.path, " ")
			if err != nil {
				log.Fatal(err)
			}
			*src.target = table
		}
	})
	return ngramScoreInstance
}