Skip to content

Commit

Permalink
Merge pull request #151 from go-ego/range-pr
Browse files Browse the repository at this point in the history
Add embed and option support for more languages
  • Loading branch information
vcaesar authored May 2, 2022
2 parents d31f3a4 + 37e79df commit 1ff79cb
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 4 deletions.
7 changes: 7 additions & 0 deletions dict_1.16.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ import (
// var dataDict string

var (
//go:embed data/dict/jp/dict.txt
ja string

//go:embed data/dict/zh/t_1.txt
zhT string
//go:embed data/dict/zh/s_1.txt
Expand Down Expand Up @@ -68,6 +71,10 @@ func (seg *Segmenter) loadZhST(d string) (begin int, err error) {
func (seg *Segmenter) LoadDictEmbed(dict ...string) (err error) {
if len(dict) > 0 {
d := dict[0]
if d == "ja" {
return seg.LoadDictStr(ja)
}

if d == "zh" {
return seg.loadZh()
}
Expand Down
7 changes: 4 additions & 3 deletions dict_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ func (seg *Segmenter) LoadDict(files ...string) error {
return nil
}

// GetCurrentFilePath get current file path
// GetCurrentFilePath returns the source file path of the function that called it.
func GetCurrentFilePath() string {
_, filePath, _, _ := runtime.Caller(1)
return filePath
Expand Down Expand Up @@ -360,7 +360,7 @@ func DictPaths(dictDir, filePath string) (files []string) {
}

for i := 0; i < len(fileName); i++ {
if fileName[i] == "jp" {
if fileName[i] == "ja" || fileName[i] == "jp" {
dictPath = path.Join(dictDir, "dict/jp/dict.txt")
}

Expand All @@ -384,7 +384,8 @@ func DictPaths(dictDir, filePath string) (files []string) {
dictName := fileName[i] != "en" &&
fileName[i] != "zh" &&
fileName[i] != "zh_s" && fileName[i] != "zh_t" &&
fileName[i] != "jp" && fileName[i] != "ti"
fileName[i] != "ja" && fileName[i] != "jp" &&
fileName[i] != "ko" && fileName[i] != "ti"

if dictName {
dictPath = fileName[i]
Expand Down
16 changes: 15 additions & 1 deletion segmenter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,21 @@ func TestSegment(t *testing.T) {
func TestSegmentJp(t *testing.T) {
var seg Segmenter
// SkipLog = true
seg.LoadDict("data/dict/jp/dict.txt")
err := seg.LoadDict("data/dict/jp/dict.txt")
tt.Nil(t, err)
tt.Equal(t, 794146, len(seg.Dict.Tokens))
tt.Equal(t, 4.784183005e+09, seg.Dict.totalFreq)

f, pos, ok := seg.Find("自由")
tt.Bool(t, ok)
tt.Equal(t, "名詞", pos)
tt.Equal(t, 3636, f)

f, pos, ok = seg.Find("此の度")
tt.Bool(t, ok)
tt.Equal(t, "名詞", pos)
tt.Equal(t, 5257, f)

segments := seg.Segment(testH)

tt.Expect(t, "こんにちは/感動詞 世界/名詞 ", ToString(segments, false))
Expand Down

0 comments on commit 1ff79cb

Please sign in to comment.