diff --git a/dict_1.16.go b/dict_1.16.go index 5e31686..1042e4c 100644 --- a/dict_1.16.go +++ b/dict_1.16.go @@ -22,6 +22,9 @@ import ( // var dataDict string var ( + //go:embed data/dict/jp/dict.txt + ja string + //go:embed data/dict/zh/t_1.txt zhT string //go:embed data/dict/zh/s_1.txt @@ -68,6 +71,10 @@ func (seg *Segmenter) loadZhST(d string) (begin int, err error) { func (seg *Segmenter) LoadDictEmbed(dict ...string) (err error) { if len(dict) > 0 { d := dict[0] + if d == "ja" { + return seg.LoadDictStr(ja) + } + if d == "zh" { return seg.loadZh() } diff --git a/dict_util.go b/dict_util.go index fa53725..a049008 100644 --- a/dict_util.go +++ b/dict_util.go @@ -217,7 +217,7 @@ func (seg *Segmenter) LoadDict(files ...string) error { return nil } -// GetCurrentFilePath get current file path +// GetCurrentFilePath get the current file path func GetCurrentFilePath() string { _, filePath, _, _ := runtime.Caller(1) return filePath @@ -360,7 +360,7 @@ func DictPaths(dictDir, filePath string) (files []string) { } for i := 0; i < len(fileName); i++ { - if fileName[i] == "jp" { + if fileName[i] == "ja" || fileName[i] == "jp" { dictPath = path.Join(dictDir, "dict/jp/dict.txt") } @@ -384,7 +384,8 @@ func DictPaths(dictDir, filePath string) (files []string) { dictName := fileName[i] != "en" && fileName[i] != "zh" && fileName[i] != "zh_s" && fileName[i] != "zh_t" && - fileName[i] != "jp" && fileName[i] != "ti" + fileName[i] != "ja" && fileName[i] != "jp" && + fileName[i] != "ko" && fileName[i] != "ti" if dictName { dictPath = fileName[i] diff --git a/segmenter_test.go b/segmenter_test.go index cd247f6..e1bbcc9 100755 --- a/segmenter_test.go +++ b/segmenter_test.go @@ -83,7 +83,21 @@ func TestSegment(t *testing.T) { func TestSegmentJp(t *testing.T) { var seg Segmenter // SkipLog = true - seg.LoadDict("data/dict/jp/dict.txt") + err := seg.LoadDict("data/dict/jp/dict.txt") + tt.Nil(t, err) + tt.Equal(t, 794146, len(seg.Dict.Tokens)) + tt.Equal(t, 4.784183005e+09, seg.Dict.totalFreq) + + f, pos, ok := seg.Find("自由") + tt.Bool(t, ok) + tt.Equal(t, "名詞", pos) + tt.Equal(t, 3636, f) + + f, pos, ok = seg.Find("此の度") + tt.Bool(t, ok) + tt.Equal(t, "名詞", pos) + tt.Equal(t, 5257, f) + segments := seg.Segment(testH) tt.Expect(t, "こんにちは/感動詞 世界/名詞 ", ToString(segments, false))