Skip to content

Commit

Permalink
feat: 用 lua_filter 固定字词的顺序(暂时未启用) close #586
Browse files Browse the repository at this point in the history
  • Loading branch information
iDvel committed Jan 15, 2024
1 parent 3c04ce5 commit 4b166f6
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 1 deletion.
87 changes: 87 additions & 0 deletions lua/pin_cand_filter.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
-- 置顶候选项

--- Look up a value in an array-like table.
-- Walks the table from index 1 and stops at the first nil entry.
-- @param list array-like table to search
-- @param str  value to look for (compared with ==)
-- @return true and the 1-based index of the first match, or false and 0
local function isInList(list, str)
    local pos = 1
    local item = list[pos]
    while item ~= nil do
        if item == str then
            return true, pos
        end
        pos = pos + 1
        item = list[pos]
    end
    return false, 0
end

local M = {}

--- Build the pin table from the schema configuration.
-- Reads the list stored under env.name_space in the schema config and converts
-- it into M.pin_cands. Each list item is a code, a tab character, then one or
-- more whitespace-separated words:
--   "ta" TAB "他 她 它"      -> M.pin_cands["ta"]     = { "他", "她", "它" }
--   "ni hao" TAB "你好 拟好" -> M.pin_cands["ni hao"] = { "你好", "拟好" }
-- Items without a tab separator are skipped. If the same code appears more
-- than once, the last item wins.
-- @param env filter environment passed in by the host; this function reads
--            env.engine.schema.config and reads/rewrites env.name_space
function M.init(env)
    local config = env.engine.schema.config
    -- Strip a leading "*" from the name space. "*" is a pattern magic
    -- character, so it is escaped to make the literal match explicit.
    env.name_space = env.name_space:gsub("^%*", "")

    M.pin_cands = {}

    -- The list is absent when the schema does not configure this filter;
    -- leave the pin table empty instead of raising on `list.size`.
    local list = config:get_list(env.name_space)
    if not list then
        return
    end

    for i = 0, list.size - 1 do
        local item = list:get_value_at(i)
        local value = item and item.value
        if value then
            local code, texts = value:match("([^\t]+)\t(.+)")
            if code and texts then
                local words = {}
                for text in texts:gmatch("%S+") do
                    words[#words + 1] = text
                end
                M.pin_cands[code] = words
            end
        end
    end
end

--- Move configured candidates to the front of the candidate stream.
-- Candidates are read from `input` while their preedit has a pin list in
-- M.pin_cands. A candidate whose text is in that list is placed in the slot
-- matching its position in the list; every other candidate read is kept in
-- arrival order. Scanning stops at the first candidate whose preedit has no
-- pin list, once the current pin list is fully matched, or once more than
-- `max_others` unpinned candidates have been buffered.
--
-- Output order: pinned candidates (pin-list order, grouped by pin list in
-- order of first appearance), then the buffered unpinned candidates, then
-- whatever is left in `input`.
--
-- Slots are tracked per pin list, so candidates with different preedits
-- cannot overwrite each other, and a repeated candidate text is passed
-- through as an ordinary candidate instead of replacing the one already
-- pinned.
-- @param input candidate stream; must provide input:iter(), and each
--              candidate must expose .preedit and .text
function M.func(input)
    local pin_cands = M.pin_cands or {}
    local max_others = 50

    local groups = {}        -- pin groups, in order of first appearance
    local group_by_pins = {} -- pin list (table identity) -> its group
    local others = {}        -- unpinned candidates, in arrival order

    for cand in input:iter() do
        local pins = pin_cands[cand.preedit]
        if not pins then
            -- This candidate has already been taken from the iterator, so
            -- it must be buffered before the scan stops.
            others[#others + 1] = cand
            break
        end

        local group = group_by_pins[pins]
        if not group then
            group = { pins = pins, slots = {}, count = 0 }
            group_by_pins[pins] = group
            groups[#groups + 1] = group
        end

        local ok, idx = isInList(pins, cand.text)
        if ok and group.slots[idx] == nil then
            group.slots[idx] = cand
            group.count = group.count + 1
        else
            -- Not pinned, or a repeat of an already pinned text.
            others[#others + 1] = cand
        end

        -- Stop once everything is found or too many candidates are buffered.
        if group.count == #pins or #others > max_others then
            break
        end
    end

    -- Pinned candidates first; slots left empty (configured words that this
    -- code never produced) are skipped.
    for _, group in ipairs(groups) do
        for i = 1, #group.pins do
            local cand = group.slots[i]
            if cand then
                yield(cand)
            end
        end
    end
    for _, cand in ipairs(others) do
        yield(cand)
    end
    -- Pass through everything that was not consumed by the scan above.
    for cand in input:iter() do
        yield(cand)
    end
end

return M
12 changes: 11 additions & 1 deletion rime.lua
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,17 @@ reduce_english_filter = require("reduce_english_filter")

-- 默认未启用:

-- Pin candidates to the top
-- When a candidate's cand.preedit equals the code on the left, the words on the right are moved to the top in the listed order.
-- Add `- lua_filter@pin_cand_filter` to engine/filters
-- Put the configuration in the schema.
-- NOTE: pin_cand_filter.lua parses each item with "([^\t]+)\t(.+)", so the code and the
-- word list must be separated by a tab character; the words are separated by whitespace.
-- pin_cand_filter:
-- - "l 了"
-- - "le 了"
-- - "ta 他 她 它"
-- - "ni hao 你好 拟好"
pin_cand_filter = require("pin_cand_filter")

-- 长词优先(全拼)
-- 在 engine/filters 增加 - lua_filter@long_word_filter
-- 在方案里写配置项:
Expand All @@ -45,7 +56,6 @@ reduce_english_filter = require("reduce_english_filter")
-- long_word_filter:
-- count: 2
-- idx: 4
--
-- Caution: a number of users previously reported memory leaks. Some optimisation has been done, but memory usage is still occasionally high; it causes no lag and does not affect performance, and returns to normal after redeploying.
-- If you enable it, place it near the end of the filter list: uniquifier last, long_word_filter second to last.
long_word_filter = require("long_word_filter")
Expand Down

0 comments on commit 4b166f6

Please sign in to comment.