-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathword_utilities.py
66 lines (56 loc) · 1.4 KB
/
word_utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 17 12:52:39 2018
@author: luo
"""
import jieba.posseg as pseg
from datetime import datetime
# Negation markers. parse_false() walks this tuple in order, strips the first
# marker it finds in a question and reports the question as negated (False).
# Order matters: only the first matching entry is acted on.
FALSE = (
    "是错",
    "没有",
    "不属于",
    "不是",
    "不能",
    "不可以",
    "不对",
    "不正确",
    "不提供",
    "不包括",
    "不存在",
    "不经过",
    "未",
    "错误",
)
def pre_process_question(keyword):
    """
    Normalize raw question text into a compact keyword string.

    Steps, in order:
      1. Remove newlines, curly quotes, book-title marks and question marks
         (both the ASCII "?" and the full-width "？").
      2. Rewrite "我国" to "中国", and the relative dates "今天" / "今年" /
         "这个月" to the current local date, year, and year+month.
      3. Keep only the text after the last "." in the string.
      4. Drop all spaces and trim carriage returns / newlines from the
         remaining pieces.

    :param keyword: raw question text
    :return: the cleaned question string
    """
    now = datetime.today()
    replacements = (
        ("\n", ""),
        ("“", ""),
        ("”", ""),
        ("?", ""),
        # Full-width question mark: the usual terminator in Chinese text,
        # handled alongside the other full-width punctuation above.
        ("？", ""),
        ("《", ""),
        ("》", ""),
        ("我国", "中国"),
        ("今天", "{0}年{1}月{2}日".format(now.year, now.month, now.day)),
        ("今年", "{0}年".format(now.year)),
        ("这个月", "{0}年{1}月".format(now.year, now.month)),
    )
    for old, new in replacements:
        keyword = keyword.replace(old, new)

    # NOTE(review): presumably meant to drop a leading item number such as
    # "1."; it also discards everything before any later "." in the text.
    keyword = keyword.split(".")[-1]

    # Splitting on " " and re-joining removes every space; empty pieces
    # produced by consecutive spaces are skipped.
    pieces = [piece.strip("\r\n") for piece in keyword.split(" ") if piece]
    return "".join(pieces)
def parse_false(question):
    """
    Look for a negation marker in the question and strip it out.

    The markers in FALSE are tried in order. For the first one that occurs
    in the question, every occurrence of that marker is removed and the
    flag is False. If none occurs, the question is returned untouched with
    the flag True.

    :param question: question text to inspect
    :return: tuple ``(question, flag)``; ``flag`` is False when a marker
        was found and removed, True otherwise
    """
    marker = next((item for item in FALSE if item in question), None)
    if marker is None:
        return question, True
    return question.replace(marker, ""), False
'''
part-of-speech tagging with jieba
'''
def postag(text):
    """
    Segment ``text`` with jieba's posseg module.

    :param text: string to segment
    :return: whatever ``pseg.cut(text)`` returns, passed through unchanged
        (presumably an iterable of word/POS-flag pairs; confirm against the
        installed jieba version)
    """
    return pseg.cut(text)