-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathtest_compress.py
executable file
·197 lines (166 loc) · 5.95 KB
/
test_compress.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#!/usr/bin/env python
"""
Tests for the compress repo. This is best executed from `run_tests.sh`
in order to keep parity with what's running in CI.
"""
import json
from typing import Counter
import os
from find_suggested_phrases import (
get_possible_abbrevs,
get_best_phrases_to_shorten,
match_abbrevs_to_phrases,
corpus_to_ngrams,
fix_grammer,
load_corpus,
)
from generate_autokeys import create_autokey_config_for_shortcut
from parse_slack import extract_slack_msgs, clean_slack_msg
from preset_abbrevs import BLACKLIST
def test_presets():
    """Smoke-test the presets module: it imports and BLACKLIST contains "a"."""
    # The real check is that importing BLACKLIST did not crash at module load.
    assert "a" in BLACKLIST
def test_extract_slack_msgs():
    """Extract messages for user "erik" from the bundled Slack export fixture."""
    expected_msgs = ["test msg 1", "test with words"]
    messages = extract_slack_msgs("test_data/test_slack_export", "erik")
    assert messages == expected_msgs
def test_clean_slack_msg():
    """Clean a raw Slack message containing emoji codes and a user mention.

    The expected text drops both ``:emoji:`` codes, the ``<@...>`` mention
    and the trailing "?".
    """
    raw_msg = (
        "test :slightly_smiling_face: and also :smiling_face: "
        "and then <@U01SL713VH8>?"
    )
    cleaned = clean_slack_msg(raw_msg)
    assert cleaned == "test and also and then"
def test_get_abbreviations():
    """Check that known abbreviations appear among the candidates for sample phrases."""
    cases = [
        ("an", ["a"]),  # mainly make sure short inputs don't crash
        ("the", ["t", "th"]),
        ("different", ["dt", "dif", "diff"]),
        ("in the robots", ["itr"]),
    ]
    for phrase, wanted in cases:
        candidates = get_possible_abbrevs(phrase)
        for abbrev in wanted:
            assert abbrev in candidates
def test_match_abbrevs():
    """Match scored phrases to abbreviations, with presets included in the result."""
    # Each entry is (score, phrase, phrase_len, count).
    scored_phrases = [
        (9933, "the", 3, 9933),
        (6462, "that", 4, 3231),
        (4128, "and", 3, 4128),
        (4123, "the robot", 9, 589),
        (3940, "this", 4, 1970),
        (3801, "robot", 5, 1267),
        (3410, "need to", 7, 682),
        (3020, "in the", 6, 755),
        (2954, "something", 9, 422),
        (2952, "robots", 6, 738),
        (2844, "should", 6, 711),
        (2824, "just", 4, 1412),
        (2725, "for", 3, 2725),
        (2652, "with", 4, 1326),
        (2644, "have", 4, 1322),
        (2600, "i think", 7, 520),
        (2594, "you", 3, 2594),
        (2508, "of the", 6, 627),
        (2427, "think", 5, 809),
        (2372, "on the", 6, 593),
    ]
    # "about" is not in the scored list above; it is expected in the result
    # only because it is one of the presets.
    preset_shortcuts = {"about": "ab", "and": "n", "i think": "itk"}
    expected = {
        "about": "ab",
        "and": "n",
        "i think": "itk",
        "the": "t",
        "that": "tt",
        "the robot": "tr",
        "this": "ts",
        "robot": "r",
        "need to": "nt",
        "in the": "e",
        "something": "s",
        "robots": "rs",
        "should": "sd",
        "just": "j",
        "for": "f",
        "with": "w",
        "have": "h",
        "you": "y",
        "of the": "ot",
        "think": "tk",
        "on the": "ont",
    }
    shortcuts = match_abbrevs_to_phrases(scored_phrases, preset_shortcuts)
    assert expected == shortcuts
def test_get_top_shortcuts():
    """Ask for the 2 best phrases to shorten from a small n-gram count table."""
    ngram_counts = Counter(
        {
            ("hello",): 10,
            ("robots",): 3,
            ("hello", "world"): 5,
        }
    )
    # Each expected entry is (score, phrase, phrase_len, count);
    # ("robots",) is expected to be left out.
    expected = [
        (45, "hello world", 11, 5),
        (30, "hello", 5, 10),
    ]
    best = get_best_phrases_to_shorten(ngram_counts, 2)
    assert best == expected
def test_corpus_to_n_grams():
    """Count the 1- and 2-word n-grams of a two-sentence corpus."""
    sentences = ["hello world", "hello world goodbye world"]
    # Keys are word tuples; values are occurrences across the whole corpus.
    expected = Counter(
        {
            ("hello",): 2,
            ("world",): 3,
            ("goodbye",): 1,
            ("hello", "world"): 2,
            ("world", "goodbye"): 1,
            ("goodbye", "world"): 1,
        }
    )
    ngram_counts = corpus_to_ngrams(sentences, 2)
    assert expected == ngram_counts
def test_load_corpus():
    """Load the fixture corpus directory with the ``.txt`` extension set."""
    # Compared as sets, so the order in which entries are loaded is irrelevant.
    expected_entries = {"hello world", "testing", "also here"}
    loaded = set(load_corpus("test_data/test_corpus/", {".txt"}))
    assert loaded == expected_entries
def test_fix_grammer():
    """Check fix_grammer against a few (raw, corrected) sample pairs."""
    samples = (
        ("i think", "I think"),
        ("dont", "don't"),
        ("it doesnt matter", "it doesn't matter"),
    )
    for raw, corrected in samples:
        assert corrected == fix_grammer(raw)
def _expected_autokey_config(phrase, abbrev):
    """Return the JSON config expected on disk for ``phrase`` / ``abbrev``."""
    return {
        "usageCount": 0,
        "omitTrigger": False,
        "prompt": False,
        "description": phrase,
        "abbreviation": {
            "wordChars": r"[\w\t'\-&\+]",
            "abbreviations": [abbrev],
            "immediate": False,
            "ignoreCase": True,
            "backspace": True,
            "triggerInside": False,
        },
        "hotkey": {"hotKey": None, "modifiers": []},
        "modes": [1],
        "showInTrayMenu": False,
        "matchCase": True,
        "filter": {
            "regex": "google-chrome.Google-chrome",
            "isRecursive": False,
        },
        "type": "phrase",
        "sendMode": "kb",
    }


def _check_autokey_output(phrase, abbrev, file_stem):
    """Generate one autokey shortcut, verify both output files, then delete them.

    Args:
        phrase: Full phrase passed to ``create_autokey_config_for_shortcut``;
            expected as the first line of the ``.txt`` file and as the
            config's ``description``.
        abbrev: Abbreviation passed alongside the phrase; expected as the only
            entry in the config's ``abbreviations`` list.
        file_stem: Base name of the generated files, i.e. ``<stem>.txt`` and
            ``.<stem>.json`` under ``output/autokey_phrases/``.

    Raises:
        AssertionError: If either generated file has unexpected content.
    """
    result_path = "output/autokey_phrases/"
    main_path = result_path + file_stem + ".txt"
    config_path = result_path + "." + file_stem + ".json"
    try:
        create_autokey_config_for_shortcut(phrase, abbrev)
        with open(main_path, "r", encoding="utf8") as f:
            assert f.readline() == phrase
        with open(config_path, "r", encoding="utf8") as f:
            assert json.load(f) == _expected_autokey_config(phrase, abbrev)
    finally:
        # Always clean up, even when an assertion fails, so no generated
        # files are left behind for later runs.
        for path in (main_path, config_path):
            try:
                os.remove(path)
            except FileNotFoundError:
                pass  # the file was never created; nothing to remove


def test_autokey_configs():
    """Generate autokey configs for two shortcuts and verify the files written.

    Each case removes its own output files afterwards, including when an
    assertion fails.
    """
    # TODO: mock filesystem so none of this touches disk. oh well...
    _check_autokey_output("test_because", "bc", "testbecause")
    # some are only set to work in chrome, to avoid coding mistakes
    _check_autokey_output("test_i", "i", "testi")