-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsimple.py
executable file
·60 lines (45 loc) · 1.29 KB
/
simple.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/python3
import markov
import random
import re
def main():
    """Train a word-level Markov chain on ``data.txt`` and print 8 lines.

    Each input line is normalised with ``preprocess`` and split into words.
    Consecutive words are registered with the chain as transitions, with two
    private sentinel objects standing in for "start of line" and "end of
    line". Lines that contain no words after normalisation are skipped.

    Once all transitions are added the chain is compiled, then walked eight
    times from the start sentinel until the end sentinel is drawn; the words
    visited on each walk are printed separated by single spaces.

    NOTE(review): the exact semantics of ``MarkovChain.add`` / ``compile`` /
    ``get`` live in the ``markov`` module and are not visible here.
    """
    mc = markov.MarkovChain(random.random)

    START_OF_LINE = object()  # special "word" at the beginning of every line
    END_OF_LINE = object()  # special "word" at the end of every line

    with open('data.txt', 'r', encoding='utf-8') as f:
        for line in f:
            words = preprocess(line).split()
            # preprocess() replaces the trailing newline and all punctuation
            # with spaces, so a blank line is never the empty string. Test the
            # word list instead; otherwise blank lines would teach the chain
            # a direct START_OF_LINE -> END_OF_LINE transition.
            if not words:
                continue

            previous_word = START_OF_LINE
            for word in words:
                mc.add(previous_word, word)
                previous_word = word
            mc.add(previous_word, END_OF_LINE)

    mc.compile()

    for _ in range(8):
        generated = []
        word = mc.get(START_OF_LINE)
        # The sentinels are unique objects, so identity is the right test.
        while word is not END_OF_LINE:
            generated.append(word)
            word = mc.get(word)
        print(" ".join(generated))
def preprocess(line):
    """Normalise a line of Turkish text to lowercase letters and spaces.

    Steps, in order:
      1. Turkish capitals (including the dotted/dotless I pair and the
         circumflexed vowels) are mapped to their lowercase forms.
      2. Every remaining character is lowercased with ``str.lower``.
      3. Circumflexed vowels are replaced by plain letters and apostrophes
         are removed outright.
      4. Any character that is still not ``a-z`` or one of ``ğüşöçı`` is
         replaced by a single space (one space per character).

    Returns the normalised string; its length can differ from the input only
    by the apostrophes removed in step 3.
    """
    # Handle the Turkish capitals through an explicit table first, so that
    # the I/İ pair is mapped by this table and not by the generic lower().
    turkish_lower = str.maketrans('ĞÜŞİÖÇIÂÎÛ', 'ğüşiöçıâîû')
    text = line.translate(turkish_lower).lower()

    # One pass: strip the circumflex accents and delete apostrophes.
    simplify = str.maketrans('âîû', 'aıu', '\'')
    text = text.translate(simplify)

    # Everything that is not a recognised lowercase letter becomes a space.
    return re.sub(r'[^a-zğüşöçı]', ' ', text)
# Run the generator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()