-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconfig.py
37 lines (31 loc) · 1.12 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# --- Model / training settings ---------------------------------------------
embedding_dims = 50
MAX_FEATURES = 72039  # 184996 was noted as an alternative value
MAX_TEXT_LENGTH = 100
BATCH_SIZE = 32
EPOCHS = 2
VALIDATION_SPLIT = 0.1

# NOTE(review): the exact meaning of these two split points is not visible
# in this file -- confirm against the code that consumes them.
SPLIT = 10000
SPLIT2 = 20000

# Label names, in a fixed order.
CLASSES_LIST = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]

# --- File locations ---------------------------------------------------------
# Every data file below is resolved relative to this directory prefix.
path = "./data/"
TRAIN_DATA_FILE = path + "train.csv"
TEST_DATA_FILE = path + "test.csv"
embedding_path = path + "crawl-300d-2M.vec.txt"
GLOVE_EMBEDDING_FILE = path + "glove.840B.300d.txt"

# These two files sit in the working directory rather than under `path`.
train_token_path = "./train_corps.txt"
test_token_path = "./test_corps.txt"
# Seed set of stop words: three English articles plus punctuation tokens.
# Drop the punctuation rows below if punctuation has to be preserved.
stop_words = {
    "the", "a", "an",
    ".", ",", '"', "'", "?", ":", ";",
    "(", ")", "[", "]", "{", "}",
    "''", "``", "...", "-", "%",
}
# Mapping of bad word -> replacement token, loaded from a text file.
#
# Each line of the file is lower-cased and is expected to be either
#   word               -> the word maps to itself
#   word,replacement   -> the word maps to the replacement, with spaces in the
#                         replacement turned into underscores
# The first occurrence of a word wins; later duplicates are ignored.
# Lines with more than two comma-separated fields are skipped, as before.
badwords = {}
# Explicit UTF-8 so the result does not depend on the platform's default
# encoding (the stop-word file in this module is read the same way).
with open("data/badwords.txt", encoding="utf-8") as fp:
    for line in fp:
        # Strip every field *before* the duplicate check so that the key that
        # is tested is the same key that gets stored.
        fields = [field.strip() for field in line.lower().split(',')]
        if len(fields) > 2:
            continue
        word = fields[0]
        if not word:
            # Blank line or empty first field: nothing to register.
            continue
        if len(fields) == 2:
            replacement = fields[1].replace(" ", "_")
        else:
            replacement = word
        # setdefault keeps an existing entry, preserving first-wins semantics.
        badwords.setdefault(word, replacement)
# Extend the stop-word set with one entry per line of the stop-word file;
# surrounding whitespace (including the trailing newline) is stripped.
with open("data/stopwords.txt", encoding="utf-8") as fp:
    stop_words.update(line.strip() for line in fp)