-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathStopwords.py
55 lines (50 loc) · 1.64 KB
/
Stopwords.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import spacy
import nltk
from nltk.corpus import stopwords
import os
# Create a list of stop words from nltk and spacy. Add new stop words here.
def all_stop_words():
    """Return the combined English stop-word list used for token filtering.

    The list is assembled from three sources, in this order:

    1. NLTK's English stop words (the corpus is downloaded on first use
       if it is missing).
    2. spaCy's ``en_core_web_sm`` default stop words (the model is
       downloaded on first use if it is missing), added in sorted order so
       the result is deterministic.
    3. A fixed set of punctuation marks and stray tokens defined below.
       Add new stop words to that tuple.

    Returns:
        list: Stop words as individual strings. Entries may repeat where
        the sources overlap; that is harmless for membership tests.

    Raises:
        OSError: If the spaCy model still cannot be loaded after the
            download attempt.
    """
    # Function-scope imports keep this block self-contained.
    import subprocess
    import sys

    try:  # try to get stop words from nltk, if not download them
        stop_words = stopwords.words('english')
    except LookupError:
        print('Downloading nltk stop words, try running again')
        nltk.download('stopwords')
        stop_words = stopwords.words('english')

    try:  # try to get stop words from spacy, if not download them
        en_model = spacy.load('en_core_web_sm')
    except OSError:
        print('Downloading spacy en model, try running again')
        # Use the interpreter that is running this code so the model lands
        # in the active environment; a bare `python` on PATH may be a
        # different installation. The argv list also avoids a shell.
        subprocess.run(
            [sys.executable or 'python', '-m', 'spacy', 'download',
             'en_core_web_sm']
        )
        en_model = spacy.load('en_core_web_sm')

    # Work on a copy so the list handed back by nltk is never mutated.
    stop_words = list(stop_words)

    # extend, not append: append would insert the whole set as ONE element,
    # so no spaCy stop word would ever match a token.
    stop_words.extend(sorted(en_model.Defaults.stop_words))

    # Punctuation and stray tokens to drop as well. Add new stop words here.
    extra_tokens = (
        '(', ')', '[', '2', ']', '{', '}', ':', ';', 'à', '%', '!', '?',
        '\'', '\"', '``', '...', '’', '“', '”', '–', ',', '.', '\'s',
        '\'\'', '-a', '-an', '-the', '$',
    )
    stop_words.extend(extra_tokens)
    return stop_words