-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsentimizer.py
92 lines (78 loc) · 3.37 KB
/
sentimizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# -*- coding: utf-8 -*-
"""
@author: metalcorebear
"""
"""
This library will measure sentiment around specific entities within text. It is built on NLTK, Spacy, and NRCLex. Output is a dictionary that can be analyzed further, graphed, formulated into a wordcloud, etc.
https://github.com/explosion/spaCy
https://github.com/nltk
https://github.com/metalcorebear/NRCLex
"""
import os
import nltk
import spacy
from nrclex import NRCLex
nltk.download('punkt')
nltk.download('vader_lexicon')
nltk.download('stopwords')
os.system('python -m spacy download en_core_web_sm')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize
class SentiMizer():
'''
The SentiMizer class will allow a user to find entities within text and measure sentiment surrounding those entities.
'''
def __init__(self):
self.text = None
self.entities = None
self.sentiments = None
self.sentences = None
self.affect = None
def load_text(self, text : str):
self.text = text
def append_text(self, text : str):
if self.text == None:
self.text = text
else:
self.text = self.text + ' ' + text
def find_entities(self, model="en_core_web_sm", entity_types_of_interest = ['NORP', 'ORG', 'PERSON', 'FAC', 'GPE', 'LOC', 'EVENT']):
nlp = spacy.load(model)
entities = nlp(self.text)
self.entities = dict([(str(x), x.label_) for x in entities.ents if x.label_ in entity_types_of_interest])
ents = list(self.entities.keys())
self.sentences = dict()
sentences = sent_tokenize(self.text)
for ent in ents:
sent_list = []
for sentence in sentences:
if ent in sentence:
sent_list.append(sentence)
self.sentences.update({ent:' '.join(sent_list)})
def emote(self, entity_type=None):
if self.text != None:
if self.entities == None:
print('Error: Find entities first.')
# NLTK Vader sentiment scores
vader = SentimentIntensityAnalyzer()
self.sentiments = dict()
self.affect = dict()
# Filter by entity type
if entity_type != None:
filtered_entities = {key:value for (key, value) in self.entities.items() if value == entity_type}
entity_list = list(filtered_entities.keys())
filtered_sentences = {key:value for (key, value) in self.sentences.items() if key in entity_list}
else:
filtered_sentences = self.sentences
filtered_entities = self.entities
for key in filtered_sentences:
pol = vader.polarity_scores(filtered_sentences[key])
self.sentiments.update({key:pol['compound']})
# NRCLex affect scores
for key in filtered_sentences:
aff = NRCLex(filtered_sentences[key])
affect_frequencies = aff.affect_frequencies
if 'anticip' in affect_frequencies:
del affect_frequencies['anticip']
self.affect.update({key:affect_frequencies})
else:
print('Error: No text to analyze. Load text using the load_text() method.')