-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsentiment1
56 lines (41 loc) · 2.65 KB
/
sentiment1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import twython #actively maintained, pure Python wrapper for the Twitter API
import pandas as pd #in order to read the CSV file
import numpy as np #for mathematic and conditions
import nltk #download VADER library
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer #importing the sentiment analysis tool
#read Hotel Reviews file
#the path is a raw string: in a normal literal the '\U' of '\Users' starts a
#unicode escape, which is a SyntaxError in Python 3
HotelReview = pd.read_csv(r'C:\Users\pamel\Desktop\College Papers\Data Analytics\Hotel_Reviews_Fixed2.csv')
#classify every review row into one of five groups (stored in 'Review_Count')
lower_positive = HotelReview['Lower_Positive']
lower_negative = HotelReview['Lower_Negative']
#rows whose negative column is exactly 'no negative' or 'nothing'
no_complaints = lower_negative.isin(['no negative', 'nothing'])
#rows whose positive column is exactly 'no positive' or 'nothing'
no_praise = lower_positive.isin(['no positive', 'nothing'])
#np.select uses the first condition that matches a row, so the two
#'everything' cases are listed before the more general ones
Conditions = [(lower_positive == 'everything') & no_complaints,
              (lower_negative == 'everything') & no_praise,
              no_complaints,
              no_praise]
#group number for each condition, in the same order
Choices1 = [1, 2, 3, 4]
#rows that match none of the conditions get group 5
HotelReview['Review_Count'] = np.select(Conditions, Choices1, default=5)
#link the Vader sentiment intensity analyzer
sid = SentimentIntensityAnalyzer()
#create a sentiment analyze function
def SentimentAnalyze(reviews=None, analyzer=None):
    """Write a 'Review_Sentiment_Score' column based on each row's 'Review_Count'.

    Score per 'Review_Count' value:
        1 -> 1.0
        2 -> -1.0
        3 -> compound score of 'Positive_Review'
        4 -> compound score of 'Negative_Review'
        anything else -> compound score of
            'Negative_Review' + ' ' + 'Positive_Review'

    Args:
        reviews: DataFrame with the 'Review_Count', 'Negative_Review' and
            'Positive_Review' columns. It is modified in place. Defaults to
            the module-level HotelReview.
        analyzer: object whose polarity_scores(text) returns a mapping with
            a 'compound' entry. Defaults to the module-level sid.

    Returns:
        None; the result is the new/overwritten 'Review_Sentiment_Score'
        column (float64) on ``reviews``.
    """
    if reviews is None:
        reviews = HotelReview
    if analyzer is None:
        analyzer = sid

    def compound(*parts):
        #a missing cell (NaN/None) is scored as empty text instead of raising
        text = ' '.join('' if pd.isna(part) else str(part) for part in parts)
        return analyzer.polarity_scores(text)['compound']

    scores = []
    rows = zip(reviews['Review_Count'],
               reviews['Negative_Review'],
               reviews['Positive_Review'])
    for count, negative, positive in rows:
        if count == 1:
            scores.append(1.0)
        elif count == 2:
            scores.append(-1.0)
        elif count == 3:
            scores.append(compound(positive))
        elif count == 4:
            scores.append(compound(negative))
        else:
            scores.append(compound(negative, positive))

    #assign the whole column once, keyed by the frame's own index: writing
    #cell by cell with .loc[position] is slow and targets the wrong rows
    #whenever the index is not 0..n-1
    reviews['Review_Sentiment_Score'] = pd.Series(
        scores, index=reviews.index, dtype='float64')
#apply the function
SentimentAnalyze()
#save the data as new CSV file
#the path is a raw string: in a normal literal the '\U' of '\Users' starts a
#unicode escape, which is a SyntaxError in Python 3
HotelReview.to_csv(r'C:\Users\pamel\Desktop\College Papers\Data Analytics\Hotel_Review_Final.csv', encoding='utf-8')