-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathsentiment_c.py
83 lines (66 loc) · 1.96 KB
/
sentiment_c.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# -*- coding: utf-8 -*-
"""
Created on Sun May 26 12:53:12 2019
@author: lliu9
"""
from mrjob.job import MRJob
import csv
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Read in the selected indices: build a lookup from record ID to a numeric
# value (used further down as the view count).
# NOTE(review): each line is presumably of the form  "<id>"<TAB>["<number>"]
# as written by an earlier job -- confirm against whatever produces index_c.txt.
anskey = {}
with open("index_c.txt") as f:
    for line in f:
        pair = line.split("\t")
        try:
            # Key: drop the first and last character of the first field.
            # Value: drop the first two and last three characters (the last
            # one being the line's newline) of the second field, then parse.
            anskey[pair[0][1:-1]] = float(pair[1][2:-3])
        except (ValueError, IndexError):
            # ValueError: the trimmed value is not a number.
            # IndexError: the line has no tab (e.g. a blank trailing line).
            # Either way the line is malformed, so skip it instead of
            # aborting the whole job at import time.
            continue
def sentiment_analyzer_scores(sentence):
    '''
    function takes in a string input
    returns the normalized sentiment score, i.e. the "compound" entry of
    the dict produced by SentimentIntensityAnalyzer.polarity_scores

    The analyzer is created on the first call and then reused: this
    function is called once per input record, and building a fresh
    analyzer for every record repeats the same setup work each time.
    '''
    # Cache the analyzer on the function object so no extra module-level
    # name or import is needed.
    analyser = getattr(sentiment_analyzer_scores, "_analyser", None)
    if analyser is None:
        analyser = SentimentIntensityAnalyzer()
        sentiment_analyzer_scores._analyser = analyser
    return analyser.polarity_scores(sentence)["compound"]
class MRScore(MRJob):
    '''
    Averages sentiment score and view count per month over the selected
    records.

    input: output from the filter.py file (one CSV record per line)
    '''

    def mapper(self, _, line):
        '''
        Emit one pair per record whose column 1 is '2' and whose ID is
        present in anskey; every other record produces nothing.

        key: month (could be modified to day or year) -- the first seven
             characters of column 4 (presumably "YYYY-MM"; TODO confirm
             the timestamp format)
        value: tuple of sentiment score and viewcount of the question
        '''
        row = next(csv.reader([line]))
        try:
            post_id, post_type, created, body = row[0], row[1], row[4], row[7]
        except IndexError:
            # Fewer than eight columns: malformed record, skip it.
            return

        # NOTE(review): '2' is presumably the "answer" post type -- confirm
        # against the filter.py output schema.
        if post_type != '2' or post_id not in anskey:
            return

        try:
            senti = sentiment_analyzer_scores(body)
        except (ValueError, IndexError):
            # Best effort, as before: a body the scorer cannot handle is
            # skipped rather than failing the whole job.
            return

        yield created[:7], (senti, anskey[post_id])

    def reducer(self, mon, scores):
        '''
        key: month
        value: [0,(1,2)]
        0: average of the sentiment score
        1: average of the viewcount
        2: count of elements associated with the key
        '''
        # Each incoming value is a (sentiment, viewcount) pair, so the array
        # already has one row per value and two columns; no reshape needed.
        pairs = np.array(list(scores))
        count = len(pairs)
        mean_senti, mean_views = np.mean(pairs, axis=0)
        # Emit plain Python floats so the output does not depend on how the
        # output serializer treats NumPy scalar types.
        yield mon, [float(mean_senti), (float(mean_views), count)]
# Script entry point: hand control to the job's runner only when this file
# is executed directly, not when it is imported.
if __name__ == "__main__":
    MRScore.run()