-
Notifications
You must be signed in to change notification settings - Fork 0
/
identify.py
82 lines (70 loc) · 2.57 KB
/
identify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import fasttext
import csv
import jieba
import codecs # codecs专门用作编码转换
def _read_rows(path):
    """Return every non-empty row of the UTF-8 CSV file at *path*."""
    # newline="" lets the csv module itself handle line breaks that occur
    # inside quoted fields; blank lines (parsed as []) are dropped.
    with open(path, "r", encoding="utf-8", newline="") as handle:
        return [row for row in csv.reader(handle) if row]


def _top_rows(rows, column, field, limit=10):
    """Return up to *limit* rows with the largest integer value in *column*.

    Each returned item is a dict holding "name" (column 1), "comment"
    (column 2) and *field* (the unconverted string found in *column*).
    ``sorted`` is stable even with ``reverse=True``, so rows with equal
    counts keep their order from the file.
    """
    ranked = sorted(rows, key=lambda row: int(row[column]), reverse=True)
    return [
        {"name": row[1], "comment": row[2], field: row[column]}
        for row in ranked[:limit]
    ]


def main(data_file="./comments.csv"):
    """Classify the comments in a CSV file and summarise the outcome.

    On every call a fastText supervised model is trained from
    ``./train/train_data.txt``.  Each comment is segmented with jieba, tokens
    listed in ``./train/stopwords_comment.txt`` are dropped, and the two
    best labels predicted for the remaining text drive the summary.

    Args:
        data_file: Path to a UTF-8 CSV file.  Column 1 is reported as
            "name", column 2 is the comment text, column 3 is ranked as the
            reply count and column 4 as the praise count.

    Returns:
        A dict with the keys:
            "positive", "negative", "neutral": counts based on the best label
                (anything that is not positive/negative counts as neutral).
            "suggestion", "question": ``{"name", "comment"}`` dicts of the
                comments whose top-two labels include that label.
            "more_reply": up to ten rows with the highest reply count.
            "high_praise": up to ten rows with the highest praise count.
        The same dict is also printed to stdout.

    Raises:
        IndexError: a non-empty row has fewer than five columns.
        ValueError: a reply or praise count is not an integer.
    """
    # Context manager so the stop-word file is closed; a set gives O(1)
    # membership tests in the per-token filter below.
    with codecs.open("./train/stopwords_comment.txt", "r", "utf-8") as handle:
        stopwords = {w.strip() for w in handle.readlines()}
    classifier = fasttext.train_supervised('./train/train_data.txt', label_prefix="__label__", min_count=1)

    # Read the file once and reuse the rows for classification and both
    # rankings.  The rankings now honour `data_file` instead of a hard-coded
    # 'comments.csv' (the default value still points at that same file).
    rows = _read_rows(data_file)
    comments = [{"name": row[1], "comment": row[2]} for row in rows]

    result = {"positive": 0, "negative": 0, "neutral": 0, "suggestion": [], "question": [], "high_praise": [], "more_reply": []}

    for entry in comments:
        kept = (w for w in jieba.cut(entry["comment"]) if w not in stopwords)
        # fastText's predict() rejects text containing '\n', so multi-line
        # comments are flattened onto one line first.
        text = ' '.join(kept).replace("\n", " ")
        labels = classifier.predict(text, k=2)[0]
        # Guard against an empty prediction; such a comment counts as neutral.
        best = labels[0] if labels else None
        if best == "__label__positive":
            result["positive"] += 1
        elif best == "__label__negative":
            result["negative"] += 1
        else:
            result["neutral"] += 1
        # NOTE(review): the original indentation was lost; these two checks
        # are assumed to run for every comment, not only neutral ones.
        if "__label__suggestion" in labels:
            result["suggestion"].append(entry)
        if "__label__question" in labels:
            result["question"].append(entry)

    result["more_reply"] = _top_rows(rows, 3, "reply_num")
    result["high_praise"] = _top_rows(rows, 4, "praise_num")

    print(result)
    return result
if __name__ == "__main__":
    # Executed as a script: analyse the default ./comments.csv file.
    main()