-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtesting model.py
58 lines (45 loc) · 1.6 KB
/
testing model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 27 12:11:14 2018
@author: Kartik
"""
import re
import numpy as np
from gensim import corpora, models, similarities
import nltk
from clean_text import clean_sent
import pickle
# Load the pickled embedding model and the word -> integer-id vocabulary.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load artifacts that this project produced itself.
filename = 'word2vec.bin'
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)
with open('word2int', 'rb') as vocab_file:
    word2int = pickle.load(vocab_file)

# Query the nearest neighbours of 'ron'. The result is neither stored nor
# printed, so in a plain script run this only exercises the model
# (presumably a smoke test - wrap it in print() to inspect the neighbours).
# Goes through model.wv, like the vocabulary check further down.
model.wv.most_similar(positive=['ron'], topn=7)
# Collect the embedding of every vocabulary word the model knows.
#   word2vec: word -> embedding as a plain Python list
#   vectors:  the same embeddings, appended in word2int iteration order
word2vec, vectors = {}, []
known_words = model.wv.vocab
for word in word2int:
    if word in known_words:
        # Index through model.wv (not the model itself) and convert once.
        embedding = model.wv[word].tolist()
        word2vec[word] = embedding
        vectors.append(embedding)
# Remove every word that has no embedding in word2vec.
for word in list(word2int):  # iterate over a copy: entries are deleted below
    if word not in word2vec:  # dict membership is O(1); no key list needed
        del word2int[word]

# `vectors` holds one entry per surviving word, in this same iteration order.
# After removals the original ids no longer line up with those positions, so
# renumber each word with its row position to keep vectors[word2int[word]]
# pointing at that word's own vector.
for row, word in enumerate(word2int):
    word2int[word] = row
# Reverse lookup table: integer id -> word.
int2word = {index: word for word, index in word2int.items()}
from sklearn.manifold import TSNE
# Project the collected embeddings down to two dimensions with t-SNE.
# Note that `model` is rebound here: from this point on it refers to the
# t-SNE estimator, not the embedding model loaded from disk.
np.set_printoptions(suppress=True)  # print floats without scientific notation
model = TSNE(n_components=2, random_state=0)
vectors = np.asarray(vectors)
vectors = model.fit_transform(vectors)
from sklearn import preprocessing
# Scale every 2-D point to unit L2 length.
# The norm is named explicitly on the estimator; the second positional
# argument of fit_transform is `y`, which Normalizer ignores.
# NOTE(review): with only two components, per-row normalisation places every
# word on the unit circle (direction kept, distance from the origin lost) -
# confirm this is intended rather than per-feature scaling.
normalizer = preprocessing.Normalizer(norm='l2')
vectors = normalizer.fit_transform(vectors)
import matplotlib.pyplot as plt
# Draw every word at its 2-D position and point an arrow at 'harry'.
fig, ax = plt.subplots()

# annotate() does not rescale the axes: they would stay at the default 0..1
# range and every label outside it would be clipped. Fit the limits to the
# data (plus a small margin) instead.
if len(vectors):
    pad = 0.1
    ax.set_xlim(vectors[:, 0].min() - pad, vectors[:, 0].max() + pad)
    ax.set_ylim(vectors[:, 1].min() - pad, vectors[:, 1].max() + pad)

for word, row in word2int.items():
    x, y = vectors[row][0], vectors[row][1]
    if word == 'harry':
        # With textcoords='offset points' the label needs an explicit offset
        # (xytext); otherwise it sits on the point and the arrow has no length.
        ax.annotate(word, (x, y), xytext=(20, 20),
                    textcoords='offset points', xycoords='data',
                    arrowprops=dict(arrowstyle="->"))
    else:
        ax.annotate(word, (x, y))
plt.show()