# testplot.py
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pylab
#import sys
import numpy as np
import pandas as pd
from pandas import DataFrame
#import ast
#from itertools import chain
#from collections import Counter
#import csv
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
infile = 'master.csv'  # CSV holding a "Noun" column plus the feature columns
X = 'Bare Plural Noun Percentage'
y = 'Unit Denumerator Percentage'
z = 'Verb Subject Percentage'  # only used when dim == 3
dim = 3  # 2 or 3
clusternum = 8  # up to 8 (one entry per cluster in use_colors)

# Cluster label -> matplotlib colour code.
# NOTE(review): label 7 maps to white, which may be hard to see on a light
# background -- confirm this is intended.
use_colors = {0: "b", 1: "g", 2: "r", 3: "c", 4: "m", 5: "y", 6: "k", 7: "w"}


def feature_columns(dim, x_col=X, y_col=y, z_col=z):
    """Return the column names to cluster and plot for a 2-D or 3-D run.

    Raises ValueError for any ``dim`` other than 2 or 3, instead of failing
    later with a NameError on an unassigned variable.
    """
    if dim == 2:
        return [x_col, y_col]
    if dim == 3:
        return [x_col, y_col, z_col]
    raise ValueError("dim must be 2 or 3, got {!r}".format(dim))


def assign_clusters(df, columns, n_clusters):
    """Fit k-means on ``df[columns]`` and return one cluster label per row."""
    # Imported here so that merely importing this module does not require
    # scikit-learn; it is only needed when clustering actually runs.
    from sklearn.cluster import KMeans

    # random_state=0 keeps the cluster assignment reproducible between runs.
    model = KMeans(n_clusters=n_clusters, random_state=0)
    model.fit(df[columns].values)
    return model.labels_


def plot_clusters(df, columns, labels_col="clusternum"):
    """Show a 2-D or 3-D scatter plot of ``columns`` coloured by cluster."""
    colors = [use_colors[label] for label in df[labels_col]]

    # One figure only; the axes type is chosen to match the number of columns.
    fig = plt.figure()
    if len(columns) == 3:
        # Adding the axes via add_subplot attaches them to the figure
        # explicitly rather than relying on Axes3D(fig) doing so implicitly.
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(df[columns[0]], df[columns[1]], df[columns[2]], c=colors)
        ax.set_zlabel(columns[2])
    else:
        ax = fig.add_subplot(111)
        ax.scatter(df[columns[0]], df[columns[1]], c=colors)
    ax.set_xlabel(columns[0])
    ax.set_ylabel(columns[1])
    plt.show()


def nouns_by_cluster(df, n_clusters, noun_col="Noun", labels_col="clusternum"):
    """Return a list with one list of nouns per cluster label.

    Entry ``k`` holds the ``noun_col`` values of every row whose
    ``labels_col`` equals ``k``, in the row order of ``df``.  Labels with no
    rows yield an empty list.  Rows are selected with a boolean mask, so the
    result does not depend on the frame's index.
    """
    labels = df[labels_col]
    return [df.loc[labels == k, noun_col].tolist() for k in range(n_clusters)]


def main():
    """Cluster the rows of ``infile``, plot them, and print nouns per cluster."""
    if not 1 <= clusternum <= len(use_colors):
        raise ValueError(
            "clusternum must be between 1 and {}, got {!r}".format(
                len(use_colors), clusternum))
    columns = feature_columns(dim)

    df = pd.read_csv(infile)
    df["clusternum"] = assign_clusters(df, columns, clusternum)

    plot_clusters(df, columns)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    for nouns in nouns_by_cluster(df, clusternum):
        print(nouns)


if __name__ == "__main__":
    main()