-
Notifications
You must be signed in to change notification settings - Fork 2
/
ClusterCommands.Rhistory
137 lines (137 loc) · 7.22 KB
/
ClusterCommands.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Load crimeO.csv ----
# The path is relative, so the working directory must be the one that
# contains "Documents/".
# NOTE(review): presumably the user's home directory -- confirm.
# header = TRUE is already the read.csv() default; it is spelled out to make
# the expectation of a header row explicit.
csv.data <- read.csv(
  "Documents/ims/abstractnouns/countability/crimeO.csv",
  header = TRUE
)

# View() opens a spreadsheet-style viewer, which is only useful in an
# interactive session; skip it when the file is run as a script.
if (interactive()) {
  View(csv.data)
}
# Open a graphics device and take a first look at the master table ----
# windows() exists only in Windows builds of R; use the portable dev.new()
# on every other platform.
if (.Platform$OS.type == "windows") {
  windows()
} else {
  dev.new()
}

# Relative path: see the working-directory assumption for crimeO.csv.
master <- read.csv("Documents/ims/abstractnouns/master.csv", header = TRUE)
if (interactive()) {
  View(master)
}

# Columns are reached through `master$...`. Names such as
# `master.Noun.Count` or a bare `Noun.Count` are not objects in the workspace
# and raise "object not found".
plot(master)  # plot method for data frames: all columns against each other
plot(master$Noun.Count)
plot(master$Plural.Noun.Percentage, master$Bare.Plural.Noun.Percentage)
plot(master$Bare.Plural.Noun.Percentage)
# Make ggplot2 available ----
# The installer is install.packages() (plural). Install only when the package
# is missing so that re-running this file does not download it again.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# Exploratory scatterplots of the master table ----
# Every plot maps two columns of `master` to x/y and, where given, a third
# column to point colour. geom_point() requires both an x and a y aesthetic,
# and column names are case-sensitive (e.g. `Ex.Pl.Count`).
# NOTE: plots are auto-printed only at top level. When this file is run with
# source(), wrap each call in print() or pass print.eval = TRUE.

# Plural share against verb-construction share.
ggplot(master, aes(Plural.Noun.Percentage, Verb.Construction.Percentage)) +
  geom_point()

# Verb-related columns, coloured by plural share.
ggplot(master, aes(Verb.Construction.Percentage, Verb.Subject.Percentage,
                   color = Plural.Noun.Percentage)) +
  geom_point()
ggplot(master, aes(Verb.Construction.Percentage, Verb.Object.Percentage,
                   color = Plural.Noun.Percentage)) +
  geom_point()
ggplot(master, aes(Verb.Subject.Percentage, Verb.Object.Percentage,
                   color = Plural.Noun.Percentage)) +
  geom_point()

# Subject vs. object share, coloured by other columns.
ggplot(master, aes(Verb.Subject.Percentage, Verb.Object.Percentage,
                   color = Determiner.Percentage)) +
  geom_point()
ggplot(master, aes(Verb.Subject.Percentage, Verb.Object.Percentage,
                   color = Verb.Negation.Percentage)) +
  geom_point()

# Plural vs. singular noun share, coloured by the F.Ns / A.N columns.
ggplot(master, aes(Plural.Noun.Percentage, Singular.Noun.Percentage,
                   color = F.Ns.Percentage)) +
  geom_point()
ggplot(master, aes(Plural.Noun.Percentage, Singular.Noun.Percentage,
                   color = F.Ns.Count)) +
  geom_point()
ggplot(master, aes(Plural.Noun.Percentage, Singular.Noun.Percentage,
                   color = A.N.Percentage)) +
  geom_point()

# Determiner / quantifier / indefinite-article share vs. singular noun share.
ggplot(master, aes(Determiner.Percentage, Singular.Noun.Percentage,
                   color = A.N.Percentage)) +
  geom_point()
ggplot(master, aes(Quantifier.Percentage, Singular.Noun.Percentage,
                   color = A.N.Percentage)) +
  geom_point()
ggplot(master, aes(Indefinite.Article.Percentage, Singular.Noun.Percentage,
                   color = A.N.Percentage)) +
  geom_point()
ggplot(master, aes(Indefinite.Article.Percentage, Singular.Noun.Percentage,
                   color = Countable.Percentage)) +
  geom_point()

# Views involving Countable.Percentage / Uncountable.*.
ggplot(master, aes(Plural.Noun.Percentage, Singular.Verb.Percentage,
                   color = Countable.Percentage)) +
  geom_point()
ggplot(master, aes(Bare.Plural.Noun.Percentage, Countable.Percentage)) +
  geom_point()

# Index plot of Bare.Plural.Noun.Percentage coloured by Countable.Percentage.
# Base plot() has no `color` argument (it only warns and draws uncoloured
# points), so the colour mapping is done with ggplot2 instead.
ggplot(master, aes(seq_along(Bare.Plural.Noun.Percentage),
                   Bare.Plural.Noun.Percentage,
                   color = Countable.Percentage)) +
  geom_point()

ggplot(master, aes(Bare.Plural.Noun.Percentage, Plural.Noun.Percentage,
                   color = Countable.Percentage)) +
  geom_point()
ggplot(master, aes(Bare.Plural.Noun.Percentage, Plural.Noun.Percentage,
                   color = Uncountable.Percentage)) +
  geom_point()
ggplot(master, aes(Bare.Plural.Noun.Percentage, Plural.Noun.Percentage,
                   color = Uncountable.Count)) +
  geom_point()

# Coloured by Ex.Pl.Count.
ggplot(master, aes(Plural.Noun.Percentage, Singular.Noun.Percentage,
                   color = Ex.Pl.Count)) +
  geom_point()
ggplot(master, aes(Plural.Verb.Percentage, Singular.Noun.Percentage,
                   color = Ex.Pl.Count)) +
  geom_point()
# Distribution of the possessive columns ----
# NOTE(review): Possesive.Percentages is presumably read as a factor or
# character column rather than as numbers -- confirm against master.csv.
# Converting through as.character() parses the printed values; as.numeric()
# applied directly to a factor would return its internal level codes.
# Entries that are not plain numbers become NA (with a warning).
poss <- as.numeric(as.character(master$Possesive.Percentages))
poss
hist(poss)

# ggplot2 version of the same histogram. The `data` argument must be a data
# frame, not a bare vector, so the converted values are wrapped in one.
ggplot(data.frame(poss = poss), aes(poss)) +
  geom_histogram(bins = 30)

# Possessive share recomputed from the raw counts: Possesive.Count divided by
# Noun.Count, one value per row of `master`.
hist(master$Possesive.Count)
possperc <- master$Possesive.Count / master$Noun.Count
possperc
hist(possperc)

# Absolute deviation of each row's share from the mean share.
# NOTE: the variable `diff` shares its name with base::diff(); calls of the
# form diff(...) still resolve to the function. The name is kept because
# later commands in this file refer to it.
mposs <- mean(possperc)
mposs
diff <- abs(possperc - mposs)
hist(diff)

# hist() accepts a single numeric vector only, so the deviation per noun is
# drawn with plot(). Coercing Noun to a factor makes the call work whether
# read.csv() returned it as a factor or as character.
plot(as.factor(master$Noun), diff)
# Per-noun views of the derived measures ----
# `diff`, `possperc` and `bareperc` are workspace vectors, not columns of
# `master`. aes() falls back to the calling environment to find them, so each
# must hold one value per row of `master`, in row order.
ggplot(master, aes(Noun, diff, color = possperc)) +
  geom_point()
ggplot(master, aes(Noun, Bare.Plural.Noun.Percentage, color = possperc)) +
  geom_point()
ggplot(master, aes(Noun, Bare.Plural.Noun.Percentage,
                   color = Plural.Noun.Percentage)) +
  geom_point()
ggplot(master, aes(Plural.Noun.Percentage, Bare.Plural.Noun.Percentage,
                   color = Plural.Noun.Percentage)) +
  geom_point()

# Ratio of the bare-plural percentage to the plural percentage. The columns
# must be qualified with `master$` outside of aes(). Rows whose
# Plural.Noun.Percentage is 0 yield NaN or Inf.
bareperc <- master$Bare.Plural.Noun.Percentage / master$Plural.Noun.Percentage

ggplot(master, aes(Noun, bareperc, color = Plural.Noun.Percentage)) +
  geom_point()
ggplot(master, aes(Noun, Plural.Noun.Percentage, color = bareperc)) +
  geom_point()
ggplot(master, aes(Countable.Percentage, Plural.Noun.Percentage,
                   color = bareperc)) +
  geom_point()
ggplot(master, aes(Noun, bareperc, color = Countable.Percentage)) +
  geom_point()
# k-means clustering on possessive share and its deviation ----
# NOTE(review): reshape2 is attached here, but the data frame used for
# clustering below is built with data.frame() and needs no melt() reshaping.
library(reshape2)

# One row per entry of master$Noun plus the two numeric features. The label
# column is named `master.Noun` by data.frame().
df <- data.frame(master$Noun, possperc, diff)
head(df)

# kmeans() works on numeric data only, so the label column is excluded and
# the two feature columns are selected by name. Seeding immediately before
# the call makes the 20 random starts reproducible.
set.seed(20)
possCluster <- kmeans(df[, c("possperc", "diff")], 2, nstart = 20)
possCluster
# Inspect the cluster assignments ----
# The assignment vector is stored under `cluster` in the kmeans result and
# must line up one-to-one with the rows of `master` for the tables and the
# plot below to be meaningful.
possCluster$cluster
stopifnot(length(possCluster$cluster) == length(master$Noun))

table(possCluster$cluster, master$Noun)

# Store the assignment as a factor so ggplot2 uses a discrete colour scale.
possCluster$cluster <- as.factor(possCluster$cluster)
head(df)
ggplot(master, aes(Noun, diff, color = possCluster$cluster)) +
  geom_point()

table(master$Noun, possCluster$cluster)
head(table(master$Noun, possCluster$cluster))

# First few nouns assigned to cluster 1.
head(master$Noun[possCluster$cluster == 1])

# Command history is only available in an interactive session; skip saving
# when the file is run as a script.
if (interactive()) {
  savehistory("~/ClusteringCommands.Rhistory")
  savehistory("~/Documents/ims/abstractnouns/ClusterCommands.Rhistory")
}