forked from vineet1992/Gene-Selection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHHSVM_O.R
35 lines (27 loc) · 1.57 KB
/
HHSVM_O.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
library(gcdnet)
#setwd("C:/Users/vinee_000/Documents/CS Academic Stuff/Graduate/Current Projects/Undergraduate Advising/Marcus_Dan")
# Load the data set: one sample per row, predictors in the first `n_pred`
# columns and the class label in the column right after them.
data <- read.csv("GSE2990_2.csv", header = FALSE, sep = ",")
n_pred <- 22283L # number of predictor columns in the file
stopifnot(ncol(data) >= n_pred + 1L)

n <- nrow(data) # number of samples
lim <- floor(n / 3) # size of the held-out test set (one third of the samples)
x <- data[, seq_len(n_pred)] # predictors
y <- data[, n_pred + 1L] # labels

# Random permutation of the sample indices: the first `lim` entries form the
# test set, the remaining ones the training set. seq_len() keeps the two sets
# disjoint even when lim == 0 (with 1:lim, rand[1:0] would return rand[1],
# which would then sit in both the training and the test set).
# NOTE(review): no set.seed() is called, so the split differs on every run.
rand <- sample.int(n)
test_idx <- rand[seq_len(lim)]
train_idx <- rand[lim + seq_len(n - lim)]

# The model functions below are given matrices rather than data frames.
x_tr <- as.matrix(x[train_idx, ])
x_te <- as.matrix(x[test_idx, ])

# Alternative kept for reference: fit on the labels exactly as read from file.
# y_tr <- y[train_idx]
# y_te <- y[test_idx]
# m <- cv.gcdnet(x_tr, y_tr, nfolds = 10, pred.loss = "misclass")
# pclass <- predict(m$gcdnet.fit, x_te, s = m$lambda.1se)

# Recode the labels as a two-level factor ("A"/"B") before training. The
# explicit check fails fast when the label column is not binary; relabelling
# alone would silently accept a single-class vector.
yf <- as.factor(y)
stopifnot(nlevels(yf) == 2L)
levels(yf) <- c("A", "B")
yf_tr <- yf[train_idx]
yf_te <- yf[test_idx] # test labels; not used further in this script

# 5-fold cross-validation over the lambda path with the "hhsvm" loss,
# lambda2 fixed at 0.01 and misclassification error as the CV criterion.
m2 <- cv.gcdnet(x_tr, yf_tr, method = "hhsvm", lambda2 = 0.01,
                pred.loss = "misclass", nfolds = 5)
plot(m2)

# Link-scale predictions for the test samples at the lambda stored in
# m2$lambda.min; the bare `preds` line prints them when run at top level.
preds <- predict(m2$gcdnet.fit, x_te, s = m2$lambda.min, type = "link")
preds

# Logistic squashing of the link values into (0, 1).
# NOTE(review): the original comment described these as the probability of the
# positive class. With method = "hhsvm" the link value is presumably an
# SVM-style margin rather than a log-odds, so treat `probs` as a monotone
# score, not a calibrated probability -- confirm before reporting it as one.
probs <- 1 / (1 + exp(-preds))