-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlasso_validator.R
82 lines (63 loc) · 3.61 KB
/
lasso_validator.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
################################## lasso_validator.R ###################################
# Function: Validate the prediction generated from lasso_predictor.R with real data #
# Usage: R --no-save < lasso_validator.R --args dir subdir type lasso #
# Arguments: dir = directory for expected data #
# subdir = directory for the prediction and output validation #
# type = genes or transcripts #
# lasso = genlasso or glmnet #
# output = type_lasso_validation_v1.txt type_lasso_validation_v2.txt #
# Author: Chelsea Ju #
# Date: 2013-12-04 #
# Last Modify: 2013-12-22 #
########################################################################################
# self-defined function
# Load the table of expected values.
#
# Arguments:
#   file - path handed to read.table() (default settings, no header).
# Returns:
#   The data frame read from `file`, with its columns renamed to
#   "Name" and "Expected" and its row names derived from the first column.
read_expectation <- function(file) {
  expected <- read.table(file)

  # Break every entry of the first column on "_" and flatten all the
  # pieces into a single character vector.
  # NOTE(review): the flattened vector is used as row names unfiltered, so
  # the assignment below only succeeds when each entry yields exactly one
  # piece. A previous, now disabled, variant kept only the even-indexed
  # pieces -- confirm which naming scheme the input files follow.
  name_pieces <- unlist(strsplit(as.character(expected[["V1"]]), "_"))
  rownames(expected) <- name_pieces

  # Assumes the file has exactly two columns -- TODO confirm.
  colnames(expected) <- c("Name", "Expected")
  expected
}
# Load a prediction table and arrange its rows in a requested order.
#
# Arguments:
#   file      - path handed to read.table() (default settings, no header);
#               the first column supplies the row identifiers.
#   row_order - identifiers (normally the row names of the expectation
#               table) giving the desired order of the returned rows.
# Returns:
#   A data frame with columns "Name" and "Predicted" holding one row per
#   element of `row_order`. An identifier that does not occur in the file
#   produces a row filled with NA.
read_prediction <- function(file, row_order) {
  data <- read.table(file)
  rownames(data) <- data[, 1]

  # Indexing a data frame with a character vector partially matches row
  # names, so a missing identifier such as "gene1" would silently select
  # "gene10". Resolve the positions with exact matching instead; any
  # non-character index is passed through unchanged.
  if (is.character(row_order)) {
    row_index <- match(row_order, rownames(data))
  } else {
    row_index <- row_order
  }
  data <- data[row_index, ]

  # Assumes the file has exactly two columns -- TODO confirm.
  colnames(data) <- c("Name", "Predicted")
  data
}
# read in arguments
options <- commandArgs(trailingOnly = TRUE)
if (length(options) != 4) {
  # paste0() joins the pieces without separators so every usage line
  # starts right after the preceding newline.
  stop(paste0("Invalid Arguments\n",
              "Usage: R --no-save --slave < lasso_validator.R --args dir subdir type lasso\n",
              "\t dir = directory for expected data\n",
              "\t subdir = directory for the prediction and output validation \n",
              "\t type = genes or transcripts\n",
              "\t lasso = genlasso or glmnet\n"),
       call. = FALSE)
}

dir <- options[1]
subdir <- options[2]
type <- options[3]
lasso <- options[4]

# Combine expected and predicted values into a four-column matrix:
# expected, predicted, absolute error, and absolute error divided by the
# expected value. Rows are labelled with the row names of `prediction`.
# An expected value of 0 yields Inf/NaN in the last column.
build_validation_table <- function(expectation, prediction) {
  abs_error <- abs(prediction$Predicted - expectation$Expected)
  validation <- cbind(expectation$Expected,
                      prediction$Predicted,
                      abs_error,
                      abs_error / expectation$Expected)
  colnames(validation) <- c("Expected", "Predicted", "Absolute Error",
                            "(Prediction - Expectation)/Expectation")
  rownames(validation) <- rownames(prediction)
  validation
}

## read expected and predicted values
expected_file <- paste0(dir, "/", subdir, "/tophat_out/", type, "_expected_read_count.txt")
expectation <- read_expectation(expected_file)

predicted_file_v1 <- paste0(dir, "/", subdir, "/lasso_out/", type, "_", lasso, "_prediction_v1.txt")
predicted_file_v2 <- paste0(dir, "/", subdir, "/lasso_out/", type, "_", lasso, "_prediction_v2.txt")

# Both prediction tables are reordered to follow the expectation rows so
# the element-wise comparison below lines up.
prediction_v1 <- read_prediction(predicted_file_v1, rownames(expectation))
prediction_v2 <- read_prediction(predicted_file_v2, rownames(expectation))

## validate analysis
validation_v1 <- build_validation_table(expectation, prediction_v1)
validation_v2 <- build_validation_table(expectation, prediction_v2)

## write to file
output_v1 <- paste0(dir, "/", subdir, "/lasso_out/", type, "_", lasso, "_validation_v1.txt")
output_v2 <- paste0(dir, "/", subdir, "/lasso_out/", type, "_", lasso, "_validation_v2.txt")

write.table(validation_v1, file = output_v1, sep = "\t")
write.table(validation_v2, file = output_v2, sep = "\t")

print(paste0("Writing the validation to ", output_v1))
print(paste0("Writing the validation to ", output_v2))