From 359574dee8562dbf885e4193430dbd711870491d Mon Sep 17 00:00:00 2001
From: teacheRforV <31927761@qq.com>
Date: Thu, 12 Mar 2015 18:50:15 -0400
Subject: [PATCH] more efficient version of evoker preparation

---
 prepare-evoker-input.R | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/prepare-evoker-input.R b/prepare-evoker-input.R
index 8e09428..fce410c 100644
--- a/prepare-evoker-input.R
+++ b/prepare-evoker-input.R
@@ -1,23 +1,21 @@
 genoprefix = 'phg000004.ind.geno.' # set the prefix of the genotype files here
 
 famfile = list.files()[grep('.fam', list.files())]
-famtable = read.table(famfile, sep = ' ', header = FALSE, as.is = TRUE)
+famtable = read.table(famfile[1], sep = ' ', header = FALSE, as.is = TRUE)
 indindex = as.character(famtable$V2)
+# indindex = c('5', '9', '26', '33', '35') # for testing
 
-snps = read.table(paste0(genoprefix, indindex[1L]), sep = ',', header = FALSE, as.is = TRUE)[, 1L]
-
-evoker_df = as.data.frame(matrix(NA, nrow = length(snps), ncol = (2L * length(indindex)) + 1L))
-names(evoker_df) = c('SNP', rep(indindex, each = 2L))
-evoker_df$SNP = snps
+snps = read.table(paste0(paste0(genoprefix, indindex[1L]), '.gz'), sep = ',', header = FALSE, as.is = TRUE)[, 1L]
 
 for (i in 1L:length(indindex)) {
-  cat('Merging', i, 'in', length(indindex), '\n')
-  tmp = read.table(paste0(genoprefix, indindex[i]),
-                   sep = ',', header = FALSE,
-                   colClasses = c(rep('NULL', 8L),
-                                  rep('numeric', 2))) # load faster
-  evoker_df[, 2L * i] = tmp[, 1L]
-  evoker_df[, 2L * i + 1L] = tmp[, 2L]
+  cat('Loading', i, 'in', length(indindex), '\n')
+  eval(parse(text = paste0('tmp_', i, ' = read.table(paste0(paste0(genoprefix, indindex[', i, ']), ".gz"), sep = ",", header = FALSE, colClasses = c(rep("NULL", 8L), rep("character", 2L)))')))
 }
 
-write.table(evoker_df, file = 'input.txt', sep = ' ', quote = FALSE, col.names = TRUE, row.names = FALSE)
+gc()
+
+evoker_mat = do.call(cbind, lapply(paste0('tmp_', 1L:length(indindex)), get))
+evoker_mat = cbind(snps, evoker_mat)
+colnames(evoker_mat) = c('SNP', rep(indindex, each = 2L))
+
+write.table(evoker_mat, file = 'input.txt', sep = ' ', quote = FALSE, col.names = TRUE, row.names = FALSE)