forked from hyginn/rete
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request hyginn#21 from wonjunetai/master
importFilterHypermutators function
- Loading branch information
Showing
6 changed files
with
372 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,225 @@ | ||
# importFilterHypermutators.R | ||
|
||
#' Filters out hypermutators
#'
#' \code{importFilterHypermutators} counts the SNV and CNA mutations of every
#' sample found in the input files and writes filtered copies of those files
#' from which all samples whose combined mutation count exceeds \code{xS} have
#' been removed. Each filtered file is saved in \code{dOut} under the name of
#' its input file, prefixed with \code{filteredHypermutators_}.
#'
#' A CNA mutation is counted for every non-zero value in a sample column of an
#' rCNA file (sample columns start at column 4). An SNV mutation is counted
#' for every row of an rSNV file; the sample is taken from
#' \code{Tumor_Sample_Barcode}, with dashes replaced by periods so that the
#' names agree with the rCNA column names.
#'
#' @param rSNVFileIn A vector of local file names of rSNV
#' @param rCNAFileIn A vector of local file names of rCNA
#' @param dOut Directory to store output, defaults to getwd().
#' @param xS Mutation threshold. 400 by default. Samples with more than
#'   \code{xS} mutations in total are removed.
#' @param silent Controls whether output to console should be suppressed. FALSE
#'   by default.
#' @param writeLog Controls whether writing the result to the global logfile is
#'   enabled. TRUE by default.
#' @param writeDetailedLog Flag for extra details about log. TRUE by default.
#'
#' @return Nothing of use to the caller; the function is invoked for its side
#'   effects (filtered files written to \code{dOut} and, if \code{writeLog} is
#'   TRUE, an entry in the log file).
#'
#' @examples
#' \dontrun{
#' importFilterHypermutators(rSNVFileIn, rCNAFileIn, dOut, xS)
#' }
#'
#' @export
importFilterHypermutators <- function(rSNVFileIn = c(),
                                      rCNAFileIn = c(),
                                      dOut = getwd(),
                                      xS = 400,
                                      silent = FALSE,
                                      writeLog = TRUE,
                                      writeDetailedLog = TRUE) {

    ## VALIDATE PARAMS ########
    # make sure that all input files actually exist (SNV first, then CNA)
    for (file in c(rSNVFileIn, rCNAFileIn)) {
        r <- .checkArgs(file, like = "FILE_E", checkSize = TRUE)
        if (length(r) > 0) {
            stop(r)
        }
    }

    # make sure that output directory is valid
    r <- .checkArgs(dOut, like = "DIR", checkSize = TRUE)
    if (length(r) > 0) {
        stop(r)
    }

    # make sure that the threshold is a single number
    r <- c(r, .checkArgs(xS, like = 1, checkSize = TRUE))
    if (length(r) > 0) {
        stop(r)
    }

    # per-sample mutation counters, keyed by sample name
    hashTable <- new.env(hash = TRUE)

    # Add nCNA and nSNV to the counters of `sample`. A sample that has not
    # been seen before is registered with zero counts first, so that every
    # sample present in the input is known even if it carries no mutation.
    addCounts <- function(sample, nCNA = 0, nSNV = 0) {
        prev <- hashTable[[sample]]
        if (is.null(prev)) {
            prev <- list(CNA = 0, SNV = 0, total = 0)
        }
        assign(sample,
               list(CNA = prev$CNA + nCNA,
                    SNV = prev$SNV + nSNV,
                    total = prev$total + nCNA + nSNV),
               envir = hashTable)
    }

    ## COUNT MUTATIONS ########
    # for each file in rCNAFileIn
    for (file in rCNAFileIn) {
        rCNA <- readRDS(file)
        # sample columns start at column 4; negative indexing yields an empty
        # set (rather than counting backwards) if there are fewer columns
        for (sample in colnames(rCNA)[-seq_len(3)]) {
            # every gene with a non-zero copy number value is an aberration;
            # as.vector() makes sure factor levels, not codes, are converted
            copyNumbers <- as.double(as.vector(rCNA[[sample]]))
            addCounts(sample, nCNA = sum(copyNumbers != 0, na.rm = TRUE))
        }
    }

    # for each file in rSNVFileIn
    for (file in rSNVFileIn) {
        rSNV <- readRDS(file)
        # need to substitute dashes with period for consistency in files
        barcodes <- gsub("-", ".", rSNV$Tumor_Sample_Barcode, fixed = TRUE)
        nBarcodes <- length(barcodes)
        # seq_len() keeps the loop from running on an empty file
        for (i in seq_len(nBarcodes)) {
            # Present a progress bar.
            if (!silent) {
                .pBar(i, nBarcodes)
            }
            # every row is one SNV of the sample it names
            addCounts(barcodes[i], nSNV = 1)
        }
    }

    ## ASSESS MUTATIONS AND LOG STATISTICS #######

    # all.names = TRUE: do not lose samples whose name starts with a period
    allSamples <- ls(hashTable, all.names = TRUE)
    totalSamples <- length(allSamples)   # number of distinct samples seen

    nCNA <- vapply(allSamples, function(s) hashTable[[s]]$CNA,
                   numeric(1), USE.NAMES = FALSE)
    nSNV <- vapply(allSamples, function(s) hashTable[[s]]$SNV,
                   numeric(1), USE.NAMES = FALSE)
    nTotal <- vapply(allSamples, function(s) hashTable[[s]]$total,
                     numeric(1), USE.NAMES = FALSE)

    numSamplesOnlyCNA <- sum(nCNA > 0 & nSNV == 0)
    numSamplesOnlySNV <- sum(nSNV > 0 & nCNA == 0)
    numSamplesBothSNVAndCNA <- sum(nCNA > 0 & nSNV > 0)

    # samples that exceed the threshold are the ones that get removed
    removedSamples <- allSamples[nTotal > xS]
    numRemovedSamples <- length(removedSamples)
    numSamplesExceedThresh <- numRemovedSamples
    numSamplesNoChange <- totalSamples - numRemovedSamples

    ## PROCESS FILES AND UPDATE METADATA ##########

    # for each rCNAFileIn
    for (file in rCNAFileIn) {
        rCNA <- readRDS(file)

        # remove sample if sample in `removedSamples`; drop = FALSE keeps the
        # tabular structure even if a single column remains
        newRCNA <- rCNA[, !(colnames(rCNA) %in% removedSamples), drop = FALSE]

        # update metadata
        # NOTE: getUUID() is given the object by name, so the variable must
        # keep the name "newRCNA".
        getUUID("newRCNA", overwrite = TRUE)
        # ToDo: record updated UUID in log-file
        # ToDo: test that UUID is _actually_ being updated.

        # save new CNAFile with "filteredHypermutators_" prepended to filename
        saveRDS(newRCNA,
                file = file.path(dOut,
                                 paste0("filteredHypermutators_",
                                        basename(file))))
    }

    # for each rSNVFileIn
    for (file in rSNVFileIn) {
        rSNV <- readRDS(file)

        # A sample's mutations are rows of the SNV table, so the removal mask
        # has one entry per row and must index rows, not columns.
        isRemoved <- gsub("-", ".", rSNV$Tumor_Sample_Barcode,
                          fixed = TRUE) %in% removedSamples
        newRSNV <- rSNV[!isRemoved, , drop = FALSE]

        # update metadata
        # NOTE: getUUID() is given the object by name, so the variable must
        # keep the name "newRSNV".
        getUUID("newRSNV", overwrite = TRUE)
        # ToDo: record updated UUID in log-file
        # ToDo: test that UUID is _actually_ being updated.

        # save new SNVFile
        saveRDS(newRSNV,
                file = file.path(dOut,
                                 paste0("filteredHypermutators_",
                                        basename(file))))
    }

    ## LOGGING ###############

    if (writeLog) {

        logTitle <- "importFilterHypermutators"

        # Compile function call record
        logCall <- character()
        logCall[1] <- "importFilterHypermutators("
        logCall[2] <- sprintf("rSNVFileIn = (%s), ",
                              paste(rSNVFileIn, collapse = ", "))
        logCall[3] <- sprintf("rCNAFileIn = (%s), ",
                              paste(rCNAFileIn, collapse = ", "))
        logCall[4] <- sprintf("dOut = \"%s\", ", dOut)
        logCall[5] <- sprintf("xS = \"%s\", ", as.character(xS))
        logCall[6] <- sprintf("silent = %s, ", as.character(silent))
        logCall[7] <- sprintf("writeLog = %s, ", as.character(writeLog))
        logCall[8] <- sprintf("writeDetailedLog = %s)",
                              as.character(writeDetailedLog))
        logCall <- paste0(logCall, collapse = "")

        # Record progress information
        logNotes <- character()
        logNotes <- c(logNotes,
                      sprintf("Removed %s of %s samples",
                              numRemovedSamples, totalSamples))

        if (writeDetailedLog) {
            # fraction of all samples; 0 rather than NaN if there are none
            fraction <- function(n) {
                if (totalSamples > 0) n / totalSamples else 0
            }

            logNotes <- c(logNotes,
                          sprintf("%s of samples had both SNV and CNA",
                                  fraction(numSamplesBothSNVAndCNA)),
                          sprintf("%s of samples had only CNA",
                                  fraction(numSamplesOnlyCNA)),
                          sprintf("%s of samples had only SNV",
                                  fraction(numSamplesOnlySNV)),
                          sprintf("%s of samples had no change",
                                  fraction(numSamplesNoChange)),
                          sprintf("%s of samples exceeded threshold of %s",
                                  fraction(numSamplesExceedThresh), xS))

            # Accumulate all removed samples (one note per sample)
            logNotes <- c(logNotes, sprintf("%s was removed", removedSamples))
        }

        # send info to log file
        logEvent(eventTitle = logTitle,
                 eventCall = logCall,
                 notes = logNotes)
    }

}
|
||
# [END] |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
# testFilter
#
# Unit tests for importFilterHypermutators().
context("Tests for importFilterHypermutators.R")

# ==== BEGIN SETUP AND PREPARE =================================================
# Remember the logfile that was configured before the tests started, so that
# the teardown section can restore it.
OLOG <- as.character(getOption("rete.logfile"))

# Point logging at tempdir() for the duration of the tests and start from a
# clean slate there.
logFileName(fPath = tempdir(), setOption = TRUE)
logName <- unlist(getOption("rete.logfile"))
if (file.exists(logName)) {
    file.remove(logName)
}

# Input fixtures, resolved relative to the working directory.
SNVfileName <- "devSNV.rds"
CNAfileName <- "devCNA.rds"

# Paths at which importFilterHypermutators() is expected to write its output
# when dOut is the working directory.
filteredSNVfileName <- paste0(getwd(),
                              "/filteredHypermutators_",
                              basename(SNVfileName))
filteredCNAfileName <- paste0(getwd(),
                              "/filteredHypermutators_",
                              basename(CNAfileName))
# ==== END SETUP AND PREPARE ===================================================
|
||
|
||
# ==== importFilterHypermutators() ============================================= | ||
|
||
# Smoke test: a run on the fixture files with default-like arguments must not
# raise an error. Output goes to the working directory and is removed again.
test_that("importFilterHypermutators works correctly on valid input", {
    # regexp = NA asserts that NO error is signalled
    expect_error(
        importFilterHypermutators(rSNVFileIn = c(SNVfileName),
                                  rCNAFileIn = c(CNAfileName),
                                  dOut = getwd(),
                                  xS = 400,
                                  silent = FALSE,
                                  writeLog = TRUE),
        regexp = NA)

    # remove whatever output the run has left behind
    for (artifact in c(filteredSNVfileName, filteredCNAfileName)) {
        if (file.exists(artifact)) {
            file.remove(artifact)
        }
    }
})
|
||
test_that("importFilterHypermutators rejects invalid dOut arguments", {
    # Run the filter on the fixture files; only the output directory varies
    # between the calls below.
    runWithOutputDir <- function(outDir) {
        importFilterHypermutators(rSNVFileIn = c(SNVfileName),
                                  rCNAFileIn = c(CNAfileName),
                                  dOut = outDir,
                                  xS = 400,
                                  silent = FALSE,
                                  writeLog = TRUE)
    }

    # a NULL output directory is an error
    expect_error(runWithOutputDir(NULL))

    # so is a directory that does not exist
    expect_error(runWithOutputDir("no/such/path"))
})
|
||
test_that("importFilterHypermutators rejects invalid xS arguments", {
    # Run the filter on the fixture files with the default output directory;
    # only the mutation threshold varies between the calls below.
    runWithThreshold <- function(threshold) {
        importFilterHypermutators(rSNVFileIn = c(SNVfileName),
                                  rCNAFileIn = c(CNAfileName),
                                  xS = threshold,
                                  silent = FALSE,
                                  writeLog = TRUE)
    }

    # a NULL threshold is an error
    expect_error(runWithThreshold(NULL))

    # a threshold given as character is an error, even if it looks numeric
    expect_error(runWithThreshold("400"))
})
|
||
test_that("importFilterHypermutators correctly removes hypermutators", {
    # Load the unfiltered fixtures to establish the baseline.
    testCNA <- readRDS(CNAfileName)
    testSNV <- readRDS(SNVfileName)

    # Baseline: known number of sample columns (column 4 onwards) in the CNA
    # fixture and known number of barcode entries in the SNV fixture.
    cnaColsBefore <- colnames(testCNA)
    expect_equal(length(cnaColsBefore[4:length(cnaColsBefore)]), 579)
    expect_equal(length(testSNV$Tumor_Sample_Barcode), 15)

    # Filter with a low threshold; the run itself must not raise an error.
    expect_error(
        importFilterHypermutators(rSNVFileIn = c(SNVfileName),
                                  rCNAFileIn = c(CNAfileName),
                                  xS = 15,
                                  silent = FALSE,
                                  writeLog = TRUE),
        regexp = NA)

    # Read back what was written to the working directory.
    processedCNA <- readRDS(filteredCNAfileName)
    processedSNV <- readRDS(filteredSNVfileName)

    # 458 of the 579 CNA samples are expected to be removed, leaving 121.
    cnaColsAfter <- colnames(processedCNA)
    expect_equal(length(cnaColsAfter[4:length(cnaColsAfter)]), 121)

    # The SNV barcode column is expected to keep all 15 entries.
    expect_equal(length(processedSNV$Tumor_Sample_Barcode), 15)

    # test cleanup
    for (artifact in c(filteredSNVfileName, filteredCNAfileName)) {
        if (file.exists(artifact)) {
            file.remove(artifact)
        }
    }
})
|
||
# ==== BEGIN TEARDOWN AND RESTORE ==============================================
# Delete the logfile the tests have written, then put the original logfile
# name back into the options.
logName <- unlist(getOption("rete.logfile"))
if (file.exists(logName)) {
    file.remove(logName)
}
options("rete.logfile" = OLOG)

# Safety net: remove output files that a test may have left behind.
for (artifact in c(filteredSNVfileName, filteredCNAfileName)) {
    if (file.exists(artifact)) {
        file.remove(artifact)
    }
}
# ==== END TEARDOWN AND RESTORE ===============================================
|
||
# [END] |