Skip to content

Commit

Permalink
Merge pull request #57 from MichaelLampe/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
MichaelLampe committed Jul 23, 2015
2 parents 3540756 + f446ee8 commit 7505a3d
Show file tree
Hide file tree
Showing 76 changed files with 1,356 additions and 6,346 deletions.
9 changes: 9 additions & 0 deletions R/Main/GLSeq.Alignment.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@ if (aAlgor == "Cushaw"){
source ("GLSeq.CUSHAW.R")
}

################################################
#Cushaw w/ and w/o GPU Accel Alignment Protocol
################################################
# Dispatch for the GPU-accelerated CUSHAW aligner: taken when aAlgor is
# exactly "Cushaw_GPU". It sources the same GLSeq.CUSHAW.R script that the
# plain "Cushaw" branch uses.
if (aAlgor == "Cushaw_GPU"){
# NOTE(review): `occured` presumably flags that a known aligner was matched
# and is checked after these dispatch blocks -- confirm against the rest of the file.
occured <- TRUE
source ("GLSeq.CUSHAW.R")
}


################################################
# TopHat Alignment Protocol
################################################
Expand Down
70 changes: 21 additions & 49 deletions R/Main/GLSeq.BWA.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ source("GLSeq.Util.R")
source("GLSeq.Alignment.Functions.R")
setwd(dest.dir)

comm.stack.pool <- NULL

indCopy <- copyGenome(base.dir,rGenome,refFASTAname,dest.dir)
indCopy <- copy.genome(base.dir,rGenome,refFASTAname,dest.dir)
system(indCopy)
###################################################################################
# Index the BWA
Expand All @@ -14,15 +12,22 @@ system(index)
#
for (zz in 1:nStreams) {
# Assembling and running the system command, one library at a time:
# For the very first assembly in the stack:
if (zz == 1) comm.stack.pool <- "date"
if (zz != 1) comm.stack.pool <- paste(comm.stack.pool,"date")

for (i in rangelist[[zz]]) {
###################
# Alignment with SAM output
###################
# Grab and name everything correctly
# Names of current fastq files:
fq.left <- fqfiles.table[i,1]
if (paired.end) fq.right <- fqfiles.table[i,2]
name <- assign.name(fqfiles.table[i,1],paired.end)
name <- fq.left
if (paired.end){
name <- substr(name,1,nchar(name) - 5)
} else{
name <- substr(name,1,nchar(name) - 3)
}
this.resName <- assign.resName(name,text.add)
unsorted.sam <- paste(this.resName, "unsorted", sep=".")
# Names of the expected sai files
# Sai files are an intermediate file type for BWA only.
# Read the original paper for more info: http://bioinformatics.oxfordjournals.org/content/25/14/1754.full
Expand All @@ -38,32 +43,6 @@ for (zz in 1:nStreams) {
# We take the output of the sam*e program (sampe or samse) and append it to the unsorted SAM file
if (paired.end) sam.create <- paste(bwaPath, "sampe", refFASTAname, sainame.left, sainame.right, fq.left, fq.right, ">>", unsorted.sam)
if (!(paired.end)) sam.create <- paste(bwaPath, "samse", refFASTAname, sainame.left, fq.left, ">>", unsorted.sam)
#
###################
# SAM file Cleanup
###################
# Picard tools has a Sam cleaner that helps us out here
cleanSAM <- paste("java -Xmx2g -jar ",picardToolsPath, "CleanSam.jar", sep="")
# We give the file that will be output from the CleanSam.jar the Resname + a cleaned.sam suffix
cleaned.sam <- paste(this.resName, "cleaned.sam", sep=".")
#
# SAM cleanup system command:
# I = Input file; O= Output file
cleansam.comm <- paste(cleanSAM, " I=", unsorted.sam, " O=", cleaned.sam, sep="") # System command #4
#
###################
# Adding RG Header + sorting
###################
# Picard tools also lets us modify the read group headers
headersortSAM <- paste("java -Xmx2g -jar ",picardToolsPath, "AddOrReplaceReadGroups.jar", sep="")
# Name of the processed (final) SAM file
final.sam <- paste(this.resName, "final.sam", sep=".")
#
# Implement the logic for sequencer specificity here.
#
# This solution may not work for data derived from Non-illumina sequencing data. Haven't tested that yet as I don't have any data to.
# I = Input file; O= Output File
finalsam.comm <- paste(headersortSAM, " I=", cleaned.sam, " O=", final.sam, " SO=coordinate LB=", refFASTAname, " PL=ILLUMINA PU=unknown SM=", this.resName, " VALIDATION_STRINGENCY=LENIENT", sep="")
###################
# SAM => BAM file with index
###################
Expand All @@ -76,7 +55,7 @@ for (zz in 1:nStreams) {
# -t = TAB-delimited file
# I'm not sure whether we need to first pipe it into the sorted.arg format and index, or whether we could just sort
# This is something I will test in the future to possibly speed up the protocol a bit.
bam.create <- paste("samtools view -uS -t ", ref.index, final.sam, " | samtools sort - ", sorted.arg)
bam.create <- paste("samtools view -uS -t ", ref.index, unsorted.sam, " | samtools sort - ", sorted.arg)
bam.index <- paste("samtools index", sorted.bam) # System command #7
###################
# Converting the final bam (coordinate-sorted)
Expand Down Expand Up @@ -105,8 +84,9 @@ for (zz in 1:nStreams) {
# Housekeeping
###################
# We remove a bunch of files here to make sure we don't clog up the hard drives too much with all the intermediate steps
if (paired.end) spaceCleanup <- paste("rm", sainame.left, "&& rm", sainame.right, "&& rm", unsorted.sam, "&& rm", cleaned.sam, "&& rm", final.sam, "&& rm", paired.bam)
if (!(paired.end)) spaceCleanup <- paste("rm", sainame.left, "&& rm", unsorted.sam, "&& rm", cleaned.sam, "&& rm", final.sam, "&& rm", paired.bam)
bai <- paste(sorted.bam,"bai",sep=".")
if (paired.end) spaceCleanup <- paste("rm",fq.left,"&& rm",fq.right,"&& rm", sainame.left, "&& rm", sainame.right, "&& rm", unsorted.sam, "&& rm", paired.bam,"&& rm",sorted.bam,"&& rm", bai)
if (!(paired.end)) spaceCleanup <- paste("rm", sainame.left, "&& rm",fq.left,"&& rm", unsorted.sam,"&& rm", paired.bam,"&& rm",sorted.bam,"&& rm", bai)
# Paired Ended Samples vs Unpaired
# Paired-end runs align two files at once (because they are paired),
# so we add in both the right and the left.
Expand All @@ -116,25 +96,17 @@ for (zz in 1:nStreams) {
if (!paired.end){
comm.i <- paste(aln.left)
}
comm.i <- paste(comm.i,"&&",sam.create,"&&",cleansam.comm,"&&",finalsam.comm,"&&",bam.create,"&&",bam.index,"&&",countable.comm)
comm.i <- paste(comm.i,"&&",sam.create,"&&",bam.create,"&&",bam.index,"&&",countable.comm)
# Counting
# Whatever was added to count.comm when it looked for counting protocols is added here
comm.i <- paste(comm.i,"&&",count.comm)
# Cleanup File
# Removes unneeded files.
comm.i <- paste(comm.i,"&&",spaceCleanup)
# For the very first assembly in the stack:
if (i == rangelist[[zz]][1]) comm.stack.pool <- paste(comm.stack.pool, "date && ", comm.i)
# for subsequent assemblies of every stack:
if (i != rangelist[[zz]][1]) comm.stack.pool <- paste(comm.stack.pool, " && date && ", comm.i)
# system(comm.i)
if (resCollect == "collect"){
collLog <- paste(destDirLog, text.add, ".ResultsCollectLog.txt", sep="")
collerr <- paste(destDirLog, text.add, ".ResultsCollectErrors.txt", sep="")
collResults <- paste("cd ", base.dir, " && ", "Rscript GLSeqResultsCollect.R ", text.add, base.dir, dest.dir, " 0 1>> ", collLog, " 2>> ", collerr, sep="")
if (is.null(comm.stack.pool)) comm.stack.pool <- paste(collResults)
if (!is.null(comm.stack.pool)) comm.stack.pool <- paste(comm.stack.pool,"&&",collResults)
}
# For the very first assembly in the stack:
# For the very first assembly in the stack (i = 1)
comm.stack.pool <- paste(comm.stack.pool,"&&",comm.i)
} # for i
comm.stack.pool <- paste(comm.stack.pool,"&")
}
Expand Down
2 changes: 1 addition & 1 deletion R/Main/GLSeq.Bowtie.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ setwd(dest.dir)

comm.stack.pool <- NULL

indCopy <- copyGenome(base.dir,rGenome,refFASTAname,dest.dir)
indCopy <- copy.genome(base.dir,rGenome,refFASTAname,dest.dir)
system(indCopy)
####################
# Index the Bowtie or Bowtie2 Aligner
Expand Down
12 changes: 6 additions & 6 deletions R/Main/GLSeq.CUSHAW.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ system(index)
### Our NVidia Titan can sometimes run two at a time without alignment corruption, but this is a safer route.
####################################
sam.create <- "date"
if (GPU.accel){
if (aAlgor == "Cushaw_GPU"){
for (zz in 1:nStreams) {
for (i in rangelist[[zz]]) {
###################
Expand Down Expand Up @@ -113,7 +113,7 @@ for (zz in 1:nStreams) {
###################
# If no GPU, we can run all of the above files in parallel
###################
if (!GPU.accel){
if (aAlgor == "Cushaw"){
if (paired.end) sam.create<- paste(CUSHAW.path, "-r", refFASTAname, "-q", fq.left, fq.right, "-o", unsorted.sam , "-t", nCores)
if (!paired.end) sam.create <- paste(CUSHAW.path, "-r", refFASTAname, "-f", fq.left, "-o", unsorted.sam,"-t", nCores)
}
Expand Down Expand Up @@ -150,15 +150,15 @@ for (zz in 1:nStreams) {
###################
# Current command:
###################
if (!GPU.accel) comm.i <- paste(sam.create, "&&",bam.create,"&&",bam.index,"&&",convert.to.sam)
if (GPU.accel) comm.i <- paste(bam.create,"&&",bam.index,"&&",convert.to.sam)
if (aAlgor == "Cushaw") comm.i <- paste(sam.create, "&&",bam.create,"&&",bam.index,"&&",convert.to.sam)
if (aAlgor == "Cushaw_GPU") comm.i <- paste(bam.create,"&&",bam.index,"&&",convert.to.sam)
if (count.comm != "") comm.i <- paste(comm.i, "&&", count.comm)
#
# For the very first assembly in the stack (i = 1)
if (i == rangelist[[zz]][1]) comm.stack.pool <- paste(comm.stack.pool, "cd",dest.dir,"&&", comm.i)
if (i == rangelist[[zz]][1]) comm.stack.pool <- paste(comm.stack.pool, "cd date","&&", comm.i)
#
# For subsequent assemblies of every stack (i > 1)
if (i != rangelist[[zz]][1]) comm.stack.pool <- paste(comm.stack.pool, "&&","cd",dest.dir,"&&", comm.i)
if (i != rangelist[[zz]][1]) comm.stack.pool <- paste(comm.stack.pool, "&&","cd date","&&", comm.i)
# system(comm.i)
} # for i
comm.stack.pool <- paste(comm.stack.pool,"&")
Expand Down
2 changes: 1 addition & 1 deletion R/Main/GLSeq.HTSeq.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,5 @@ countOpt <- paste(countOpt, " --idattr=", idAttr, sep="")
# Throws the output into a counting file and then moves the output over into the HTSeq folder.
HtSeq.comm <- paste("python -m HTSeq.scripts.count",countOpt, countable.sam, refGFFname, ">", countfile,"&&","mv",countfile,destDirHTSeqCount)
# Adds onto the end of or creates the count.comm which communicates the counting routine to the main command pool
if (count.comm != "") count.comm <- paste(count.comm, ";", HtSeq.comm)
if (count.comm != "") count.comm <- paste(count.comm, "&&", HtSeq.comm)
if (count.comm == "") count.comm <- paste(HtSeq.comm)
17 changes: 9 additions & 8 deletions R/Main/GLSeq.TopHat.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,20 @@
#########################################################
#
#
source("GLSeq.Alignment.Functions.R")
source("GLSeq.Util.R")
setwd(dest.dir)

comm.stack.pool <- NULL

#
####################################
# Copy genome indices to the destination dir:
####################################
indCopy <- copy.genome(base.dir,rGenome,refFASTAname,dest.dir)
system(indCopy)
####################

file.name.change <- "date"
if (paired.end){
for (zz in 1:nStreams) {
Expand All @@ -35,14 +44,6 @@ if (paired.end){
system(file.name.change)
}
}
#
####################################
# Copy genome indices to the destination dir:
####################################
ref.dir <- paste(base.dir, rGenome, sep="")
indCopy <- paste("cd ", ref.dir, " && cp ",refFASTAname," ",dest.dir, sep="")
system(indCopy)
####################
# Index for TopHat Aligner with Bowtie2
#
# Prepare reference, what aligner to use, what reference to use, file name
Expand Down
3 changes: 1 addition & 2 deletions R/Main/GLSeq.top.R
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,9 @@ if (alignment == "noalignment") {

if (counting == "counting"){
if ("RSEM" %in% cAlgor){
RSEM.finish(comm.stack.pool,destDirRSEMCount,dest.dir)
comm.stack.pool <- RSEM.finish(comm.stack.pool,destDirRSEMCount,dest.dir)
}
}
save.run.data(base.dir,text.add)
execute.comm.stack(comm.stack.pool,log.file)
add.to.logs(paste("The process took:",(proc.time()[3]-start.time[3]),"seconds to complete."),log.file)
stop("Program complete.")
Expand Down
Binary file modified Tests/GLSeq Testing Combinations.xlsx
Binary file not shown.
Binary file added User Interface/GLSeq2.1_UI.jar
Binary file not shown.
Binary file removed User Interface/GLseq2_UI.jar
Binary file not shown.
25 changes: 0 additions & 25 deletions User Interface/Raw Java Files/Tests/JsonTest.java

This file was deleted.

9 changes: 0 additions & 9 deletions User Interface/Raw Java Files/Tests/Test_Main.java

This file was deleted.

Loading

0 comments on commit 7505a3d

Please sign in to comment.