Skip to content

Commit

Permalink
converted r script to a module
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonDMurray committed Mar 20, 2024
1 parent 1ad5d88 commit 1230a7b
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 63 deletions.
49 changes: 0 additions & 49 deletions bin/summarize_classification_sintax.R

This file was deleted.

1 change: 0 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ workflow {
//The original scripts used R to wrangle the sintax output; the same can be done with a single line of bash, so the R script is now an optional module
if (params.r_processing == true) {
R_PROCESSING(VSEARCH_SINTAX.out.tsv)
ch_versions = ch_versions.mix(R_PROCESSING.out.versions.first())
}
else {
PROCESSING(VSEARCH_SINTAX.out.tsv)
Expand Down
63 changes: 50 additions & 13 deletions modules/local/r_processing.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,60 @@ process R_PROCESSING {
input:
tuple val(meta), path(sintax_tsv)

//Only process files that have taxonomy predictions
when:
sintax_tsv.size() > 0

output:
tuple val(meta), path('*.classified.tsv') , emit: fasta
path "versions.yml" , emit: versions

script:
"""
if [ ! -s ${sintax_tsv} ]; then
echo "${sintax_tsv} has no sintax predictions" > "${meta.id}.classified.tsv"
else
Rscript ${projectDir}/bin/summarize_classification_sintax.R --input=${sintax_tsv} --output=${meta.id}
fi
md5sum "${meta.id}.classified.tsv" > "${meta.id}.classified.tsv.md5"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
R version: \$(R --version | grep "R version" | sed 's/[(].*//' | sed 's/ //g' | sed 's/[^0-9]*//')
END_VERSIONS
#!/usr/bin/env Rscript
# Summarise a VSEARCH SINTAX result table into one wide, tab-separated table.
#
# Input : "${sintax_tsv}" - headerless TSV; column 1 is the read label
#         (e.g. "id;size=N"), column 2 the comma-separated predictions,
#         each of the form "<rank>:<name>(<probability>)".
# Output: "${meta.id}.classified.tsv" - one row per read with the read id,
#         a name/probability pair for each of the seven ranks, and the
#         cluster size taken from the read label.
#
# The code deliberately avoids R's dollar operator and backslash escapes so
# that nothing inside this Nextflow script string needs extra escaping.
library(dplyr)

# Rank prefixes in the positional order they appear in column 2, and the
# column names they are reported under in the output table.
rank_prefixes <- c("k", "p", "c", "o", "f", "g", "s")
rank_names <- c(
  "kingdom", "division", "clade", "order", "family", "genus", "species"
)

# Split every element of `x` on a literal delimiter and return a character
# matrix with exactly `width` columns. Short rows are padded with NA instead
# of being silently recycled (which is what rbind() on ragged input does).
split_fixed <- function(x, delimiter, width) {
  pieces <- strsplit(as.character(x), delimiter, fixed = TRUE)
  padded <- lapply(pieces, function(piece) piece[seq_len(width)])
  matrix(as.character(unlist(padded)), ncol = width, byrow = TRUE)
}

# quote = "" and comment.char = "" keep apostrophes and hash signs in taxon
# names or read ids from being interpreted as quoting/comment markers.
sintax <- read.table(
  "${sintax_tsv}",
  header = FALSE,
  sep = "\t",
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE
)

# Turn empty strings into NA. Restricted to character columns because
# na_if() refuses to compare non-character columns (e.g. an all-empty column
# that read.table() typed as logical) against "".
sintax <- sintax %>%
  mutate(across(where(is.character), ~ na_if(.x, "")))

read_label <- sintax[["V1"]]

# Read id: everything before the first ";" of the label.
sample_id <- split_fixed(read_label, ";", 1L)[, 1]

# Cluster size: drop the ";" separators, then take what follows the "=".
size_parts <- split_fixed(gsub(";", "", read_label, fixed = TRUE), "=", 2L)

# One column per rank, assigned by position within the prediction string.
predictions <- split_fixed(sintax[["V2"]], ",", length(rank_prefixes))

classified <- data.frame(sample = sample_id, stringsAsFactors = FALSE)
for (i in seq_along(rank_prefixes)) {
  # "k:Name(0.98)" -> c("k:Name", "0.98")
  parts <- split_fixed(
    gsub(")", "", predictions[, i], fixed = TRUE), "(", 2L
  )
  rank_tag <- paste0(rank_prefixes[i], ":")
  classified[[rank_names[i]]] <- gsub(rank_tag, "", parts[, 1], fixed = TRUE)
  classified[[paste0("prob_", rank_names[i])]] <- as.numeric(parts[, 2])
}
classified[["size"]] <- as.numeric(size_parts[, 2])

write.table(
  classified,
  file = paste0("${meta.id}", ".classified.tsv"),
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)
"""
}

0 comments on commit 1230a7b

Please sign in to comment.