diff --git a/CHANGELOG.md b/CHANGELOG.md index 101448b..de1cc28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.0.3 - 2024-02-23 + +- Improved TPM file splitting performance (Issue #120). +- Fixed an issue where R scripts altered sample names upon loading (Issue #122). + ## v1.0.2 - 2024-01-08 Patch for run_stager.R (#108) and template update v2.11.1 (#109). diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 474b860..059da2f 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/rnasplice + This report has been generated by the nf-core/rnasplice analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-rnasplice-methods-description": diff --git a/bin/run_dexseq_dtu.R b/bin/run_dexseq_dtu.R index fca4364..5fe3d3b 100755 --- a/bin/run_dexseq_dtu.R +++ b/bin/run_dexseq_dtu.R @@ -77,21 +77,21 @@ vectorToDataFrame <- function(x) { # Read samples table -samples <- read.delim(samples, stringsAsFactors = TRUE) +samples <- read.delim(samples, stringsAsFactors = TRUE, check.names = FALSE) colnames(samples) <- c("sample", "condition") # Read contrasts table -contrasts <- read.csv(contrasts) +contrasts <- read.csv(contrasts, check.names = FALSE) contrasts <- contrasts[, c("contrast", "treatment", "control"), drop = FALSE] # Read counts table -counts <- read.table(counts, sep = "\t", header = TRUE) +counts <- read.table(counts, sep = "\t", header = TRUE, check.names = FALSE) annotation <- data.frame( featureID = counts$feature_id, diff --git a/bin/run_dexseq_exon.R b/bin/run_dexseq_exon.R index 3528937..162ab66 100755 --- a/bin/run_dexseq_exon.R +++ b/bin/run_dexseq_exon.R @@ -99,7 +99,7 @@ write.plotDEXSeq <- function(x, file, ntop = 10) { # Read samples table -samples <- read.csv(samples, stringsAsFactors = TRUE) +samples <- read.csv(samples, stringsAsFactors = TRUE, check.names = FALSE) samples <- samples[, c("sample", "condition"), drop = FALSE] @@ -107,7 +107,7 @@ samples <- unique(samples) # Read contrasts table -contrasts <- read.csv(contrasts) +contrasts <- read.csv(contrasts, check.names = FALSE) contrasts <- contrasts[, c("contrast", "treatment", "control"), drop = FALSE] diff --git a/bin/run_drimseq_filter.R b/bin/run_drimseq_filter.R index d2f3319..7153a3f 100755 --- a/bin/run_drimseq_filter.R +++ b/bin/run_drimseq_filter.R @@ -43,7 +43,7 @@ tx2gene <- read.csv(tximport_tx2gene, sep="\t", header = TRUE) ###################################### # Read in Sample sheet -samps <- read.csv(samplesheet, sep=",", header = TRUE) +samps <- read.csv(samplesheet, sep=",", header = TRUE, check.names = FALSE) # check header of sample sheet if (!c("sample") %in% colnames(samps) | !c("condition") %in% colnames(samps)) { diff --git a/bin/run_edger_exon.R b/bin/run_edger_exon.R index 8068bf7..39fd5c0 100755 --- a/bin/run_edger_exon.R +++ b/bin/run_edger_exon.R @@ -28,7 +28,7 @@ library(edgeR) # Read samples table -samples <- read.csv(samplesheet) +samples <- read.csv(samplesheet, check.names = FALSE) samples <- samples[, c("sample", "condition"), drop = FALSE] @@ -37,7 +37,7 @@ samples <- unique(samples) # Read contrasts table -contrasts <- read.csv(contrastsheet) +contrasts <- read.csv(contrastsheet, check.names = FALSE) contrasts <- contrasts[, c("contrast", "treatment", "control"), drop = FALSE] diff --git a/bin/suppa_split_file.R b/bin/suppa_split_file.R index 1fec398..dfbf181 100755 --- a/bin/suppa_split_file.R +++ b/bin/suppa_split_file.R @@ -35,12 +35,18 @@ if (length(args) == 5){ } +###################################### +######### Read in input file ######### +###################################### + +input_data <- read.csv(input_file, sep = "\t", header = TRUE, check.names = FALSE) + ###################################### ####### Process samplesheet ########## ###################################### # Read in samplesheet -samplesheet <- read.csv(samplesheet, header = TRUE) +samplesheet <- read.csv(samplesheet, header = TRUE, check.names = FALSE) # check header of sample sheet if (!c("sample") %in% colnames(samplesheet) | !c("condition") %in% colnames(samplesheet)) { @@ -63,7 +69,7 @@ conditions <- unique(samplesheet[,"condition"]) ######################################################### # Function for taking all sample names associated with a given condition -split_files <- function(condition, samplesheet, input_file, output_file_suffix, prefix, calculate_ranges){ +split_files <- function(condition, samplesheet, input_data, output_file_suffix, prefix, calculate_ranges){ # Get indices of rows which cover given condition for ranges indices <- which(samplesheet$condition == condition) @@ -71,11 +77,8 @@ split_files <- function(condition, samplesheet, input_file, output_file_suffix, # Get sample names for given condition sample_names <- samplesheet[samplesheet$condition == condition,]$sample - # Read in input file - input_file <- read.csv(input_file, sep="\t", header=TRUE) - - # Check header of input_file contains all samples from processed samplesheet - if (!all(samplesheet$sample %in% colnames(input_file))) { + # Check header of input_data contains all samples from processed samplesheet + if (!all(samplesheet$sample %in% colnames(input_data))) { stop("suppa_split_file.R Input_file must contain samplesheet samples.", call.=FALSE) @@ -92,7 +95,7 @@ split_files <- function(condition, samplesheet, input_file, output_file_suffix, } # Subset input files and save out as new file - write.table(input_file[,sample_names, drop=F], file = output_file, quote = FALSE, sep = "\t") + write.table(input_data[,sample_names, drop=F], file = output_file, quote = FALSE, sep = "\t") # Get Cluster ranges which match the tpm and psi files above (1-3 4-6) # Column numbers have to be continuous, with no overlapping or missing columns between them. Ex:1-3,4-6 @@ -118,7 +121,7 @@ split_files <- function(condition, samplesheet, input_file, output_file_suffix, for (cond in conditions) { # Split files - split_files(cond, samplesheet, input_file, output_file_suffix, prefix, calculate_ranges) + split_files(cond, samplesheet, input_data, output_file_suffix, prefix, calculate_ranges) } diff --git a/nextflow.config b/nextflow.config index 3ac0090..2f63b75 100644 --- a/nextflow.config +++ b/nextflow.config @@ -356,7 +356,7 @@ manifest { description = """Alternative splicing analysis using RNA-seq.""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.0.2' + version = '1.0.3' doi = '10.5281/zenodo.8424632' }