Merge pull request #125 from nf-core/dev

Patch fix v1.0.3
nf-core · Feb 23, 2024 · 2b7adfa
2 parents f70bbce + 5560492
commit 2b7adfa
Show file tree

Hide file tree

Showing 8 changed files with 28 additions and 20 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,11 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## v1.0.3 - 2024-02-23
+
+- Improved TPM file splitting performance (Issue #120).
+- Fixed an issue where R scripts altered sample names upon loading (Issue #122).
+
 ## v1.0.2 - 2024-01-08
 
 Patch for run_stager.R (#108) and template update v2.11.1 (#109).

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
@@ -1,7 +1,7 @@
 report_comment: >
-  This report has been generated by the <a href="https://github.com/nf-core/rnasplice/releases/tag/1.0.2" target="_blank">nf-core/rnasplice</a>
+  This report has been generated by the <a href="https://github.com/nf-core/rnasplice/releases/tag/1.0.3" target="_blank">nf-core/rnasplice</a>
   analysis pipeline. For information about how to interpret these results, please see the
-  <a href="https://nf-co.re/rnasplice/1.0.2/docs/output" target="_blank">documentation</a>.
+  <a href="https://nf-co.re/rnasplice/1.0.3/docs/output" target="_blank">documentation</a>.
 
 report_section_order:
   "nf-core-rnasplice-methods-description":

diff --git a/bin/run_dexseq_dtu.R b/bin/run_dexseq_dtu.R
@@ -77,21 +77,21 @@ vectorToDataFrame <- function(x) {
 
 # Read samples table
 
-samples <- read.delim(samples, stringsAsFactors = TRUE)
+samples <- read.delim(samples, stringsAsFactors = TRUE, check.names = FALSE)
 
 colnames(samples) <- c("sample", "condition")
 
 
 # Read contrasts table
 
-contrasts <- read.csv(contrasts)
+contrasts <- read.csv(contrasts, check.names = FALSE)
 
 contrasts <- contrasts[, c("contrast", "treatment", "control"), drop = FALSE]
 
 
 # Read counts table
 
-counts <- read.table(counts, sep = "\t", header = TRUE)
+counts <- read.table(counts, sep = "\t", header = TRUE, check.names = FALSE)
 
 annotation <- data.frame(
     featureID = counts$feature_id,

diff --git a/bin/run_dexseq_exon.R b/bin/run_dexseq_exon.R
@@ -99,15 +99,15 @@ write.plotDEXSeq <- function(x, file, ntop = 10) {
 
 # Read samples table
 
-samples <- read.csv(samples, stringsAsFactors = TRUE)
+samples <- read.csv(samples, stringsAsFactors = TRUE, check.names = FALSE)
 
 samples <- samples[, c("sample", "condition"), drop = FALSE]
 
 samples <- unique(samples)
 
 # Read contrasts table
 
-contrasts <- read.csv(contrasts)
+contrasts <- read.csv(contrasts, check.names = FALSE)
 
 contrasts <- contrasts[, c("contrast", "treatment", "control"), drop = FALSE]
 

diff --git a/bin/run_drimseq_filter.R b/bin/run_drimseq_filter.R
@@ -43,7 +43,7 @@ tx2gene <- read.csv(tximport_tx2gene, sep="\t", header = TRUE)
 ######################################
 
 # Read in Sample sheet
-samps <- read.csv(samplesheet, sep=",", header = TRUE)
+samps <- read.csv(samplesheet, sep=",", header = TRUE, check.names = FALSE)
 
 # check header of sample sheet
 if (!c("sample") %in% colnames(samps) | !c("condition") %in% colnames(samps)) {

diff --git a/bin/run_edger_exon.R b/bin/run_edger_exon.R
@@ -28,7 +28,7 @@ library(edgeR)
 
 # Read samples table
 
-samples <- read.csv(samplesheet)
+samples <- read.csv(samplesheet, check.names = FALSE)
 
 samples <- samples[, c("sample", "condition"), drop = FALSE]
 
@@ -37,7 +37,7 @@ samples <- unique(samples)
 
 # Read contrasts table
 
-contrasts <- read.csv(contrastsheet)
+contrasts <- read.csv(contrastsheet, check.names = FALSE)
 
 contrasts <- contrasts[, c("contrast", "treatment", "control"), drop = FALSE]
 

diff --git a/bin/suppa_split_file.R b/bin/suppa_split_file.R
@@ -35,12 +35,18 @@ if (length(args) == 5){
 
 }
 
+######################################
+######### Read in input file #########
+######################################
+
+input_data <- read.csv(input_file, sep = "\t", header = TRUE, check.names = FALSE)
+
 ######################################
 ####### Process samplesheet ##########
 ######################################
 
 # Read in samplesheet
-samplesheet <- read.csv(samplesheet, header = TRUE)
+samplesheet <- read.csv(samplesheet, header = TRUE, check.names = FALSE)
 
 # check header of sample sheet
 if (!c("sample") %in% colnames(samplesheet) | !c("condition") %in% colnames(samplesheet)) {
@@ -63,19 +69,16 @@ conditions <- unique(samplesheet[,"condition"])
 #########################################################
 
 # Function for taking all sample names associated with a given condition
-split_files <- function(condition, samplesheet, input_file, output_file_suffix, prefix, calculate_ranges){
+split_files <- function(condition, samplesheet, input_data, output_file_suffix, prefix, calculate_ranges){
 
     # Get indices of rows which cover given condition for ranges
     indices <- which(samplesheet$condition == condition)
 
     # Get sample names for given condition
     sample_names <- samplesheet[samplesheet$condition == condition,]$sample
 
-    # Read in input file
-    input_file <- read.csv(input_file, sep="\t", header=TRUE)
-
-    # Check header of input_file contains all samples from processed samplesheet
-    if (!all(samplesheet$sample %in% colnames(input_file))) {
+    # Check header of input_data contains all samples from processed samplesheet
+    if (!all(samplesheet$sample %in% colnames(input_data))) {
 
         stop("suppa_split_file.R Input_file must contain samplesheet samples.", call.=FALSE)
 
@@ -92,7 +95,7 @@ split_files <- function(condition, samplesheet, input_file, output_file_suffix,
     }
 
     # Subset input files and save out as new file
-    write.table(input_file[,sample_names, drop=F], file = output_file, quote = FALSE, sep = "\t")
+    write.table(input_data[,sample_names, drop=F], file = output_file, quote = FALSE, sep = "\t")
 
     # Get Cluster ranges which match the tpm and psi files above (1-3 4-6)
     # Column numbers have to be continuous, with no overlapping or missing columns between them. Ex:1-3,4-6
@@ -118,7 +121,7 @@ split_files <- function(condition, samplesheet, input_file, output_file_suffix,
 for (cond in conditions) {
 
     # Split files
-    split_files(cond, samplesheet, input_file, output_file_suffix, prefix, calculate_ranges)
+    split_files(cond, samplesheet, input_data, output_file_suffix, prefix, calculate_ranges)
 
 }
 

diff --git a/nextflow.config b/nextflow.config
@@ -356,7 +356,7 @@ manifest {
     description     = """Alternative splicing analysis using RNA-seq."""
     mainScript      = 'main.nf'
     nextflowVersion = '!>=23.04.0'
-    version         = '1.0.2'
+    version         = '1.0.3'
     doi             = '10.5281/zenodo.8424632'
 }