Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

suppa fix attempt 1 #86

Merged
merged 2 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions bin/suppa_groups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env python3
# Author: Zifo Bioinformatics
# Email: [email protected]
# License: MIT

import argparse
import itertools


def main(args):
    """Print the 1-based column ranges of the two sample groups in a header.

    Reads only the first line of ``args.file``, splits it on tabs into sample
    names (e.g. ``transcript_GBR_1``), drops the trailing ``_<replicate>``
    part of each name to obtain its condition, and collapses runs of
    consecutive identical conditions into ``start-end`` ranges. The ranges
    are written to stdout as ``start-end,start-end`` with no trailing newline.

    Args:
        args: Object with a ``file`` attribute holding the path to a
            tab-delimited file whose first line lists the sample names.

    Raises:
        AssertionError: If the header does not yield exactly two runs of
            consecutive conditions (e.g. the columns of one condition are
            interleaved with another's, or more/fewer than two conditions
            are present).
    """
    # Only the header is needed; the context manager guarantees the handle is
    # closed even if reading fails.
    with open(args.file, "r") as handle:
        header = handle.readline()

    # Drop the line terminator first so that the last sample name is not
    # polluted by "\n" (which would otherwise make a suffix-less last column
    # look like a different condition).
    samples = header.rstrip("\r\n").split("\t")

    # Trim replicate number (e.g., transcript_GBR_1 -> transcript_GBR)
    conditions = [sample.rsplit("_", 1)[0] for sample in samples]

    # Collapse consecutive identical conditions into inclusive, 1-based
    # column ranges (e.g., [(1, 2), (3, 4)]).
    ranges = []
    first = 1
    for _, members in itertools.groupby(conditions):
        size = sum(1 for _ in members)
        ranges.append((first, first + size - 1))
        first += size

    # Explicit raise instead of `assert` so the check survives `python -O`;
    # the exception type and message are unchanged.
    if len(ranges) != 2:
        raise AssertionError(
            "Column numbers have to be continuous, with no overlapping or missing columns between them."
        )

    # Format ranges for printing
    groups = ",".join(f"{lo}-{hi}" for lo, hi in ranges)

    # Print to stdout without newline
    print(groups, end="")


# Script entry point: takes a single positional argument, the path of the
# file whose header line is inspected by main().
if __name__ == "__main__":
    cli = argparse.ArgumentParser()
    cli.add_argument("file")
    main(cli.parse_args())
4 changes: 2 additions & 2 deletions modules/local/suppa_clusterevents.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ process CLUSTEREVENTS {
input:
tuple val(cond1), val(cond2), path(dpsi)
tuple val(cond1), val(cond2), path(psivec)
val cluster_ranges // e.g. 1-3,4-6
val group_ranges // e.g. 1-3,4-6
val prefix
val clusterevents_dpsithreshold // val params.clusterevents_dpsithreshold
val clusterevents_eps // val params.clusterevents_eps
Expand Down Expand Up @@ -43,7 +43,7 @@ process CLUSTEREVENTS {
--eps $clusterevents_eps \\
--metric $clusterevents_metric \\
--min-pts $clusterevents_min_pts \\
--groups $cluster_ranges \\
--groups $group_ranges \\
--clustering $clusterevents_method \\
$clusterevents_sigthreshold $clusterevents_separation -o ${cond1}-${cond2}_${prefix}_cluster

Expand Down
24 changes: 24 additions & 0 deletions modules/local/suppa_clustergroups.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Runs the suppa_groups.py helper on a contrast's psivec file and emits
// whatever the script prints (a string of column ranges such as "1-3,4-6").
process CLUSTERGROUPS {
// Tag each task with the contrast it belongs to
tag "${cond1}-${cond2}"
label 'process_single'

// Python 3.9 environment; the Singularity image is used when the container
// engine is singularity and task.ext.singularity_pull_docker_container is not
// set, otherwise the biocontainers image is used.
conda "conda-forge::python=3.9.5"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3.9--1' :
'biocontainers/python:3.9--1' }"

input:
// cond1 / cond2: the two condition names of the contrast (used for the tag)
// psivec: file passed to suppa_groups.py, which reads only its header line
tuple val(cond1), val(cond2), path(psivec)

output:
// The script's standard output is the process output (no file is produced)
stdout

when:
task.ext.when == null || task.ext.when

script:
"""
suppa_groups.py $psivec
"""

}
56 changes: 30 additions & 26 deletions subworkflows/local/suppa.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ include { SPLIT_FILES as SPLIT_FILES_IOI } from '../../modules/local/suppa_split
include { DIFFSPLICE as DIFFSPLICE_IOE } from '../../modules/local/suppa_diffsplice.nf'
include { DIFFSPLICE as DIFFSPLICE_IOI } from '../../modules/local/suppa_diffsplice.nf'

include { CLUSTERGROUPS as CLUSTERGROUPS_IOE } from '../../modules/local/suppa_clustergroups.nf'
include { CLUSTERGROUPS as CLUSTERGROUPS_IOI } from '../../modules/local/suppa_clustergroups.nf'

include { CLUSTEREVENTS as CLUSTEREVENTS_IOE } from '../../modules/local/suppa_clusterevents.nf'
include { CLUSTEREVENTS as CLUSTEREVENTS_IOI } from '../../modules/local/suppa_clusterevents.nf'

Expand Down Expand Up @@ -85,7 +88,7 @@ workflow SUPPA {
ch_dpsi_local = Channel.empty()
ch_psivec_local = Channel.empty()

ch_ranges_ioe = Channel.empty()
ch_groups_ioe = Channel.empty()
ch_cluster_vec_local = Channel.empty()
ch_cluster_log_local = Channel.empty()

Expand Down Expand Up @@ -149,13 +152,13 @@ workflow SUPPA {

ch_suppa_local_contrasts = ch_suppa_local_contrasts
.map { it -> [it['treatment'], it] }
.cross ( ch_suppa_tpm_conditions )
.map { it -> it[0][1] + ['tpm1': it[1][1]] }
.combine ( ch_suppa_tpm_conditions, by: 0 )
.map { it -> it[1] + ['tpm1': it[2]] }

ch_suppa_local_contrasts = ch_suppa_local_contrasts
.map { it -> [it['control'], it] }
.cross ( ch_suppa_tpm_conditions )
.map { it -> it[0][1] + ['tpm2': it[1][1]] }
.combine ( ch_suppa_tpm_conditions, by: 0 )
.map { it -> it[1] + ['tpm2': it[2]] }

// Add PSI files to contrasts channel

Expand All @@ -165,13 +168,13 @@ workflow SUPPA {

ch_suppa_local_contrasts = ch_suppa_local_contrasts
.map { it -> [it['treatment'], it] }
.cross ( ch_suppa_psi_conditions )
.map { it -> it[0][1] + ['psi1': it[1][1]] }
.combine ( ch_suppa_psi_conditions, by: 0 )
.map { it -> it[1] + ['psi1': it[2]] }

ch_suppa_local_contrasts = ch_suppa_local_contrasts
.map { it -> [it['control'], it] }
.cross ( ch_suppa_psi_conditions )
.map { it -> it[0][1] + ['psi2': it[1][1]] }
.combine ( ch_suppa_psi_conditions, by: 0 )
.map { it -> it[1] + ['psi2': it[2]] }

// Create input channels to diffsplice process

Expand Down Expand Up @@ -202,16 +205,16 @@ workflow SUPPA {

// Get ranges for cluster analysis

ch_ranges_ioe = SPLIT_FILES_IOE.out.ranges
.splitText( by: 1 ) { it.trim() }
.first()
CLUSTERGROUPS_IOE ( ch_psivec_local )

ch_groups_ioe = CLUSTERGROUPS_IOE.out

// Run Clustering

CLUSTEREVENTS_IOE(
ch_dpsi_local,
ch_psivec_local,
ch_ranges_ioe,
ch_groups_ioe,
prefix,
clusterevents_dpsithreshold,
clusterevents_eps,
Expand All @@ -237,7 +240,7 @@ workflow SUPPA {
ch_dpsi_isoform = Channel.empty()
ch_psivec_isoform = Channel.empty()

ch_ranges_ioi = Channel.empty()
ch_groups_ioi = Channel.empty()
ch_cluster_vec_isoform = Channel.empty()
ch_cluster_log_isoform = Channel.empty()

Expand Down Expand Up @@ -300,13 +303,13 @@ workflow SUPPA {

ch_suppa_isoform_contrasts = ch_suppa_isoform_contrasts
.map { it -> [it['treatment'], it] }
.cross ( ch_suppa_tpm_conditions )
.map { it -> it[0][1] + ['tpm1': it[1][1]] }
.combine ( ch_suppa_tpm_conditions, by: 0)
.map { it -> it[1] + ['tpm1': it[2]] }

ch_suppa_isoform_contrasts = ch_suppa_isoform_contrasts
.map { it -> [it['control'], it] }
.cross ( ch_suppa_tpm_conditions )
.map { it -> it[0][1] + ['tpm2': it[1][1]] }
.combine ( ch_suppa_tpm_conditions, by: 0)
.map { it -> it[1] + ['tpm2': it[2]] }

// Add PSI files to contrasts channel

Expand All @@ -316,13 +319,13 @@ workflow SUPPA {

ch_suppa_isoform_contrasts = ch_suppa_isoform_contrasts
.map { it -> [it['treatment'], it] }
.cross ( ch_suppa_psi_conditions )
.map { it -> it[0][1] + ['psi1': it[1][1]] }
.combine ( ch_suppa_psi_conditions, by: 0 )
.map { it -> it[1] + ['psi1': it[2]] }

ch_suppa_isoform_contrasts = ch_suppa_isoform_contrasts
.map { it -> [it['control'], it] }
.cross ( ch_suppa_psi_conditions )
.map { it -> it[0][1] + ['psi2': it[1][1]] }
.combine ( ch_suppa_psi_conditions, by: 0 )
.map { it -> it[1] + ['psi2': it[2]] }

// Create input channels to diffsplice process

Expand Down Expand Up @@ -353,16 +356,16 @@ workflow SUPPA {

// Get ranges for cluster analysis

ch_ranges_ioi = SPLIT_FILES_IOI.out.ranges
.splitText( by: 1 ) { it.trim() }
.first()
CLUSTERGROUPS_IOI ( ch_psivec_isoform )

ch_groups_ioi = CLUSTERGROUPS_IOI.out

// Run Clustering

CLUSTEREVENTS_IOI(
ch_dpsi_isoform,
ch_psivec_isoform,
ch_ranges_ioi,
ch_groups_ioi,
prefix,
clusterevents_dpsithreshold,
clusterevents_eps,
Expand All @@ -375,6 +378,7 @@ workflow SUPPA {

ch_cluster_vec_isoform = CLUSTEREVENTS_IOI.out.clustvec
ch_cluster_log_isoform = CLUSTEREVENTS_IOI.out.cluster_log

}
}
}
Expand Down
1 change: 0 additions & 1 deletion subworkflows/local/tx2gene_tximport.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ workflow TX2GENE_TXIMPORT {
.set{ salmon_results }
UNTAR ( salmon_results.tar )
salmon_results = salmon_results.dir.mix(UNTAR.out.untar)
salmon_results.view()

//
// Quantify and merge counts across samples
Expand Down