Skip to content

Commit

Permalink
Merge branch 'CW-2986_duplicate_fastcat' into 'dev'
Browse files Browse the repository at this point in the history
CW-2986 - Get stats from ingress

Closes CW-2986

See merge request epi2melabs/workflows/wf-single-cell!141
  • Loading branch information
nrhorner committed Jan 18, 2024
2 parents 0a557ae + b87fafa commit 9637df6
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 27 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Changed
- Removed the duplicate fastcat call; per-read statistics are now taken from the ingress output.

## [v1.0.2]
### Fixed
- Error interpreting CSV data types during BAM tagging.
Expand Down
5 changes: 3 additions & 2 deletions bin/workflow_glue/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def main(args):
head_resources=[*LAB_head_resources])

with report.add_section('Read summaries', 'Read summary'):
SeqSummary(args.read_stats)
SeqSummary(args.read_stats_dir)

survival_df = pd.read_csv(args.survival, sep='\t', index_col=0)
wf_summ_df = pd.read_csv(args.wf_summary, sep='\t', index_col=0)
Expand Down Expand Up @@ -350,8 +350,9 @@ def argparser():
parser = wf_parser("report")

parser.add_argument(
"--read_stats_dir",
"--read_stats",
help="fastcat read stats file, with multiple samples concatenated")
help="fastcat read stats file or folder of files", type=Path)
parser.add_argument(
"--images", nargs='+',
help="Sample directories containing various images to put in report")
Expand Down
45 changes: 20 additions & 25 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ include { align } from './subworkflows/align'
include { process_bams } from './subworkflows/process_bams'


process summariseCatChunkReads {
process chunkReads {
// concatenate fastq and fastq.gz in a dir.
// Split into p parts where p is params.max_threads, or, when
// params.adapter_scan_chunk_size is non-zero, into chunks of that size.

Expand All @@ -20,9 +20,6 @@ process summariseCatChunkReads {
tuple val(meta),
path(reads)
output:
tuple val(meta),
path("${meta.alias}.stats"),
emit: stats
tuple val(meta),
path("chunks/*"),
emit: fastq_chunks
Expand All @@ -33,8 +30,7 @@ process summariseCatChunkReads {
def seqkit_split_opts = (params.adapter_scan_chunk_size == 0) ? "-p $params.max_threads" : "-s $params.adapter_scan_chunk_size"

"""
fastcat -s ${meta.alias} -r ${meta.alias}.stats -x ${reads} | \
seqkit split2 --threads ${task.cpus} ${seqkit_split_opts} -O chunks -o ${meta.alias} -e .gz
seqkit split2 ${reads} --threads ${task.cpus} ${seqkit_split_opts} -O chunks -o ${meta.alias} -e .gz
"""
}

Expand Down Expand Up @@ -80,7 +76,7 @@ process makeReport {
input:
path 'versions'
path 'params.csv'
path 'read_stats.csv'
path 'per_read_stats/stats_?.tsv.gz'
path 'survival.tsv'
path 'wf_summary.tsv'
path umap_dirs
Expand All @@ -93,7 +89,7 @@ process makeReport {
report_name = "wf-single-cell-report.html"
"""
workflow-glue report \
--read_stats read_stats.csv \
--read_stats_dir per_read_stats \
--params params.csv \
--versions versions \
--survival survival.tsv \
Expand Down Expand Up @@ -232,6 +228,8 @@ workflow pipeline {
meta
ref_genome_dir
umap_genes
per_read_stats

main:
// throw an exception for deprecated conda users
if (workflow.profile.contains("conda")) {
Expand All @@ -247,10 +245,9 @@ workflow pipeline {
workflow_params = getParams()

bc_longlist_dir = file("${projectDir}/data", checkIfExists: true)
chunkReads(meta)

summariseCatChunkReads(meta)

stranding(summariseCatChunkReads.out.fastq_chunks)
stranding(chunkReads.out.fastq_chunks)

align(
stranding.out.stranded_fq,
Expand All @@ -274,13 +271,11 @@ workflow pipeline {
.join(process_bams.out.transcript_expression)
.join(process_bams.out.mitochondrial_expression)
.join(process_bams.out.umap_matrices))

makeReport(
software_versions,
workflow_params,
summariseCatChunkReads.out.stats
.map {it -> it[1]}
.collectFile(keepHeader:true),
per_read_stats.collect(),
prepare_report_data.out.survival
.collectFile(keepHeader:true),
prepare_report_data.out.summary
Expand All @@ -300,7 +295,6 @@ WorkflowMain.initialise(workflow, params, log)
workflow {

Pinguscript.ping_start(nextflow, workflow, params)

ref_genome_dir = file(params.ref_genome_dir, checkIfExists: true)

if (params.umap_plot_genes){
Expand All @@ -320,11 +314,14 @@ workflow {
samples = fastq_ingress([
"input":params.fastq,
"sample":params.sample,
"sample_sheet":params.sample_sheet])
.map {it[0, 1]}

"sample_sheet":params.sample_sheet,
"stats": true])

per_read_stats = samples.map {
meta, reads, stats ->
[meta, file(stats.resolve('*read*.tsv.gz'))[0]
]}.map {meta, stats -> stats}


if (!params.single_cell_sample_sheet) {

sc_sample_sheet = file("$projectDir/data/OPTIONAL_FILE")
Expand All @@ -340,12 +337,11 @@ workflow {
.map {it -> [it['sample_id'], it]}
// Merge the kit metadata onto the sample metadata
// Put sample_id as first element for join
samples.map {meta, reads -> [meta.alias, meta, reads]}
sample_and_kit_meta = samples.map {meta, reads -> [meta.alias, meta, reads]}
sample_and_kit_meta = samples.map {meta, reads, stats -> [meta.alias, meta, reads]}
.join(kit_meta)
.map {sample_id, sample_meta, reads, kit_meta -> [sample_meta + kit_meta, reads]}

pipeline(sample_and_kit_meta, ref_genome_dir, umap_genes)
pipeline(sample_and_kit_meta, ref_genome_dir, umap_genes, per_read_stats)

output(pipeline.out.results.flatMap({it ->
// Convert [meta, file, file, ..]
Expand All @@ -355,8 +351,7 @@ workflow {
l.add(tuple(it[0], it[i]))
}
return l
}).concat(pipeline.out.config_stats)
)
}))

output_report(pipeline.out.report)
}
Expand Down

0 comments on commit 9637df6

Please sign in to comment.