Skip to content

Commit

Permalink
remove split fastq from config and all rules
Browse files: browse the repository at this point in the history
  • Loading branch information
CChahrour committed Jan 26, 2024
1 parent abee85a commit 01bb877
Show file tree
Hide file tree
Showing 10 changed files with 31 additions and 156 deletions.
9 changes: 0 additions & 9 deletions docs/pipeline.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,6 @@ $ seqnado-config chip
[6/23] chromosome_sizes (/ceph/project/milne_group/shared/seqnado_reference/hg38/UCSC/sequence/hg38.chrom.sizes):
[7/23] indicies (/ceph/project/milne_group/shared/seqnado_reference/hg38/UCSC/bt2_index/hg38):
[8/23] gtf (/ceph/project/milne_group/shared/seqnado_reference/hg38/UCSC/genes/hg38.ncbiRefSeq.gtf):
[9/23] Select read_type
1 - paired
2 - single
Choose from [1/2] (1): 1
[10/23] Select split_fastq
1 - True
2 - False
Choose from [1/2] (1): 2
[11/23] split_fastq_parts (int):
[12/23] Select remove_pcr_duplicates_method
1 - picard
2 - deeptools
Expand Down
6 changes: 0 additions & 6 deletions seqnado/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ def setup_configuration(assay, genome, template_data):
template_data['indicies'] = genome_dict[genome]['index']
template_data['chromosome_sizes'] = genome_dict[genome]['chromosome_sizes']
template_data['gtf'] = genome_dict[genome]['gtf']
template_data['read_type'] = get_user_input("What is your read type?", default="paired", choices=["paired", "single"])

template_data['remove_blacklist'] = get_user_input("Do you want to remove blacklist regions? (yes/no)", default="yes", is_boolean=True)
if template_data['remove_blacklist']:
Expand Down Expand Up @@ -114,11 +113,6 @@ def setup_configuration(assay, genome, template_data):
template_data['fastq_screen_config'] = get_user_input("Path to fastqscreen config:", default="/ceph/project/milne_group/shared/seqnado_reference/fastqscreen_reference/fastq_screen.conf")
elif assay in ["atac", "rna"]:
template_data['normalisation_method'] = "False"

template_data['split_fastq'] = get_user_input("Do you want to split FASTQ files? (yes/no)", default="no", is_boolean=True)
if template_data['split_fastq']:
template_data.update['split_fastq_parts'] = get_user_input("How many parts do you want to split the FASTQ files into?", default="4")


template_data['make_bigwigs'] = get_user_input("Do you want to make bigwigs? (yes/no)", default="no", is_boolean=True)
if template_data['make_bigwigs']:
Expand Down
5 changes: 0 additions & 5 deletions seqnado/workflow/config/config.yaml.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ genome:
chromosome_sizes: "{{chromosome_sizes}}"
gtf: "{{gtf}}"

read_type: "{{read_type}}"

remove_blacklist: "{{remove_blacklist}}"
blacklist: "{{blacklist}}"

Expand All @@ -30,9 +28,6 @@ spikein_options:
spikein_genome: "{{spikein_genome}}"
fastq_screen_config: "{{fastq_screen_config}}"

split_fastq: "{{split_fastq}}"
split_fastq_parts: "{{split_fastq_parts}}"

make_bigwigs: "{{make_bigwigs}}"
pileup_method: "{{pileup_method}}"
make_heatmaps: "{{make_heatmaps}}"
Expand Down
27 changes: 2 additions & 25 deletions seqnado/workflow/rules/align.smk
Original file line number Diff line number Diff line change
@@ -1,28 +1,5 @@
import seqnado.utils as utils

if config["split_fastq"] == "False":
rule align_paired:
input:
fq1="seqnado_output/trimmed/{sample}_1.fastq.gz",
fq2="seqnado_output/trimmed/{sample}_2.fastq.gz",
params:
index=config["genome"]["indicies"],
options=utils.check_options(config["bowtie2"]["options"]),
output:
bam=temp("seqnado_output/aligned/raw/{sample}.bam"),
threads: config["bowtie2"]["threads"]
resources:
mem_mb=4000,
log:
"seqnado_output/logs/align/{sample}.log",
shell:
"""bowtie2 -p {threads} -x {params.index} -1 {input.fq1} -2 {input.fq2} {params.options} 2> {log} |
samtools view -bS - > {output.bam} &&
samtools sort -@ {threads} -o {output.bam}_sorted {output.bam} >> {log} 2>&1 &&
mv {output.bam}_sorted {output.bam}
"""


rule align_paired:
input:
fq1="seqnado_output/trimmed/{sample}_1.fastq.gz",
Expand All @@ -34,8 +11,8 @@ rule align_paired:
bam=temp("seqnado_output/aligned/raw/{sample}.bam"),
threads: config["bowtie2"]["threads"]
resources:
time=lambda wildcards, attempt: "0-{hours}:00:00".format(hours=4 * 2**(attempt-1)),
mem_mb=4000,
time="0-04:00:00",
log:
"seqnado_output/logs/align/{sample}.log",
shell:
Expand All @@ -45,7 +22,6 @@ rule align_paired:
mv {output.bam}_sorted {output.bam}
"""


rule align_single:
input:
fq1="seqnado_output/trimmed/{sample}.fastq.gz",
Expand All @@ -55,6 +31,7 @@ rule align_single:
output:
bam=temp("seqnado_output/aligned/raw/{sample}.bam"),
resources:
time=lambda wildcards, attempt: "0-{hours}:00:00".format(hours=4 * 2**(attempt-1)),
mem_mb=4000,
threads: config["bowtie2"]["threads"]
log:
Expand Down
72 changes: 0 additions & 72 deletions seqnado/workflow/rules/fastq_split.smk

This file was deleted.

54 changes: 27 additions & 27 deletions seqnado/workflow/rules/qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -70,33 +70,33 @@ use rule samtools_stats as samtools_stats_filtered with:
stats="seqnado_output/qc/alignment_filtered/{sample}.txt",


if config["split_fastq"] == "False":

rule multiqc:
input:
expand(
"seqnado_output/qc/fastqc_raw/{sample}_{read}_fastqc.html",
sample=SAMPLE_NAMES,
read=[1, 2],
),
expand(
"seqnado_output/qc/fastqc_trimmed/{sample}_{read}_fastqc.html",
sample=SAMPLE_NAMES,
read=[1, 2],
),
expand("seqnado_output/qc/alignment_raw/{sample}.txt", sample=SAMPLE_NAMES),
expand(
"seqnado_output/qc/alignment_filtered/{sample}.txt",
sample=SAMPLE_NAMES,
),
output:
"seqnado_output/qc/full_qc_report.html",
log:
"seqnado_output/logs/multiqc.log",
resources:
mem_mb=lambda wildcards, attempt: 2000 * 2**attempt,
shell:
"multiqc -o seqnado_output/qc seqnado_output/qc -n full_qc_report.html --force > {log} 2>&1"


rule multiqc:
input:
expand(
"seqnado_output/qc/fastqc_raw/{sample}_{read}_fastqc.html",
sample=SAMPLE_NAMES,
read=[1, 2],
),
expand(
"seqnado_output/qc/fastqc_trimmed/{sample}_{read}_fastqc.html",
sample=SAMPLE_NAMES,
read=[1, 2],
),
expand("seqnado_output/qc/alignment_raw/{sample}.txt", sample=SAMPLE_NAMES),
expand(
"seqnado_output/qc/alignment_filtered/{sample}.txt",
sample=SAMPLE_NAMES,
),
output:
"seqnado_output/qc/full_qc_report.html",
log:
"seqnado_output/logs/multiqc.log",
resources:
mem_mb=lambda wildcards, attempt: 2000 * 2**attempt,
shell:
"multiqc -o seqnado_output/qc seqnado_output/qc -n full_qc_report.html --force > {log} 2>&1"


def get_fastqc_files(*args, **kwargs):
Expand Down
8 changes: 2 additions & 6 deletions seqnado/workflow/snakefile_snp
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,8 @@ else:

DESIGN = FASTQ_SAMPLES.design
SAMPLE_NAMES = FASTQ_SAMPLES.sample_names_all
if config["split_fastq"]:
include: "rules/fastq_split.smk"
else:
include: "rules/fastq_trim.smk"
include: "rules/align.smk"

include: "rules/fastq_trim.smk"
include: "rules/align.smk"
include: "rules/alignment_post_processing.smk"
include: "rules/hub.smk"
include: "rules/qc.smk"
Expand Down
2 changes: 0 additions & 2 deletions tests/test_atac.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,10 @@ def user_inputs(
"chromsizes": chromsizes,
"gtf": f"{data_path}/genome/chr21.gtf",
"blacklist": f"{data_path}/genome/hg19-blacklist.v2.chr21.bed.gz",
"read_type": "paired",
"remove_blacklist": "yes",
"remove_pcr_duplicates": "yes",
"remove_pcr_duplicates_method": "picard",
"shift_atac_reads": "yes",
"split_fastq": "no",
"make_bigwigs": "yes",
"pileup_method": "deeptools",
"make_heatmaps": "yes",
Expand Down
2 changes: 0 additions & 2 deletions tests/test_chip.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,10 @@ def user_inputs(
"chromsizes": chromsizes,
"gtf": f"{data_path}/genome/chr21.gtf",
"blacklist": f"{data_path}/genome/hg19-blacklist.v2.chr21.bed.gz",
"read_type": "paired",
"remove_blacklist": "yes",
"remove_pcr_duplicates": "yes",
"remove_pcr_duplicates_method": "picard",
"spikein": "no",
"split_fastq": "no",
"make_bigwigs": "yes",
"pileup_method": "deeptools",
"make_heatmaps": "yes",
Expand Down
2 changes: 0 additions & 2 deletions tests/test_rna.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,8 @@ def user_inputs(
"chromsizes": chromsizes,
"gtf": f"{data_path}/genome/chr21.gtf",
"blacklist": f"{data_path}/genome/hg19-blacklist.v2.chr21.bed.gz",
"read_type": "paired",
"remove_blacklist": "yes",
"remove_pcr_duplicates": "no",
"split_fastq": "no",
"make_bigwigs": "yes",
"pileup_method": "deeptools",
"make_heatmaps": "yes",
Expand Down

0 comments on commit 01bb877

Please sign in to comment.