Skip to content

Commit

Permalink
Fix symlink_files function to handle both paired
Browse files Browse the repository at this point in the history
and single-end assays
  • Loading branch information
alsmith151 committed Jan 25, 2024
1 parent f6a4774 commit f8750fc
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 10 deletions.
30 changes: 26 additions & 4 deletions seqnado/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,7 @@ def from_dataframe(cls, df: pd.DataFrame, simplified: bool = True, **kwargs):
return cls(assays=experiments, **kwargs)


def symlink_files(
def symlink_files_paired(
output_dir: pathlib.Path, assay: Union[AssayNonIP, AssayIP], assay_name: str
):
r1_path_new = pathlib.Path(f"{output_dir}/{assay_name}_1.fastq.gz")
Expand All @@ -706,6 +706,18 @@ def symlink_files(
logger.warning(f"Symlink for {r2_path_new} already exists.")


def symlink_files_single(
    output_dir: pathlib.Path, assay: Union[AssayNonIP, AssayIP], assay_name: str
):
    """Link a single-end assay's read-1 FASTQ into ``output_dir``.

    The link is created at ``<output_dir>/<assay_name>.fastq.gz`` and points
    at the resolved location of ``assay.r1.path``.

    Args:
        output_dir: Directory in which the symlink is created.
        assay: Assay whose ``r1.path`` is the link target.
        assay_name: Base name (without extension) for the new link.

    Returns:
        None. If the destination already exists it is left untouched.
    """
    link_path = pathlib.Path(f"{output_dir}/{assay_name}.fastq.gz")

    # Nothing to do when the destination is already in place.
    if link_path.exists():
        return

    try:
        link_path.symlink_to(assay.r1.path.resolve())
    except FileExistsError:
        # Creation can still collide even though exists() was False
        # (e.g. a dangling link); report it instead of failing.
        logger.warning(f"Symlink for {link_path} already exists.")


def symlink_fastq_files(
design: Union[Design, DesignIP], output_dir: str = "seqnado_output/fastqs/"
) -> None:
Expand All @@ -717,18 +729,28 @@ def symlink_fastq_files(

if isinstance(design, Design):
for assay_name, assay in design.assays.items():
symlink_files(output_dir, assay, assay_name)
if assay.is_paired:
symlink_files_paired(output_dir, assay, assay_name)
else:
symlink_files_single(output_dir, assay, assay_name)

elif isinstance(design, DesignIP):
for experiment_name, experiment in design.assays.items():
assay = experiment.ip_files
assay_name = assay.name
symlink_files(output_dir, assay, assay_name)

if assay.is_paired:
symlink_files_paired(output_dir, assay, assay_name)
else:
symlink_files_single(output_dir, assay, assay_name)

if experiment.control_files:
assay = experiment.control_files
assay_name = assay.name
symlink_files(output_dir, assay, assay_name)
if assay.is_paired:
symlink_files_paired(output_dir, assay, assay_name)
else:
symlink_files_single(output_dir, assay, assay_name)


def define_output_files(
Expand Down
50 changes: 44 additions & 6 deletions seqnado/workflow/rules/qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ rule fastqc_raw_paired:
"v3.0.1/bio/fastqc"


# FastQC on a raw FASTQ whose name carries no read suffix
# ({sample}.fastq.gz): declares an HTML report and a zip archive per sample
# under seqnado_output/qc/fastqc_raw/.
# NOTE(review): no wrapper/shell/run or log directive is visible in this rule
# as shown here — confirm how FastQC is actually invoked for this rule.
rule fastqc_raw_single:
input:
"seqnado_output/fastqs/{sample}.fastq.gz",
output:
html="seqnado_output/qc/fastqc_raw/{sample}.html",
zip="seqnado_output/qc/fastqc_raw/{sample}_fastqc.zip", # the suffix _fastqc.zip is necessary for multiqc to find the file. If not using multiqc, you are free to choose an arbitrary filename


use rule fastqc_raw_paired as fastqc_trimmed_paired with:
input:
"seqnado_output/trimmed/{sample}_{read}.fastq.gz",
Expand All @@ -29,6 +37,16 @@ use rule fastqc_raw_paired as fastqc_trimmed_paired with:
"seqnado_output/logs/fastqc_trimmed/{sample}_{read}.log",


# Reuses fastqc_raw_single for trimmed reads: only input, output and log are
# overridden, so the reports are written under qc/fastqc_trimmed/ and the log
# under logs/fastqc_trimmed/; everything else comes from the base rule.
use rule fastqc_raw_single as fastqc_trimmed_single with:
input:
"seqnado_output/trimmed/{sample}.fastq.gz",
output:
html="seqnado_output/qc/fastqc_trimmed/{sample}.html",
zip="seqnado_output/qc/fastqc_trimmed/{sample}_fastqc.zip", # the suffix _fastqc.zip is necessary for multiqc to find the file. If not using multiqc, you are free to choose an arbitrary filename
log:
"seqnado_output/logs/fastqc_trimmed/{sample}.log",


rule samtools_stats:
input:
bam="seqnado_output/aligned/raw/{sample}.bam",
Expand All @@ -47,7 +65,9 @@ use rule samtools_stats as samtools_stats_filtered with:
output:
stats="seqnado_output/qc/alignment_filtered/{sample}.txt",


if config["split_fastq"] == "False":

rule multiqc:
input:
expand(
Expand All @@ -61,7 +81,10 @@ if config["split_fastq"] == "False":
read=[1, 2],
),
expand("seqnado_output/qc/alignment_raw/{sample}.txt", sample=SAMPLE_NAMES),
expand("seqnado_output/qc/alignment_filtered/{sample}.txt", sample=SAMPLE_NAMES),
expand(
"seqnado_output/qc/alignment_filtered/{sample}.txt",
sample=SAMPLE_NAMES,
),
output:
"seqnado_output/qc/full_qc_report.html",
log:
Expand All @@ -71,6 +94,7 @@ if config["split_fastq"] == "False":
shell:
"multiqc -o seqnado_output/qc seqnado_output/qc -n full_qc_report.html --force > {log} 2>&1"


def get_fastqc_files(*args, **kwargs):
"""Return a list of fastq files for a given sample name."""
import pathlib
Expand Down Expand Up @@ -99,13 +123,24 @@ rule multiqc_raw:
"multiqc -o seqnado_output/qc seqnado_output/qc/fastqc_raw -n fastq_raw_qc.html --force > {log} 2>&1"


def get_trimmed_files(wc):
    """Return the FastQC HTML report paths expected for the trimmed reads.

    One report path is generated for every ``*.fastq.gz`` file currently
    present in ``seqnado_output/fastqs``; the report keeps the FASTQ's base
    name and lives in ``seqnado_output/qc/fastqc_trimmed``.

    Args:
        wc: Unused. Accepted because the function is passed as the ``input``
            of rule ``multiqc_trimmed``.

    Returns:
        list[str]: Report paths, sorted by FASTQ path so the result does not
        depend on directory listing order. Empty if no FASTQ files are found.
    """
    import pathlib

    fastq_suffix = ".fastq.gz"
    fastqc_dir = pathlib.Path("seqnado_output/qc/fastqc_trimmed/")
    fq_files = sorted(pathlib.Path("seqnado_output/fastqs").glob("*" + fastq_suffix))

    # Strip only the trailing ".fastq.gz"; a blanket replace(".fastq", "")
    # would also eat ".fastq" occurring earlier in a sample name.
    return [
        str(fastqc_dir / (fq_file.name[: -len(fastq_suffix)] + ".html"))
        for fq_file in fq_files
    ]


rule multiqc_trimmed:
input:
expand(
"seqnado_output/qc/fastqc_trimmed/{sample}_{read}.html",
sample=SAMPLE_NAMES,
read=[1, 2],
),
get_trimmed_files,
output:
"seqnado_output/qc/fastq_trimmed_qc.html",
log:
Expand Down Expand Up @@ -162,3 +197,6 @@ rule multiqc_library_complexity:
mem_mb=lambda wildcards, attempt: 2000 * 2**attempt,
shell:
"multiqc -o seqnado_output/qc seqnado_output/aligned/duplicates_removed -n library_complexity_qc.html --force > {log} 2>&1"


# Explicit rule priority: when several rules could produce the same file, the
# rule listed further left wins. The paired FastQC rules are placed ahead of
# their single-end counterparts — presumably because a "{sample}.html" pattern
# can also match "{sample}_{read}.html" targets; verify against the paired rules.
ruleorder: fastqc_raw_paired > fastqc_raw_single > fastqc_trimmed_paired > fastqc_trimmed_single > samtools_stats > samtools_stats_filtered > multiqc_raw > multiqc_trimmed > multiqc_alignment_raw > multiqc_alignment_filtered > multiqc_library_complexity
1 change: 1 addition & 0 deletions seqnado/workflow/snakefile_chip
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ ANALYSIS_OUTPUT = seqnado.utils.define_output_files(
**config
)


if config["spikein"]:
include: "rules/chip_refnorm.smk"
include: "rules/normalisation.smk"
Expand Down

0 comments on commit f8750fc

Please sign in to comment.