Skip to content

Commit

Permalink
Feat parameterise quarto (#170)
Browse files Browse the repository at this point in the history
* cleanup pileup logs

* cleanup bigwig threads

* correct orlando scaling factors

* Add deseq2_params rule and update deseq2.qmd.jinja

* Update file calculate_spikein_norm_factors.r to write normalization factors to snakemake output

---------

Co-authored-by: CChahrour <[email protected]>
  • Loading branch information
alsmith151 and CChahrour authored Apr 1, 2024
1 parent f797427 commit a48548b
Show file tree
Hide file tree
Showing 7 changed files with 250 additions and 24 deletions.
22 changes: 16 additions & 6 deletions seqnado/workflow/config/deseq2.qmd.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@ format:
number-sections: true
editor_options:
chunk_output_type: console

params:
counts: "seqnado_output/readcounts/feature_counts/read_counts.tsv"
de_dir: "seqnado_output/deseq2_results/"
spikein_genes:
- "AmpR_seq"
- "Cas9_5p_seq"
- "Cas9_3p_seq"
size_factors_out: "seqnado_output/resources/all_normalisation_factors.json"

---

```{r setup, include=F}
Expand Down Expand Up @@ -64,7 +74,7 @@ print(kable(sample_info, align = "l", format = "simple", row.names = FALSE))
## Counts

```{r counts}
counts <- read.delim("seqnado_output/readcounts/feature_counts/read_counts.tsv", comment.char = "#") %>%
counts <- read.delim(params$counts, comment.char = "#") %>%
rename_with(~ gsub("(seqnado_output.aligned.)(.+)(.bam)", "\\2", .), everything()) %>%
column_to_rownames(var = "Geneid") %>%
dplyr::select(one_of(make.names(sample_info$sample))) %>%
Expand All @@ -86,29 +96,29 @@ print(kable(total_counts, align = "l", format = "simple", row.names = FALSE))
::: panel-tabset

```{r DEseq2}
de_dir <- "seqnado_output/deseq2_results/"
de_dir <- params$de_dir
unlink(de_dir, recursive = T)
dir.create(de_dir)

has_spikein <- sum(which(rownames(counts) %in% c("AmpR_seq", "Cas9_5p_seq", "Cas9_3p_seq"))) > 0
has_spikein <- sum(which(rownames(counts) %in% params$spikein_genes)) > 0

if (has_spikein) {
dds <- DESeqDataSetFromMatrix(countData = counts,
colData = sample_info,
design = ~ deseq2)


spikein_genes <- which(rownames(counts) %in% c("AmpR_seq", "Cas9_5p_seq", "Cas9_3p_seq"))
spikein_genes <- which(rownames(counts) %in% params$spikein_genes)
dds <- estimateSizeFactors(dds, controlGenes=spikein_genes)
kable(colData(dds), row.names = F)
counts(dds) <- counts(dds)[!rownames(counts(dds)) %in% spikein_genes, ]
dds <- DESeq(dds, quiet = T)

# Output size factors

size_factors <- colData(dds)[, "sizeFactor"]
names(size_factors) <- colData(dds)[, "sample"]
sf <- toJSON(size_factors)
writeLines(sf, "seqnado_output/resources/all_normalisation_factors.json")
writeLines(sf, params$size_factors_out)

} else {

Expand Down
33 changes: 32 additions & 1 deletion seqnado/workflow/rules/deseq2_rna.smk
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import pathlib


rule deseq2_report_rnaseq:
input:
counts="seqnado_output/readcounts/feature_counts/read_counts.tsv",
qmd=f"deseq2_{PROJECT_NAME}.qmd".replace(" ", ""),
yml="seqnado_output/resources/deseq2_params.yml"
output:
deseq2=f"deseq2_{PROJECT_NAME}.html".replace(" ", ""),
size_factors="seqnado_output/resources/all_normalisation_factors.json"
Expand All @@ -14,9 +18,36 @@ rule deseq2_report_rnaseq:
input_file=$(realpath "{input.qmd}")
base_dir=$(dirname $input_file)
cd "$base_dir"
quarto render {input.qmd} --no-cache --output {output.deseq2} --log {log}
quarto render {input.qmd} --no-cache --output {output.deseq2} --log {log} --execute-params {input.yml}
"""



# Write the parameters consumed by the DESeq2 Quarto report to a YAML file.
# The file is the `yml` input of `deseq2_report_rnaseq`, which passes it to
# `quarto render` via `--execute-params`.
rule deseq2_params:
output:
yml="seqnado_output/resources/deseq2_params.yml"
params:
spikein_genes=["AmpR_seq", "Cas9_5p_seq", "Cas9_3p_seq"],
size_factors_out="seqnado_output/resources/all_normalisation_factors.json",
# NOTE(review): the report's HTML output is declared as
# f"deseq2_{PROJECT_NAME}.html" with no directory component, so `.parent`
# presumably evaluates to "." here. The report template recursively unlinks
# `de_dir` before recreating it and previously hard-coded
# "seqnado_output/deseq2_results/" - confirm "." is really intended.
de_dir=str(pathlib.Path(rules.deseq2_report_rnaseq.output.deseq2).parent),
counts=rules.deseq2_report_rnaseq.input.counts,
container: None
run:
import yaml

with open(output.yml, "w") as f:
# Keys match the `params:` entries declared in the report's front matter.
yaml.dump(
{
"spikein_genes": params.spikein_genes,
"size_factors_out": params.size_factors_out,
"de_dir": params.de_dir,
"counts": params.counts,
},
f,
)



localrules:
deseq2_report_rnaseq,
deseq2_params
6 changes: 3 additions & 3 deletions seqnado/workflow/rules/pileup_default.smk
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ rule deeptools_make_bigwigs:
runtime="4h",
threads: config["deeptools"]["threads"]
log:
"seqnado_output/logs/pileups/deeptools/{sample}.log",
"seqnado_output/logs/pileups/deeptools/unscaled/{sample}.log",
shell:
"""
bamCoverage {params.options} -p {threads} -b {input.bam} -o {output.bigwig} > {log} 2>&1
Expand All @@ -86,7 +86,7 @@ rule deeptools_make_bigwigs_rna_plus:
mem="2GB",
runtime="4h",
log:
"seqnado_output/logs/pileups/deeptools/{sample}_plus.log",
"seqnado_output/logs/pileups/deeptools/unscaled/{sample}_plus.log",
shell:
"""
bamCoverage {params.options} -p {threads} --filterRNAstrand forward -b {input.bam} -o {output.bigwig} > {log} 2>&1
Expand All @@ -106,7 +106,7 @@ rule deeptools_make_bigwigs_rna_minus:
mem="2GB",
runtime="4h",
log:
"seqnado_output/logs/pileups/deeptools/{sample}_minus.log",
"seqnado_output/logs/pileups/deeptools/unscaled/{sample}_minus.log",
shell:
"""
bamCoverage {params.options} -p {threads} -b {input.bam} -o {output.bigwig} --filterRNAstrand reverse --scaleFactor -1 > {log} 2>&1
Expand Down
12 changes: 6 additions & 6 deletions seqnado/workflow/rules/pileup_norm.smk
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,9 @@ rule deeptools_make_bigwigs_scale:
f"seqnado_output/resources/{get_group_for_sample(wc , DESIGN)}_scaling_factors.tsv",
),
options=check_options(config["deeptools"]["bamcoverage"]),
threads: 8
threads: config["deeptools"]["threads"]
log:
"seqnado_output/logs/deeptools/scaled/{sample}.log",
"seqnado_output/logs/pileups/deeptools/scaled/{sample}.log",
shell:
"bamCoverage -b {input.bam} -o {output.bigwig} --scaleFactor {params.scale} -p {threads} {params.options} > {log} 2>&1"

Expand All @@ -162,12 +162,12 @@ rule deeptools_make_bigwigs_rna_spikein_plus:
params:
options=lambda wildcards: format_deeptools_bamcoverage_options(wildcards),
scale=get_norm_factor_spikein,
threads: 8
threads: config["deeptools"]["threads"]
resources:
mem="2GB",
runtime="4h",
log:
"seqnado_output/logs/deeptools/spikein/{sample}_plus.log",
"seqnado_output/logs/pileups/deeptools/spikein/{sample}_plus.log",
shell:
"bamCoverage -b {input.bam} -o {output.bigwig} -p {threads} --scaleFactor {params.scale} {params.options} --filterRNAstrand forward > {log} 2>&1"

Expand All @@ -182,11 +182,11 @@ rule deeptools_make_bigwigs_rna_spikein_minus:
params:
options=lambda wildcards: format_deeptools_bamcoverage_options(wildcards),
scale=lambda wc: get_norm_factor_spikein(wc, negative=True),
threads: 8
threads: config["deeptools"]["threads"]
resources:
mem="2GB",
runtime="4h",
log:
"seqnado_output/logs/deeptools/spikein/{sample}_minus.log",
"seqnado_output/logs/pileups/deeptools/spikein/{sample}_minus.log",
shell:
"bamCoverage -b {input.bam} -o {output.bigwig} -p {threads} --scaleFactor {params.scale} {params.options} --filterRNAstrand reverse > {log} 2>&1"
17 changes: 17 additions & 0 deletions seqnado/workflow/scripts/calculate_spikein_norm_factors.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@

library(tidyverse)

# Compute per-sample spike-in size factors and write them to a JSON file.
#
# Inputs (from the `snakemake` object):
#   snakemake@input[[1]]   whitespace-delimited count table
#   snakemake@input[[2]]   sample metadata table (loaded, not used below)
# Output:
#   snakemake@output[[1]]  JSON object mapping sample name -> size factor
#
# A size factor is a sample's summed spike-in counts divided by the median of
# that sum across samples. Earlier revisions then replaced these factors with
# `colData(dds)[, "sizeFactor"]`, but no `dds` object (and no DESeq2) exists in
# this script, so it stopped with an error before anything was written.

spikein_genes <- c("AmpR_seq", "Cas9_5p_seq", "Cas9_3p_seq")

# Annotation columns emitted by featureCounts; they are not sample counts.
annotation_cols <- c("Geneid", "Chr", "Start", "End", "Strand", "Length")

# Load the data
# NOTE(review): if this table still carries a leading "#" command line, it
# must be skipped here (e.g. `comment = "#"`) - confirm against the producer.
counts <- read_table(snakemake@input[[1]])
metadata <- read_table(snakemake@input[[2]])

# Tibbles have no meaningful row names (`rownames()` yields "1", "2", ...), so
# genes are matched on an identifier column instead.
# Assumes gene identifiers are in the first column - TODO confirm.
gene_ids <- counts[[1]]
is_spikein <- gene_ids %in% spikein_genes

if (!any(is_spikein)) {
  stop(
    "None of the spike-in genes (", paste(spikein_genes, collapse = ", "),
    ") were found in ", snakemake@input[[1]],
    call. = FALSE
  )
}

# Keep only numeric per-sample columns so `colSums()` is well defined.
sample_counts <- counts %>%
  select(-1) %>%
  select(-any_of(annotation_cols)) %>%
  select(where(is.numeric))

spikein_totals <- colSums(sample_counts[is_spikein, , drop = FALSE])
reference_total <- median(spikein_totals)

# Also catches the "no sample columns" case, where the median is NA.
if (!is.finite(reference_total) || reference_total <= 0) {
  stop(
    "Cannot derive size factors: median spike-in count is ", reference_total,
    call. = FALSE
  )
}

size_factors <- spikein_totals / reference_total

# Reduce BAM-path style column names to bare sample names; plain sample names
# pass through unchanged.
names(size_factors) <- sub("\\.bam$", "", basename(names(size_factors)))

# `as.list()` + `auto_unbox` gives {"sample": factor, ...}; a bare named vector
# would be serialised as an array and lose the sample names.
sf <- jsonlite::toJSON(as.list(size_factors), auto_unbox = TRUE, digits = NA)
writeLines(sf, snakemake@output[[1]])
16 changes: 8 additions & 8 deletions seqnado/workflow/scripts/calculate_spikein_norm_orlando.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

# Read in stats
stats_files = snakemake.input
all_readcounts = []
all_readcounts = []

for stats in stats_files:
file_path = pathlib.Path(stats)
Expand All @@ -21,19 +21,19 @@
df_counts = pd.concat(all_readcounts, ignore_index=True)

# Calculate the ChIP spike-in normalization factor
df_counts["norm_factor"] = 1 / (df_counts["spikein_reads"] / 1e6)
df_counts["scale_factor"] = 1 / (df_counts["spikein_reads"] / 1e6)
# if df_counts["norm_factor"] == inf change to 1
df_counts["norm_factor"] = df_counts["norm_factor"].replace([np.inf, -np.inf], 1)
df_counts["scale_factor"] = df_counts["scale_factor"].replace([np.inf, -np.inf], 1)

df_counts["scale_factor"] = 1 / df_counts["norm_factor"]
# if df_counts["scale_factor"] == 0 change to 1
df_counts["scale_factor"] = df_counts["scale_factor"].replace([0], 1)

df_counts["spikein_percent"] = (df_counts["spikein_reads"] / df_counts["reference_reads"] * 100)


df_counts["spikein_percent"] = (
df_counts["spikein_reads"] / df_counts["reference_reads"] * 100
)

# Save the DataFrame with the calculated normalization factors
df_counts.to_csv(snakemake.output.normalisation_table, sep="\t", index=False)

scale = df_counts[["sample", "scale_factor"]].set_index("sample")["scale_factor"]
scale.to_json(snakemake.output.normalisation_factors)
scale.to_json(snakemake.output.normalisation_factors)
Loading

0 comments on commit a48548b

Please sign in to comment.