Skip to content

Commit

Permalink
Merge branch 'master' into dependabot/pip/pulp-lte-2.9.0
Browse files Browse the repository at this point in the history
  • Loading branch information
alsmith151 authored Jul 25, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
2 parents b80d8c9 + 6c9fdcc commit edcd9fc
Showing 28 changed files with 170 additions and 94 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -19,3 +19,4 @@ test_rna_size_factors.ipynb
tests/data/*
pytestdebug.log
sps*
seqnado.sif
2 changes: 1 addition & 1 deletion docs/installation.md
Original file line number Diff line number Diff line change
@@ -68,7 +68,7 @@ pip install seqnado
Ensure that the environment variables are set correctly. This can be done by adding the following to your `.bashrc` or `.bash_profile`. Run this command to add the environment variables to your `.bashrc`:

```bash
echo export APPTAINER_BINDPATH="/ceph:/ceph, /project:/project, /databank:/databank" >> ~/.bashrc
echo 'export APPTAINER_BINDPATH="/ceph:/ceph, /project:/project, /databank:/databank"' >> ~/.bashrc
```

Reload the `.bashrc` file:
2 changes: 1 addition & 1 deletion install_seqnado.sh
Original file line number Diff line number Diff line change
@@ -29,7 +29,7 @@ conda activate base

# Set the environment variables for CCB
if [[ $(hostname) =~ "imm-" ]]; then
echo export APPTAINER_BINDPATH="/ceph:/ceph, /project:/project, /databank:/databank" >> ~/.bashrc
echo 'export APPTAINER_BINDPATH="/ceph:/ceph, /project:/project, /databank:/databank"' >> ~/.bashrc
export APPTAINER_BINDPATH="/ceph:/ceph, /project:/project, /databank:/databank"
fi

1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -19,7 +19,6 @@ dynamic = ["version"]
dependencies = [
"click",
"cookiecutter",
"drmaa",
"pandas",
"pandera",
"pulp<=2.9.0",
39 changes: 39 additions & 0 deletions seqnado.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
BootStrap: docker
From: mambaorg/micromamba:bookworm

%post

# Pre set up
cd /opt/
apt update
apt install -y curl gcc git cmake make wget

# Install Apptainer (the successor to Singularity)
wget https://github.com/apptainer/apptainer/releases/download/v1.3.3/apptainer_1.3.3_amd64.deb
apt install -y ./apptainer_1.3.3_amd64.deb
apptainer remote add --no-login SylabsCloud cloud.sylabs.io
apptainer remote use SylabsCloud



# Mamba packages
#micromamba install -y -n base -f /opt/environment.yml
micromamba install -y -n base -c conda-forge python pip
export PATH="/opt/conda/bin:$PATH"

# Install Seqnado
git clone https://github.com/alsmith151/SeqNado.git
cd SeqNado
/opt/conda/bin/python -m pip install .

# Clean
#curl gcc git cmake make libtool g++ pkgconfig openssl-dev linux-headers
micromamba clean -afy
/opt/conda/bin/python -m pip cache purge
find /opt/conda/ -follow -type f -name '*.a' -delete
find /opt/conda/ -follow -type f -name '*.pyc' -delete
find /opt/conda/ -follow -type f -name '*.js.map' -delete


%environment
export PATH=/opt/conda/bin:$PATH
11 changes: 11 additions & 0 deletions seqnado/cli.py
Original file line number Diff line number Diff line change
@@ -113,6 +113,13 @@ def cli_design(method, files, output="design.csv"):
is_flag=True,
help="Remove symlinks created by previous runs. Useful for re-running pipeline after misconfiguration.",
)
@click.option(
'-s',
'--scale-resources',
help="Scale factor the memory and time resources for the pipeline",
default=1.0,
type=float
)
@click.option(
"-v",
"--verbose",
@@ -128,6 +135,7 @@ def cli_pipeline(
version=False,
verbose=False,
clean_symlinks=False,
scale_resources=1.0,
):
"""Runs the data processing pipeline"""

@@ -151,6 +159,9 @@ def cli_pipeline(

pipeline_options, cores = extract_cores_from_options(pipeline_options)

# Scale the memory and time resources
os.environ["SCALE_RESOURCES"] = str(scale_resources)

# Removes old symlinks if requested
if clean_symlinks:
logger.info("Cleaning symlinks")
21 changes: 20 additions & 1 deletion seqnado/helpers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, Union, Optional, List, Tuple
from typing import Dict, Union, Optional, List, Tuple, Any
import pathlib
import numpy as np
import shlex
@@ -58,6 +58,25 @@ def extract_apptainer_args(options: List[str]) -> Tuple[List[str], str]:
return options, apptainer_args


def define_memory_requested(attempts: int = 1, initial_value: int = 1, scale: float = 1) -> str:
    """
    Define the memory requested for the job.

    The request starts at ``initial_value`` gigabytes, doubles on every
    attempt after the first, and is then multiplied by ``scale``.

    Args:
        attempts: 1-based attempt number of the job (coerced with ``int``).
        initial_value: Memory for the first attempt, in gigabytes
            (coerced with ``int``).
        scale: Multiplier applied to the computed amount (coerced with
            ``float``).

    Returns:
        A string such as ``"4G"``. The value is always a whole number:
        fractional results are rounded up so the request is never smaller
        than the computed amount and never contains a decimal point.
    """
    # Function-scope import keeps this block self-contained.
    import math

    doubled = int(initial_value) * 2 ** (int(attempts) - 1)
    # Multiplying by float(scale) always yields a float, which previously
    # produced strings like "4.0G"; round up to whole gigabytes instead.
    gigabytes = math.ceil(doubled * float(scale))
    return f"{gigabytes}G"

def define_time_requested(attempts: int = 1, initial_value: int = 1, scale: float = 1) -> str:
    """
    Build the runtime request string for a job.

    ``initial_value`` is the allowance in hours for the first attempt. It
    doubles on each later attempt and is then multiplied by ``scale``.

    Returns:
        The number of hours followed by ``"h"``. Because the value is
        multiplied by ``float(scale)`` it is always float-formatted,
        e.g. ``"4.0h"``.
    """
    base_hours = int(initial_value)
    retry_multiplier = 2 ** (int(attempts) - 1)
    hours = base_hours * retry_multiplier * float(scale)
    return f"{hours}h"


def symlink_file(
output_dir: pathlib.Path, source_path: pathlib.Path, new_file_name: str
):
10 changes: 5 additions & 5 deletions seqnado/workflow/rules/align.smk
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from seqnado.helpers import check_options
from seqnado.helpers import check_options, define_time_requested, define_memory_requested



@@ -13,8 +13,8 @@ rule align_paired:
bam=temp("seqnado_output/aligned/raw/{sample}.bam"),
threads: config["bowtie2"]["threads"]
resources:
runtime=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}h",
mem=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}GB",
runtime=lambda wildcards, attempt: define_time_requested(initial_value=4, attempts=attempt, scale=SCALE_RESOURCES),
mem=lambda wildcards, attempt: define_memory_requested(initial_value=4, attempts=attempt, scale=SCALE_RESOURCES),
log:
"seqnado_output/logs/align/{sample}.log",
shell:
@@ -34,8 +34,8 @@ rule align_single:
output:
bam=temp("seqnado_output/aligned/raw/{sample}.bam"),
resources:
runtime=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}h",
mem=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}GB",
runtime=lambda wildcards, attempt: define_time_requested(initial_value=4, attempts=attempt, scale=SCALE_RESOURCES),
mem=lambda wildcards, attempt: define_memory_requested(initial_value=4, attempts=attempt, scale=SCALE_RESOURCES),
threads: config["bowtie2"]["threads"]
log:
"seqnado_output/logs/align/{sample}.log",
6 changes: 3 additions & 3 deletions seqnado/workflow/rules/align_rna.smk
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from seqnado.helpers import check_options
from seqnado.helpers import check_options, define_memory_requested, define_time_requested

rule align_paired:
input:
@@ -15,8 +15,8 @@ rule align_paired:
),
threads: config["star"]["threads"]
resources:
mem="35GB",
runtime="6h",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=35, attempts=attempt, scale=SCALE_RESOURCES),
runtime=lambda wildcards, attempt: define_time_requested(initial_value=6, attempts=attempt, scale=SCALE_RESOURCES),
log:
"seqnado_output/logs/align/{sample}.log",
shell:
14 changes: 7 additions & 7 deletions seqnado/workflow/rules/alignment_counts.smk
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from seqnado.helpers import check_options
from seqnado.helpers import check_options, define_time_requested, define_memory_requested

rule feature_counts:
input:
@@ -11,8 +11,8 @@ rule feature_counts:
options=check_options(config["featurecounts"]["options"]),
threads: config["featurecounts"]["threads"]
resources:
mem=lambda wildcards, attempt: f"{3 * 2 ** (attempt)}GB",
runtime="2h",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=3, attempts=attempt, scale=SCALE_RESOURCES),
runtime=lambda wildcards, attempt: define_time_requested(initial_value=2, attempts=attempt, scale=SCALE_RESOURCES),
log:
"seqnado_output/logs/readcounts/featurecounts/featurecounts.log",
shell:
@@ -41,8 +41,8 @@ rule salmon_counts_paired:
options=check_options(config["salmon"]["options"]),
threads: config["salmon"]["threads"]
resources:
mem=lambda wildcards, attempt: f"{3 * 2 ** (attempt)}GB",
runtime="2h",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=3, attempts=attempt, scale=SCALE_RESOURCES),
runtime=lambda wildcards, attempt: define_time_requested(initial_value=2, attempts=attempt, scale=SCALE_RESOURCES),
log:
"seqnado_output/logs/readcounts/salmon/salmon_{sample}.log",
shell:
@@ -61,8 +61,8 @@ rule salmon_counts_single:
options=check_options(config["salmon"]["options"]),
threads: config["salmon"]["threads"]
resources:
mem=lambda wildcards, attempt: f"{3 * 2 ** (attempt)}GB",
runtime="2h",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=3, attempts=attempt, scale=SCALE_RESOURCES),
runtime=lambda wildcards, attempt: define_time_requested(initial_value=2, attempts=attempt, scale=SCALE_RESOURCES),
log:
"seqnado_output/logs/readcounts/salmon/salmon_{sample}.log",
shell:
16 changes: 8 additions & 8 deletions seqnado/workflow/rules/alignment_post_processing.smk
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from seqnado.helpers import check_options
from seqnado.helpers import check_options, define_time_requested, define_memory_requested


rule sort_bam:
@@ -7,7 +7,7 @@ rule sort_bam:
output:
bam=temp("seqnado_output/aligned/sorted/{sample}.bam"),
resources:
mem=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}GB",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=4, attempts=attempt, scale=SCALE_RESOURCES),
threads: 8
log:
"seqnado_output/logs/sorted/{sample}.log",
@@ -26,7 +26,7 @@ rule index_bam:
bai=temp("seqnado_output/aligned/sorted/{sample}.bam.bai"),
threads: 1
resources:
mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt - 1)}GB",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=2, attempts=attempt, scale=SCALE_RESOURCES),
shell:
"samtools index -@ {threads} -b {input.bam}"

@@ -46,8 +46,8 @@ if config["remove_blacklist"] and os.path.exists(config.get("blacklist", "")):
params:
blacklist=check_options(config["blacklist"]),
resources:
mem="5GB",
runtime="4h",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=5, attempts=attempt, scale=SCALE_RESOURCES),
runtime=lambda wildcards, attempt: define_time_requested(initial_value=4, attempts=attempt, scale=SCALE_RESOURCES),
log:
"seqnado_output/logs/blacklist/{sample}.log",
shell:
@@ -72,7 +72,7 @@ else:
),
threads: 1
resources:
mem="1GB",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=1, attempts=attempt, scale=SCALE_RESOURCES),
log:
"seqnado_output/logs/blacklist/{sample}.log",
shell:
@@ -99,8 +99,8 @@ if config["remove_pcr_duplicates_method"] == "picard":
params:
options=check_options(config["picard"]["options"]),
resources:
mem="5GB",
runtime="4h",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=5, attempts=attempt, scale=SCALE_RESOURCES),
runtime=lambda wildcards, attempt: define_time_requested(initial_value=4, attempts=attempt, scale=SCALE_RESOURCES),
log:
"seqnado_output/logs/duplicates/{sample}.log",
shell:
6 changes: 3 additions & 3 deletions seqnado/workflow/rules/exogenous_norm.smk
Original file line number Diff line number Diff line change
@@ -5,14 +5,14 @@ use rule align_paired as align_paired_spikein with:
output:
bam=temp("seqnado_output/aligned/spikein/raw/{sample}.bam"),
resources:
mem=lambda wildcards, attempt: f"{8 * 2 ** (attempt - 1)}GB",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=8, attempts=attempt, scale=SCALE_RESOURCES),


use rule align_single as align_single_spikein with:
output:
bam=temp("seqnado_output/aligned/spikein/raw/{sample}.bam"),
resources:
mem=lambda wildcards, attempt: f"{8 * 2 ** (attempt - 1)}GB",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=8, attempts=attempt, scale=SCALE_RESOURCES),


use rule sort_bam as sort_bam_spikein with:
@@ -21,7 +21,7 @@ use rule sort_bam as sort_bam_spikein with:
output:
bam=temp("seqnado_output/aligned/spikein/sorted/{sample}.bam"),
resources:
mem=lambda wildcards, attempt: f"{8 * 2 ** (attempt - 1)}GB",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=8, attempts=attempt, scale=SCALE_RESOURCES),
log:
"seqnado_output/logs/aligned_spikein/{sample}_sort.log",

5 changes: 4 additions & 1 deletion seqnado/workflow/rules/fastq_screen.smk
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from seqnado.helpers import check_options, define_time_requested, define_memory_requested



rule fastq_screen_paired:
input:
@@ -54,7 +57,7 @@ rule multiqc_fastqscreen:
log:
"seqnado_output/logs/multiqc_fastqscreen.log",
resources:
mem=lambda wildcards, attempt: f"{2 * 2**attempt}GB",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=2, attempts=attempt, scale=SCALE_RESOURCES),
shell:
"multiqc -o seqnado_output/qc -n full_fastqscreen_report.html --force seqnado_output/qc/fastq_screen > {log} 2>&1"

10 changes: 5 additions & 5 deletions seqnado/workflow/rules/fastq_trim.smk
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from seqnado.helpers import check_options
from seqnado.helpers import check_options, define_time_requested, define_memory_requested


rule trimgalore_paired:
@@ -11,8 +11,8 @@ rule trimgalore_paired:
trimmed2=temp("seqnado_output/trimmed/{sample}_2.fastq.gz"),
threads: 4
resources:
mem="2GB",
runtime="4h",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=2, attempts=attempt, scale=SCALE_RESOURCES),
runtime=lambda wildcards, attempt: define_time_requested(initial_value=4, attempts=attempt, scale=SCALE_RESOURCES),
params:
options=check_options(config["trim_galore"]["options"]),
trim_dir="seqnado_output/trimmed",
@@ -34,8 +34,8 @@ rule trimgalore_single:
trimmed=temp("seqnado_output/trimmed/{sample}.fastq.gz"),
threads: 4
resources:
mem="2GB",
runtime="2h",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=2, attempts=attempt, scale=SCALE_RESOURCES),
runtime=lambda wildcards, attempt: define_time_requested(initial_value=2, attempts=attempt, scale=SCALE_RESOURCES),
params:
options=check_options(config["trim_galore"]["options"]),
trim_dir="seqnado_output/trimmed",
8 changes: 4 additions & 4 deletions seqnado/workflow/rules/heatmap.smk
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from seqnado.helpers import check_options, get_scale_method
from seqnado.helpers import check_options, get_scale_method, define_memory_requested, define_time_requested

if ASSAY == "ChIP":
prefix = SAMPLE_NAMES_IP
@@ -23,7 +23,7 @@ rule heatmap_matrix:
threads: config["deeptools"]["threads"]
resources:
runtime=lambda wildcards, attempt: f"{1 * 2**attempt}h",
mem=lambda wildcards, attempt: f"{4 * 2**attempt}GB",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=4, attempts=attempt, scale=SCALE_RESOURCES),
log:
"seqnado_output/logs/heatmap/matrix.log",
shell:
@@ -38,7 +38,7 @@ rule heatmap_plot:
params:
colormap=check_options(config["heatmap"]["colormap"]),
resources:
mem=lambda wildcards, attempt: f"{2 * 2**attempt}GB",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=2, attempts=attempt, scale=SCALE_RESOURCES),
log:
"seqnado_output/logs/heatmap/heatmap.log",
shell:
@@ -51,7 +51,7 @@ rule heatmap_metaplot:
output:
metaplot="seqnado_output/heatmap/metaplot.pdf",
resources:
mem=lambda wildcards, attempt: f"{2 * 2**attempt}GB"
mem=lambda wildcards, attempt: define_memory_requested(initial_value=2, attempts=attempt, scale=SCALE_RESOURCES),
log:
"seqnado_output/logs/heatmap/metaplot.log",
shell:
2 changes: 1 addition & 1 deletion seqnado/workflow/rules/hub.smk
Original file line number Diff line number Diff line change
@@ -72,7 +72,7 @@ rule bed_to_bigbed:
params:
chrom_sizes=config["genome"]["chromosome_sizes"],
resources:
mem="1GB",
mem=lambda wildcards, attempt: define_memory_requested(initial_value=1, attempts=attempt, scale=SCALE_RESOURCES),
log:
"seqnado_output/logs/bed_to_bigbed/{directory}/{sample}.log",
shell:
Loading

0 comments on commit edcd9fc

Please sign in to comment.