From fdd002dbf5b6e2002609eca135ad5d21d946cb0a Mon Sep 17 00:00:00 2001 From: Alastair Smith <49727900+alsmith151@users.noreply.github.com> Date: Tue, 26 Mar 2024 15:25:47 +0000 Subject: [PATCH 1/2] fix: correct bugs found on interacting with slurm scheduler (#158) * Add Singularity bind for presets ending with "s" * Set APPTAINER_BIND environment variable in cli_pipeline function * Update Singularity and Slurm configurations * Refactor APPTAINER_BINDPATH in cli_pipeline function * Increase runtime for trimgalore_paired rule * Refactor is_paired method in AssayNonIP class * Add rule align_single_spikein for aligning single spikein samples * add align single to ruleorder * Increase number of jobs to 100 in slurm-singularity profile * Refactor align_single_spikein rule * fix exo align mem resources * Increase runtime for deeptools_make_bigwigs rules * Add retries option to config.yaml * Add fastq_screen flag to Output class * Update QCFiles instantiation in NonRNAOutput and ChIPOutput * Add library complexity flag to QCFiles instantiation * Refactor pytest fixture in test_pipelines.py * Add genome indices to align_paired_spikein rule * Update rule order in snakefile_chip --------- Co-authored-by: CChahrour --- seqnado/cli.py | 13 +++- seqnado/design.py | 41 +++++++++--- .../profiles/profile_singularity/config.yaml | 2 +- .../profile_slurm_singularity/config.yaml | 25 ++++--- seqnado/workflow/rules/align.smk | 4 +- seqnado/workflow/rules/exogenous_norm.smk | 31 ++++----- seqnado/workflow/rules/fastq_trim.smk | 2 +- seqnado/workflow/rules/pileup_default.smk | 6 +- seqnado/workflow/rules/pileup_norm.smk | 65 ++++++++++++------- seqnado/workflow/snakefile_chip | 7 +- tests/test_pipelines.py | 6 +- 11 files changed, 127 insertions(+), 75 deletions(-) diff --git a/seqnado/cli.py b/seqnado/cli.py index 94b30eb4..57ecb1e6 100644 --- a/seqnado/cli.py +++ b/seqnado/cli.py @@ -144,6 +144,17 @@ def cli_pipeline( ] ) + # Set the APPTAINER_BIND environment variable + if preset.endswith("s"): + # output_dir = pathlib.Path("seqnado_output").absolute() + # output_dir.mkdir(exist_ok=True) + # os.environ["APPTAINER_BINDPATH"] = ", ".join([os.environ.get("APPTAINER_BINDPATH", "")]) + # # os.environ["APPTAINER_NO_HOME"] = "1" + # # os.environ["APPTAINER_CLEANENV"] = "1" + # os.environ["APPTAINER_CWD"] = os.getcwd() + + print(f"APPTAINER_BINDPATH: {os.environ['APPTAINER_BINDPATH']}") + cmd.extend(["--show-failed-logs"]) # Print the logo @@ -152,5 +163,5 @@ def cli_pipeline( print(logo) + completed = subprocess.run(cmd) - subprocess.run(cmd) diff --git a/seqnado/design.py b/seqnado/design.py index c277fd9b..8b54865c 100644 --- a/seqnado/design.py +++ b/seqnado/design.py @@ -169,9 +169,15 @@ class AssayNonIP(BaseModel): def fastq_paths(self): return [self.r1.path, self.r2.path] if self.is_paired else [self.r1.path] + @property def is_paired(self): - return self.r2.path.is_file() + if self.r2 is None: + return False + elif self.r2.path.is_file(): + return True + else: + return False @classmethod def from_fastq_files(cls, fq: List[FastqFile], **kwargs): @@ -200,10 +206,6 @@ class AssayIP(AssayNonIP): def is_control(self) -> bool: return self.r1.is_control - @property - def is_paired(self): - return self.r2.path.is_file() - class ExperimentIP(BaseModel): ip_files: AssayIP @@ -890,6 +892,9 @@ class Output(BaseModel): ucsc_hub_details: Optional[Dict[str, Any]] = None + fastq_screen: bool = False + library_complexity: bool = False + @property def merge_bigwigs(self): return "merge" in self.run_design.to_dataframe().columns @@ -972,7 +977,13 @@ def peaks(self): def files(self) -> List[str]: files = [] - files.extend(QCFiles(assay=self.assay).files) + files.extend( + QCFiles( + assay=self.assay, + fastq_screen=self.fastq_screen, + library_complexity=self.library_complexity, + ).files + ) for file_list in ( self.bigwigs, @@ -1026,12 +1037,18 @@ def peaks(self) -> List[str]: files = pcf_samples.files return files or [] - + @computed_field @property def files(self) -> List[str]: files = [] - files.extend(QCFiles(assay=self.assay).files) + files.extend( + QCFiles( + assay=self.assay, + fastq_screen=self.fastq_screen, + library_complexity=self.library_complexity, + ).files + ) for file_list in ( self.bigwigs, @@ -1092,7 +1109,13 @@ def spikeins(self): @property def files(self) -> List[str]: files = [] - files.extend(QCFiles(assay=self.assay).files) + files.extend( + QCFiles( + assay=self.assay, + fastq_screen=self.fastq_screen, + library_complexity=self.library_complexity, + ).files + ) for file_list in ( self.bigwigs, diff --git a/seqnado/workflow/envs/profiles/profile_singularity/config.yaml b/seqnado/workflow/envs/profiles/profile_singularity/config.yaml index ddc75dfc..e0e0b203 100644 --- a/seqnado/workflow/envs/profiles/profile_singularity/config.yaml +++ b/seqnado/workflow/envs/profiles/profile_singularity/config.yaml @@ -5,7 +5,7 @@ jobs: 100 use-conda: "False" software-deployment-method: "apptainer" use-apptainer: "True" -# apptainer-args: "--bind $PWD --bind /databank --bind /ceph" +apptainer-args: "-H $PWD" show-failed-logs: "True" printshellcmds: "True" diff --git a/seqnado/workflow/envs/profiles/profile_slurm_singularity/config.yaml b/seqnado/workflow/envs/profiles/profile_slurm_singularity/config.yaml index aed50035..01a3f504 100644 --- a/seqnado/workflow/envs/profiles/profile_slurm_singularity/config.yaml +++ b/seqnado/workflow/envs/profiles/profile_slurm_singularity/config.yaml @@ -1,17 +1,26 @@ +__use_yte__: true executor: slurm jobs: 100 +software-deployment-method: + - "apptainer" +use-apptainer: true +# apptainer-args: "-H $PWD" -use-conda: "False" -software-deployment-method: "apptainer" -use-apptainer: "True" -# apptainer-args: "--bind $PWD --bind /databank --bind /ceph" - -show-failed-logs: "True" -printshellcmds: "True" +show-failed-logs: true +printshellcmds: true +retries: 3 default-resources: slurm_partition: "short" runtime: "1h" - mem: "3G" \ No newline at end of file + mem: "3G" + +# local-storage-prefix: $TMPDIR +# default-storage-provider: fs +# shared-fs-usage: +# - persistence +# - software-deployment +# - sources +# - source-cache \ No newline at end of file diff --git a/seqnado/workflow/rules/align.smk b/seqnado/workflow/rules/align.smk index 2b25bc48..20d74d85 100644 --- a/seqnado/workflow/rules/align.smk +++ b/seqnado/workflow/rules/align.smk @@ -14,7 +14,7 @@ rule align_paired: threads: config["bowtie2"]["threads"] resources: runtime=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}h", - mem="4GB", + mem=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}GB", log: "seqnado_output/logs/align/{sample}.log", shell: @@ -35,7 +35,7 @@ rule align_single: bam=temp("seqnado_output/aligned/raw/{sample}.bam"), resources: runtime=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}h", - mem="4GB", + mem=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}GB", threads: config["bowtie2"]["threads"] log: "seqnado_output/logs/align/{sample}.log", diff --git a/seqnado/workflow/rules/exogenous_norm.smk b/seqnado/workflow/rules/exogenous_norm.smk index 179373b5..78d9c414 100644 --- a/seqnado/workflow/rules/exogenous_norm.smk +++ b/seqnado/workflow/rules/exogenous_norm.smk @@ -2,33 +2,25 @@ from seqnado.design import NormGroups NORM_GROUPS = NormGroups.from_design(DESIGN) -rule align_paired_spikein: - input: - fq1="seqnado_output/trimmed/{sample}_1.fastq.gz", - fq2="seqnado_output/trimmed/{sample}_2.fastq.gz", +use rule align_paired as align_paired_spikein with: params: - index=config["genome"]["indices"], options="--no-mixed --no-discordant", + index=config["genome"]["indices"], output: bam=temp("seqnado_output/aligned/spikein/raw/{sample}.bam"), - threads: config["bowtie2"]["threads"] resources: - mem=lambda wildcards, attempt: f"{4 * 2**attempt}GB", - runtime=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}h", - log: - "seqnado_output/logs/align/{sample}.log", - shell: - """ - bowtie2 -p {threads} {params.options} -x {params.index} -1 {input.fq1} -2 {input.fq2} 2> {log} | - samtools view -bS - > {output.bam} && - samtools sort -@ {threads} -o {output.bam}_sorted {output.bam} >> {log} 2>&1 && - mv {output.bam}_sorted {output.bam} - """ + mem=lambda wildcards, attempt: f"{8 * 2 ** (attempt - 1)}GB", + +use rule align_single as align_single_spikein with: + output: + bam=temp("seqnado_output/aligned/spikein/raw/{sample}.bam"), + resources: + mem=lambda wildcards, attempt: f"{8 * 2 ** (attempt - 1)}GB", use rule sort_bam as sort_bam_spikein with: input: - bam=rules.align_paired_spikein.output.bam, + bam="seqnado_output/aligned/spikein/raw/{sample}.bam", output: bam=temp("seqnado_output/aligned/spikein/sorted/{sample}.bam"), log: @@ -128,3 +120,6 @@ elif config["spikein_options"]["normalisation_method"] == "with_input": "seqnado_output/logs/normalisation_factors_{group}.log", script: "../scripts/calculate_spikein_norm_factors.py" + + +ruleorder: align_paired_spikein > align_single_spikein diff --git a/seqnado/workflow/rules/fastq_trim.smk b/seqnado/workflow/rules/fastq_trim.smk index fa37cad8..c76bba98 100644 --- a/seqnado/workflow/rules/fastq_trim.smk +++ b/seqnado/workflow/rules/fastq_trim.smk @@ -12,7 +12,7 @@ rule trimgalore_paired: threads: 4 resources: mem="2GB", - runtime="2h", + runtime="4h", params: options=check_options(config["trim_galore"]["options"]), trim_dir="seqnado_output/trimmed", diff --git a/seqnado/workflow/rules/pileup_default.smk b/seqnado/workflow/rules/pileup_default.smk index 9a34d37b..66a2828a 100644 --- a/seqnado/workflow/rules/pileup_default.smk +++ b/seqnado/workflow/rules/pileup_default.smk @@ -63,7 +63,7 @@ rule deeptools_make_bigwigs: options=lambda wildcards: format_deeptools_options(wildcards, config["deeptools"]["bamcoverage"]), resources: mem="2GB", - runtime="2h", + runtime="4h", threads: config["deeptools"]["threads"] log: "seqnado_output/logs/pileups/deeptools/{sample}.log", @@ -84,7 +84,7 @@ rule deeptools_make_bigwigs_rna_plus: threads: config["deeptools"]["threads"] resources: mem="2GB", - runtime="2h", + runtime="4h", log: "seqnado_output/logs/pileups/deeptools/{sample}_plus.log", shell: @@ -104,7 +104,7 @@ rule deeptools_make_bigwigs_rna_minus: threads: config["deeptools"]["threads"] resources: mem="2GB", - runtime="2h", + runtime="4h", log: "seqnado_output/logs/pileups/deeptools/{sample}_minus.log", shell: diff --git a/seqnado/workflow/rules/pileup_norm.smk b/seqnado/workflow/rules/pileup_norm.smk index b6c7f18f..1b9d2b2f 100644 --- a/seqnado/workflow/rules/pileup_norm.smk +++ b/seqnado/workflow/rules/pileup_norm.smk @@ -3,36 +3,38 @@ from seqnado.design import NormGroups NORM_GROUPS = NormGroups.from_design(DESIGN) + def format_feature_counts(counts: str) -> pd.DataFrame: counts = pd.read_csv(input.counts, sep="\t", comment="#") counts = counts.set_index("Geneid") - counts = counts.drop(columns=["Chr", "Start", "End", "Strand", "Length"], errors="ignore") + counts = counts.drop( + columns=["Chr", "Start", "End", "Strand", "Length"], errors="ignore" + ) return counts + def create_metadata(counts: pd.DataFrame) -> pd.DataFrame: return counts.columns.str.replace(".bam", "") -def get_scaling_factor(wildcards, scale_path: str) -> float: + +def get_scaling_factor(wildcards, scale_path: str) -> float: df = pd.read_csv(scale_path, sep="\t", header=None, index_col=0) return df.loc[wildcards.sample, "norm.factors"] def get_norm_factor_spikein(wildcards, negative=False): - import json group = NORM_GROUPS.get_sample_group(wildcards.sample) with open(f"seqnado_output/resources/{group}_normalisation_factors.json") as f: norm_factors = json.load(f) - + if not negative: return norm_factors[wildcards.sample] else: return -norm_factors[wildcards.sample] - - def format_deeptools_bamcoverage_options(wildcards): import re @@ -61,44 +63,49 @@ def format_homer_make_bigwigs_options(wildcards): return options + # CSAW Method rule tile_regions: input: chromsizes=config["genome"]["chromosome_sizes"], output: - genome_tiled="seqnado_output/resources/genome_tiled.gtf" + genome_tiled="seqnado_output/resources/genome_tiled.gtf", params: - tile_size=config["genome"].get("tile_size", 10_000) + tile_size=config["genome"].get("tile_size", 10_000), run: import pyranges as pr + chromsizes = ( pd.read_csv(chromsizes, sep="\t", header=None).set_index(0)[1].to_dict() ) genome_tiled = pr.gf.tile_genome(chromsizes, tile_size=tile_size) - genome_tiled = genome_tiled.df.assign(feature="tile", gene_id=lambda df: df.index.astype(str)).pipe(pr.PyRanges) + genome_tiled = genome_tiled.df.assign( + feature="tile", gene_id=lambda df: df.index.astype(str) + ).pipe(pr.PyRanges) genome_tiled.to_gtf(output.genome_tiled) rule count_bam: input: bam=expand("seqnado_output/aligned/{sample}.bam", sample=DESIGN.sample_names), - tiles="seqnado_output/resources/genome_tiled.gtf" + tiles="seqnado_output/resources/genome_tiled.gtf", output: - counts="seqnado_output/counts/counts.tsv" + counts="seqnado_output/counts/counts.tsv", threads: 8 shell: "featureCounts -a {input.tiles} -a {input.tiles} -t tiles -o {output.counts} {input.bam} -T {threads} -p --countReadPairs" + rule setup_for_scaling_factors: input: - counts="seqnado_output/counts/counts.tsv" + counts="seqnado_output/counts/counts.tsv", output: formatted_counts="seqnado_output/counts/{group}_formatted_counts.tsv", metadata="seqnado_output/counts/{group}_metadata.tsv", run: counts = format_counts(input.counts) counts.to_csv(output.formatted_counts, sep="\t") - + metadata = create_metadata(counts) metadata.to_csv(output.metadata, sep="\t", index=False, header=False) @@ -108,34 +115,36 @@ rule calculate_scaling_factors: formatted_counts="seqnado_output/counts/{group}_formatted_counts.tsv", metadata="seqnado_output/counts/{group}_metadata.tsv", output: - scaling_factors="seqnado_output/resources/{group}_scaling_factors.tsv" + scaling_factors="seqnado_output/resources/{group}_scaling_factors.tsv", script: "../scripts/calculate_scaling_factors.R" - + rule deeptools_make_bigwigs_scale: input: bam="seqnado_output/aligned/{sample}.bam", bai="seqnado_output/aligned/{sample}.bam.bai", - scaling_factors=lambda wc: f"seqnado_output/resources/{get_group_for_sample(wc, DESIGN)}_scaling_factors.tsv", + scaling_factors=lambda wc: f"seqnado_output/resources/{get_group_for_sample(wc , DESIGN)}_scaling_factors.tsv", output: bigwig="seqnado_output/bigwigs/deeptools/csaw/{sample}.bigWig", params: - scale=lambda wc: get_scaling_factor(wc, f"seqnado_output/resources/{get_group_for_sample(wc, DESIGN)}_scaling_factors.tsv"), + scale=lambda wc: get_scaling_factor( + wc, + f"seqnado_output/resources/{get_group_for_sample(wc , DESIGN)}_scaling_factors.tsv", + ), options=check_options(config["deeptools"]["bamcoverage"]), threads: 8 log: - "seqnado_output/logs/deeptools/scaled/{sample}.log" + "seqnado_output/logs/deeptools/scaled/{sample}.log", shell: "bamCoverage -b {input.bam} -o {output.bigwig} --scaleFactor {params.scale} -p {threads} {params.options} > {log} 2>&1" - use rule deeptools_make_bigwigs_scale as deeptools_make_bigwigs_spikein with: input: bam="seqnado_output/aligned/{sample}.bam", bai="seqnado_output/aligned/{sample}.bam.bai", - scaling_factors=lambda wc: f"seqnado_output/resources/{get_group_for_sample(wc, DESIGN)}_normalisation_factors.json", + scaling_factors=lambda wc: f"seqnado_output/resources/{get_group_for_sample(wc , DESIGN)}_normalisation_factors.json", output: bigwig="seqnado_output/bigwigs/deeptools/spikein/{sample}.bigWig", params: @@ -147,31 +156,37 @@ rule deeptools_make_bigwigs_rna_spikein_plus: input: bam="seqnado_output/aligned/{sample}.bam", bai="seqnado_output/aligned/{sample}.bam.bai", - scaling_factors=lambda wc: f"seqnado_output/resources/{get_group_for_sample(wc, DESIGN)}_normalisation_factors.json", + scaling_factors=lambda wc: f"seqnado_output/resources/{get_group_for_sample(wc , DESIGN)}_normalisation_factors.json", output: bigwig="seqnado_output/bigwigs/deeptools/spikein/{sample}_plus.bigWig", params: options=lambda wildcards: format_deeptools_bamcoverage_options(wildcards), scale=get_norm_factor_spikein, threads: 8 + resources: + mem="2GB", + runtime="4h", log: - "seqnado_output/logs/deeptools/spikein/{sample}_plus.log" + "seqnado_output/logs/deeptools/spikein/{sample}_plus.log", shell: "bamCoverage -b {input.bam} -o {output.bigwig} -p {threads} --scaleFactor {params.scale} {params.options} --filterRNAstrand forward > {log} 2>&1" + rule deeptools_make_bigwigs_rna_spikein_minus: input: bam="seqnado_output/aligned/{sample}.bam", bai="seqnado_output/aligned/{sample}.bam.bai", - scaling_factors=lambda wc: f"seqnado_output/resources/{get_group_for_sample(wc, DESIGN)}_normalisation_factors.json", + scaling_factors=lambda wc: f"seqnado_output/resources/{get_group_for_sample(wc , DESIGN)}_normalisation_factors.json", output: bigwig="seqnado_output/bigwigs/deeptools/spikein/{sample}_minus.bigWig", params: options=lambda wildcards: format_deeptools_bamcoverage_options(wildcards), scale=lambda wc: get_norm_factor_spikein(wc, negative=True), threads: 8 + resources: + mem="2GB", + runtime="4h", log: - "seqnado_output/logs/deeptools/spikein/{sample}_minus.log" + "seqnado_output/logs/deeptools/spikein/{sample}_minus.log", shell: "bamCoverage -b {input.bam} -o {output.bigwig} -p {threads} --scaleFactor {params.scale} {params.options} --filterRNAstrand reverse > {log} 2>&1" - \ No newline at end of file diff --git a/seqnado/workflow/snakefile_chip b/seqnado/workflow/snakefile_chip index dcff90e4..c90b1fee 100644 --- a/seqnado/workflow/snakefile_chip +++ b/seqnado/workflow/snakefile_chip @@ -38,8 +38,6 @@ assert len(DESIGN.fastq_paths) > 0, "No fastq files found in the working directo symlink_fastq_files(DESIGN, output_dir="seqnado_output/fastqs") - - # Define global variables SAMPLE_NAMES = DESIGN.sample_names SAMPLE_NAMES_IP = DESIGN.sample_names_ip @@ -59,6 +57,7 @@ OUTPUT = ChIPOutput( **config ) + # Load required rules include: "rules/align.smk" include: "rules/alignment_post_processing.smk" @@ -76,10 +75,10 @@ include: "rules/hub.smk" if config["spikein"]: - ruleorder: move_ref_bam > align_paired + ruleorder: move_ref_bam > align_paired > align_single ruleorder: deeptools_make_bigwigs_spikein > deeptools_make_bigwigs else: - ruleorder: align_paired > move_ref_bam + ruleorder: align_paired > align_single > move_ref_bam # Define wildcard constraints wildcard_constraints: diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 55728018..fd8564d5 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -62,8 +62,8 @@ def genome_indices_path(genome_path, assay) -> pathlib.Path: return genome_path / "STAR_chr21_rna_spikein" -@pytest.fixture(scope="function", autouse=True) -def indicies(genome_indices_path, genome_path): +@pytest.fixture(scope="function") +def indicies(genome_indices_path, genome_path) -> pathlib.Path: download_indices = True if not genome_indices_path.exists() else False suffix = genome_indices_path.with_suffix(".tar.gz").name @@ -195,7 +195,7 @@ def user_inputs(test_data_path, indicies, chromsizes, assay, assay_type, gtf, bl defaults = { "project_name": "test", "genome_name": "hg38", - "indices": indicies, + "indices": str(indicies), "chromsizes": str(chromsizes), "gtf": str(gtf), "blacklist": str(blacklist), From 23d5210a0f9aaaaf2dc4de15940de4b515932b4d Mon Sep 17 00:00:00 2001 From: Catherine Chahrour <74187550+CChahrour@users.noreply.github.com> Date: Tue, 26 Mar 2024 15:27:39 +0000 Subject: [PATCH 2/2] Fix add library complexity (#159) * Add Singularity bind for presets ending with "s" * Set APPTAINER_BIND environment variable in cli_pipeline function * Update Singularity and Slurm configurations * Refactor APPTAINER_BINDPATH in cli_pipeline function * Increase runtime for trimgalore_paired rule * Refactor is_paired method in AssayNonIP class * Add rule align_single_spikein for aligning single spikein samples * add align single to ruleorder * Increase number of jobs to 100 in slurm-singularity profile * Refactor align_single_spikein rule * fix exo align mem resources * Increase runtime for deeptools_make_bigwigs rules * Add retries option to config.yaml * Add fastq_screen flag to Output class * Update QCFiles instantiation in NonRNAOutput and ChIPOutput * Add library complexity flag to QCFiles instantiation * add lib comp to config and test * fix typo in config yaml * fix align paired rule * increase memory on sort bam spikein --------- Co-authored-by: alsmith --- seqnado/config.py | 6 +++++- seqnado/workflow/config/config.yaml.jinja | 2 +- seqnado/workflow/rules/exogenous_norm.smk | 3 ++- tests/test_pipelines.py | 1 + 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/seqnado/config.py b/seqnado/config.py index c254ad1a..3fea73af 100644 --- a/seqnado/config.py +++ b/seqnado/config.py @@ -86,7 +86,11 @@ def setup_configuration(assay, genome, template_data): "Path to fastqscreen config:", default="/ceph/project/milne_group/shared/seqnado_reference/fastqscreen_reference/fastq_screen.conf", ) - + + # Library Complexity + template_data["library_complexity"] = get_user_input( + "Calculate library complexity? (yes/no)", default="no", is_boolean=True + ) # Blacklist template_data["remove_blacklist"] = get_user_input( "Do you want to remove blacklist regions? (yes/no)", diff --git a/seqnado/workflow/config/config.yaml.jinja b/seqnado/workflow/config/config.yaml.jinja index 5ae13c3d..493d0105 100755 --- a/seqnado/workflow/config/config.yaml.jinja +++ b/seqnado/workflow/config/config.yaml.jinja @@ -16,7 +16,7 @@ genome: fastq_screen: "{{fastq_screen}}" fastq_screen_config: "{{fastq_screen_config}}" - +library_complexity: "{{library_complexity}}" remove_blacklist: "{{remove_blacklist}}" blacklist: "{{blacklist}}" diff --git a/seqnado/workflow/rules/exogenous_norm.smk b/seqnado/workflow/rules/exogenous_norm.smk index 78d9c414..694d3b6d 100644 --- a/seqnado/workflow/rules/exogenous_norm.smk +++ b/seqnado/workflow/rules/exogenous_norm.smk @@ -23,10 +23,11 @@ use rule sort_bam as sort_bam_spikein with: bam="seqnado_output/aligned/spikein/raw/{sample}.bam", output: bam=temp("seqnado_output/aligned/spikein/sorted/{sample}.bam"), + resources: + mem=lambda wildcards, attempt: f"{8 * 2 ** (attempt - 1)}GB", log: "seqnado_output/logs/aligned_spikein/{sample}_sort.log", - use rule index_bam as index_bam_spikein with: input: bam=rules.sort_bam_spikein.output.bam, diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index fd8564d5..c585a0b2 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -200,6 +200,7 @@ def user_inputs(test_data_path, indicies, chromsizes, assay, assay_type, gtf, bl "gtf": str(gtf), "blacklist": str(blacklist), "fastq_screen": "no", + "library_complexity": "yes", "remove_blacklist": "yes", }