From 317a2141809197909ab096a5d69c5b0e6846541a Mon Sep 17 00:00:00 2001 From: alsmith Date: Fri, 12 Jan 2024 14:52:49 +0000 Subject: [PATCH] Update RNA workflow and experiment configuration --- Review note: in the added pd.read_csv call the separator regex is now a raw string (r"[\s+,\t]") so that "\s" is no longer an invalid string escape; the compiled pattern is unchanged, and the removed line is left as-is so it still matches the pre-image. seqnado/workflow/snakefile_rna | 43 +++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/seqnado/workflow/snakefile_rna b/seqnado/workflow/snakefile_rna index 5e72531d..bd79f307 100644 --- a/seqnado/workflow/snakefile_rna +++ b/seqnado/workflow/snakefile_rna @@ -1,9 +1,16 @@ import os +import sys import shutil +from datetime import datetime import glob +from snakemake.utils import min_version +from seqnado.utils import Design import seqnado.utils as utils import pandas as pd +#################### +# Hardcoded config # +#################### ASSAY = "RNA" @@ -13,31 +20,29 @@ configfile: "config_rna.yml" container: "library://asmith151/seqnado/seqnado_pipeline:latest" +#################### +# Experiment config # +#################### + +# Load config utils.format_config_dict(config) -# Get experiment design +# Generate design if os.path.exists(config["design"]): - # Expect columns - sample fq1 fq2 - FASTQ_SAMPLES = utils.GenericFastqSamples( - pd.read_csv(config["design"], sep="[\s+,\t]", engine="python") - ) - assert FASTQ_SAMPLES.design.shape[0] > 0, "No samples found in design file" - for col in ["sample", "fq1", "fq2"]: - assert ( - col in FASTQ_SAMPLES.design.columns - ), f"Design file must contain columns sample, fq1, fq2. 
Columns found: {FASTQ_SAMPLES.design.columns}" + df = pd.read_csv(config["design"], sep=r"[\s+,\t]", engine="python") + DESIGN = Design.from_dataframe(df) else: - # Use pattern matching to get samples - fq_files = list(utils.get_fastq_files(".")) - if fq_files: - FASTQ_SAMPLES = utils.GenericFastqSamples.from_files(fq_files) - else: - raise ValueError("No FASTQ files found in the working directory") + DESIGN = Design.from_directory(".") + +# Attempt to symlink the fastq files +assert ( + len(DESIGN.fastq_paths) > 0 +), "No fastq files found in the working directory or no design file provided." +utils.symlink_fastq_files(DESIGN, output_dir="seqnado_output/fastqs") -DESIGN = FASTQ_SAMPLES.design -SAMPLE_NAMES = FASTQ_SAMPLES.sample_names_all -RUN_DESEQ2 = DESIGN.columns.str.contains("deseq2").any() +SAMPLE_NAMES = DESIGN.sample_names +RUN_DESEQ2 = DESIGN.to_dataframe().columns.str.contains("deseq2").any() include: "rules/qc.smk"