Skip to content

Commit

Permalink
Update RNA workflow and experiment configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
alsmith151 committed Jan 12, 2024
1 parent 2895c8d commit 317a214
Showing 1 changed file with 24 additions and 19 deletions.
43 changes: 24 additions & 19 deletions seqnado/workflow/snakefile_rna
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
import os
import sys
import shutil
from datetime import datetime
import glob
from snakemake.utils import min_version
from seqnado.utils import Design
import seqnado.utils as utils
import pandas as pd

####################
# Hardcoded config #
####################
ASSAY = "RNA"


Expand All @@ -13,31 +20,29 @@ configfile: "config_rna.yml"
container: "library://asmith151/seqnado/seqnado_pipeline:latest"


####################
# Experiment config #
####################

# Load config
utils.format_config_dict(config)

# Get experiment design
# Generate design
if os.path.exists(config["design"]):
# Expect columns - sample fq1 fq2
FASTQ_SAMPLES = utils.GenericFastqSamples(
pd.read_csv(config["design"], sep="[\s+,\t]", engine="python")
)
assert FASTQ_SAMPLES.design.shape[0] > 0, "No samples found in design file"
for col in ["sample", "fq1", "fq2"]:
assert (
col in FASTQ_SAMPLES.design.columns
), f"Design file must contain columns sample, fq1, fq2. Columns found: {FASTQ_SAMPLES.design.columns}"
# Read the design table named by config["design"] and build the Design from it.
# The separator is a regex (hence engine="python"); each single character in
# the class acts as a field delimiter. A raw string is used so the regex
# escapes reach the `re` module untouched instead of relying on Python
# passing the invalid string escape "\s" through (a SyntaxWarning on 3.12+).
# NOTE(review): inside a character class "+" is a literal plus sign, not a
# quantifier, so runs of delimiters are NOT collapsed — confirm this is intended.
df = pd.read_csv(config["design"], sep=r"[\s+,\t]", engine="python")
DESIGN = Design.from_dataframe(df)
else:
# Use pattern matching to get samples
fq_files = list(utils.get_fastq_files("."))
if fq_files:
FASTQ_SAMPLES = utils.GenericFastqSamples.from_files(fq_files)
else:
raise ValueError("No FASTQ files found in the working directory")
DESIGN = Design.from_directory(".")

# Stop early when the design holds no FASTQ paths. An explicit raise is used
# rather than `assert` because assert statements are removed when Python runs
# with -O, which would silently skip this validation.
if len(DESIGN.fastq_paths) == 0:
    raise ValueError(
        "No fastq files found in the working directory or no design file provided."
    )

# Attempt to symlink the fastq files
utils.symlink_fastq_files(DESIGN, output_dir="seqnado_output/fastqs")


DESIGN = FASTQ_SAMPLES.design
SAMPLE_NAMES = FASTQ_SAMPLES.sample_names_all
RUN_DESEQ2 = DESIGN.columns.str.contains("deseq2").any()
# Sample names as exposed by the Design object.
SAMPLE_NAMES = DESIGN.sample_names
# True when at least one column name of the design table contains the
# substring "deseq2" (case-sensitive match on the column labels).
RUN_DESEQ2 = DESIGN.to_dataframe().columns.str.contains("deseq2").any()


include: "rules/qc.smk"
Expand Down

0 comments on commit 317a214

Please sign in to comment.