Skip to content

Commit

Permalink
add preset genomes to config
Browse files Browse the repository at this point in the history
  • Loading branch information
CChahrour committed Jan 10, 2024
1 parent b74cdbd commit 4a5a23f
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 32 deletions.
7 changes: 5 additions & 2 deletions seqnado/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@

@click.command(context_settings=dict(ignore_unknown_options=True))
@click.argument("method", type=click.Choice(["atac", "chip", "rna", "snp"]))
@click.option(
    "-g",
    "--genome",
    default="other",
    help="Genome to use",
    type=click.Choice(
        choices=[
            "dm6",
            "hg19",
            "hg38",
            "hg38_dm6",
            "hg38_mm39",
            "hg38_spikein",
            "mm10",
            "mm39",
            "other",
        ]
    ),
)
def cli_config(method, help=False, genome="other"):
    """
    Runs the config for the data processing pipeline.
    """
    # NOTE: the docstring above is left untouched on purpose -- click shows it
    # as the command's --help text, so editing it changes user-facing output.
    #
    # method: assay name, restricted by click to atac / chip / rna / snp.
    # genome: value of -g/--genome; "other" (the default) makes
    #         setup_configuration prompt for the reference paths instead of
    #         reading them from the preset genomes file.
    # help:   not bound to any click parameter in this block; kept only so the
    #         signature stays backward-compatible.

    # Imported inside the function, as in the original, so seqnado.config is
    # only loaded when this command actually runs.
    import seqnado.config as config

    # create_config takes (assay, genome); the stale one-argument call that was
    # interleaved here no longer matches that signature.
    config.create_config(method, genome)



Expand Down
74 changes: 61 additions & 13 deletions seqnado/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import datetime
from jinja2 import Environment, FileSystemLoader
import json

# Helper Functions
def get_user_input(prompt, default=None, is_boolean=False, choices=None):
Expand All @@ -13,11 +14,13 @@ def get_user_input(prompt, default=None, is_boolean=False, choices=None):
continue
return user_input

def setup_configuration(assay, template_data):


def setup_configuration(assay, genome, template_data):
username = os.getenv('USER', 'unknown_user')
today = datetime.datetime.now().strftime('%Y-%m-%d')
project_name = get_user_input("What is your project name?", default=f"{username}_project")
genome = get_user_input("What is your genome name?", default="hg38", choices=['dm6', 'hg19', 'hg38', 'hg38_dm6', 'hg38_mm39', 'hg38_spikein', 'mm9', 'mm10', 'mm10_TetR_ChrX_and_Chr8', 'mm39', 'other'])

common_config = {
'username': username,
'project_date': today,
Expand All @@ -26,14 +29,55 @@ def setup_configuration(assay, template_data):
}

template_data.update(common_config)

if assay in ["chip", "atac"]:
template_data['indicies'] = get_user_input("Path to Bowtie2 genome indices:", default=f"/ceph/project/milne_group/shared/seqnado_reference/{genome}/UCSC/bt2_index/{genome}")
elif assay == "rna":
template_data['indicies'] = get_user_input("Path to STAR genome indices:", default=f"/ceph/project/milne_group/shared/seqnado_reference/{genome}/UCSC/STAR_2.7.10b")

template_data['chromosome_sizes'] = get_user_input("Path to chromosome sizes file:", default=f"/ceph/project/milne_group/shared/seqnado_reference/{genome}/UCSC/sequence/{genome}.chrom.sizes")
template_data['gtf'] = get_user_input("Path to GTF file:", default=f"/ceph/project/milne_group/shared/seqnado_reference/{genome}/UCSC/genes/{genome}.ncbiRefSeq.gtf")
with open('seqnado/workflow/config/preset_genomes.json', 'r') as file:
genome_values = json.load(file)

genome_dict = {}

if genome == "other":
if assay in ["chip", "atac"]:
genome_dict = {
"other": {
"index": get_user_input("Path to Bowtie2 genome index:"),
"chromosome_sizes": get_user_input("Path to chromosome sizes file:"),
"gtf": get_user_input("Path to GTF file:"),
"blacklist": get_user_input("Path to blacklist bed file:")
}
}
elif assay == "rna":
genome_dict = {
"other": {
"index": get_user_input("Path to STAR v2.7.10b genome index:"),
"chromosome_sizes": get_user_input("Path to chromosome sizes file:"),
"gtf": get_user_input("Path to GTF file:"),
"blacklist": get_user_input("Path to blacklist bed file:")
}
}

elif genome in genome_values:
if assay in ["chip", "atac"]:
genome_dict = {
genome: {
"index": genome_values[genome]['bt2_index'],
"chromosome_sizes": genome_values[genome]['chromosome_sizes'],
"gtf": genome_values[genome]['gtf'],
"blacklist": genome_values[genome]['blacklist']
}
}
elif assay == "rna":
genome_dict = {
genome: {
"index": genome_values[genome]['star_index'],
"chromosome_sizes": genome_values[genome]['chromosome_sizes'],
"gtf": genome_values[genome]['gtf'],
"blacklist": genome_values[genome]['blacklist']
}
}

template_data['indicies'] = genome_dict[genome]['index']
template_data['chromosome_sizes'] = genome_dict[genome]['chromosome_sizes']
template_data['gtf'] = genome_dict[genome]['gtf']
template_data['read_type'] = get_user_input("What is your read type?", default="paired", choices=["paired", "single"])

template_data['split_fastq'] = get_user_input("Do you want to split FASTQ files? (yes/no)", default="no", is_boolean=True)
Expand All @@ -53,7 +97,7 @@ def setup_configuration(assay, template_data):

template_data['remove_blacklist'] = get_user_input("Do you want to remove blacklist regions? (yes/no)", default="yes", is_boolean=True)
if template_data['remove_blacklist']:
template_data['blacklist'] = get_user_input("Path to blacklist file:", default=f"/ceph/project/milne_group/shared/seqnado_reference/{genome}/UCSC/blacklist/{genome}.blacklist.bed")
template_data['blacklist'] = genome_dict[genome]['blacklist']

if assay == "atac":
template_data['shift_atac_reads'] = get_user_input("Shift ATAC-seq reads? (yes/no)", default="yes", is_boolean=True)
Expand Down Expand Up @@ -181,18 +225,22 @@ def setup_configuration(assay, template_data):
# These need to be replaced
# e.g. --extendReads -bs 1 --normalizeUsing RPKM
bamcoverage: -bs 1 --normalizeUsing CPM
heatmap:
options:
colormap: RdYlBu_r
"""

def create_config(assay):
def create_config(assay, genome):
env = Environment(loader=FileSystemLoader('.'))
template = env.get_template("seqnado/workflow/config/config.yaml.jinja")
template_deseq2 = env.get_template("seqnado/workflow/config/deseq2.qmd.jinja")

# Initialize template data
template_data = {'assay': assay}
template_data = {'assay': assay, 'genome': genome}

# Setup configuration
setup_configuration(assay, template_data)
setup_configuration(assay, genome, template_data)

# Create directory and render template
dir_name = f"{template_data['project_date']}_{template_data['assay']}_{template_data['project_name']}"
Expand Down
18 changes: 9 additions & 9 deletions seqnado/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,14 @@ def define_output_files(
]
assay_output = []

if make_heatmaps:
assay_output.extend(
[
"seqnado_output/heatmap/heatmap.pdf",
"seqnado_output/heatmap/metaplot.pdf",
]
)

if make_ucsc_hub:
hub_dir = kwargs["ucsc_hub_details"].get("directory")
hub_name = kwargs["ucsc_hub_details"].get("name")
Expand Down Expand Up @@ -337,14 +345,6 @@ def define_output_files(
)
)

if make_heatmaps:
assay_output.extend(
[
"seqnado_output/heatmap/heatmap.pdf",
"seqnado_output/heatmap/metaplot.pdf",
]
)

elif assay == "RNA":
if make_bigwigs and pileup_method:
assay_output.extend(
Expand All @@ -355,7 +355,7 @@ def define_output_files(
strand=["plus", "minus"],
)
)

if run_deseq2:
project_id = kwargs["deseq2"].get("project_id")
assay_output.append(f"DESeq2_{project_id}.html")
Expand Down
58 changes: 58 additions & 0 deletions seqnado/workflow/config/preset_genomes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"dm6": {
"bt2_index": "/ceph/project/milne_group/shared/seqnado_reference/dm6/UCSC/bt2_index/dm6",
"star_index": "/ceph/project/milne_group/shared/seqnado_reference/dm6/UCSC/STAR_2.7.10b",
"chromosome_sizes": "/ceph/project/milne_group/shared/seqnado_reference/dm6/UCSC/sequence/dm6.chrom.sizes",
"gtf": "/ceph/project/milne_group/shared/seqnado_reference/dm6/UCSC/genes/dm6.ncbiRefSeq.gtf",
"blacklist": "/ceph/project/milne_group/shared/seqnado_reference/dm6/dm6-blacklist.v2.bed.gz"
},
"hg19": {
"bt2_index": "/ceph/project/milne_group/shared/seqnado_reference/hg19/UCSC/bt2_index/hg19",
"star_index": "/ceph/project/milne_group/shared/seqnado_reference/hg19/UCSC/STAR_2.7.10b",
"chromosome_sizes": "/ceph/project/milne_group/shared/seqnado_reference/hg19/UCSC/sequence/hg19.chrom.sizes",
"gtf": "/ceph/project/milne_group/shared/seqnado_reference/hg19/UCSC/genes/hg19.ncbiRefSeq.gtf",
"blacklist": "/ceph/project/milne_group/shared/seqnado_reference/hg19/hg19-blacklist.v2.bed.gz"
},
"hg38": {
"bt2_index": "/ceph/project/milne_group/shared/seqnado_reference/hg38/UCSC/bt2_index/hg38",
"star_index": "/ceph/project/milne_group/shared/seqnado_reference/hg38/UCSC/STAR_2.7.10b",
"chromosome_sizes": "/ceph/project/milne_group/shared/seqnado_reference/hg38/UCSC/sequence/hg38.chrom.sizes",
"gtf": "/ceph/project/milne_group/shared/seqnado_reference/hg38/UCSC/genes/hg38.ncbiRefSeq.gtf",
"blacklist": "/ceph/project/milne_group/shared/seqnado_reference/hg38/hg38-blacklist.v2.bed.gz"
},
"hg38_dm6": {
"bt2_index": "/ceph/project/milne_group/shared/seqnado_reference/hg38_dm6/UCSC/bt2_index/hg38_dm6",
"star_index": "NA",
"chromosome_sizes": "/ceph/project/milne_group/shared/seqnado_reference/hg38_dm6/UCSC/sequence/hg38_dm6.chrom.sizes",
"gtf": "/ceph/project/milne_group/shared/seqnado_reference/hg38_dm6/UCSC/genes/hg38_dm6.ncbiRefSeq.gtf",
"blacklist": "/ceph/project/milne_group/shared/seqnado_reference/hg38_dm6/hg38_dm6-blacklist.v2.bed.gz"
},
"hg38_mm39": {
"bt2_index": "/ceph/project/milne_group/shared/seqnado_reference/hg38_mm39/bt2_index/hg38_mm39",
"star_index": "NA",
"chromosome_sizes": "/ceph/project/milne_group/shared/seqnado_reference/hg38_mm39/sequence/hg38_mm39.fa.fai",
"gtf": "/ceph/project/milne_group/shared/seqnado_reference/hg38_mm39/genes/hg38_mm39.gtf",
"blacklist": "/ceph/project/milne_group/shared/seqnado_reference/hg38_mm39/hg38_mm39-blacklist.bed.gz"
},
"hg38_spikein": {
"bt2_index": "NA",
"star_index": "/ceph/project/milne_group/shared/seqnado_reference/hg38_spikein/UCSC/STAR_2.7.10b",
"chromosome_sizes": "/ceph/project/milne_group/shared/seqnado_reference/hg38_spikein/hg38_spikein.chrom.sizes",
"gtf": "/ceph/project/milne_group/shared/seqnado_reference/hg38_spikein/UCSC/genes/hg38_spikein_transcripts.gtf",
"blacklist": "/ceph/project/milne_group/shared/seqnado_reference/hg38/hg38-blacklist.v2.bed.gz"
},
"mm10": {
"bt2_index": "/ceph/project/milne_group/shared/seqnado_reference/mm10/UCSC/bt2_index/mm10",
"star_index": "/ceph/project/milne_group/shared/seqnado_reference/mm10/UCSC/STAR_2.7.10b",
"chromosome_sizes": "/ceph/project/milne_group/shared/seqnado_reference/mm10/UCSC/sequence/mm10.chrom.sizes",
"gtf": "/ceph/project/milne_group/shared/seqnado_reference/mm10/UCSC/genes/mm10.ncbiRefSeq.gtf",
"blacklist": "/ceph/project/milne_group/shared/seqnado_reference/mm10/mm10-blacklist.v2.bed.gz"
},
"mm39": {
"bt2_index": "/ceph/project/milne_group/shared/seqnado_reference/mm39/UCSC/bt2_index/mm39",
"star_index": "/ceph/project/milne_group/shared/seqnado_reference/mm39/UCSC/STAR_2.7.10b",
"chromosome_sizes": "/ceph/project/milne_group/shared/seqnado_reference/mm39/UCSC/sequence/mm39.chrom.sizes",
"gtf": "/ceph/project/milne_group/shared/seqnado_reference/mm39/UCSC/genes/mm39.ncbiRefSeq.gtf",
"blacklist": "/ceph/project/milne_group/shared/seqnado_reference/mm39/mm10-blacklist.v2.Liftover.mm39.bed.gz"
}
}
12 changes: 4 additions & 8 deletions seqnado/workflow/rules/heatmap.smk
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import seqnado.utils

# Choose the sample-name prefixes used by the heatmap rules in this file.
if ASSAY == "RNA":
    # One entry per sample and strand suffix, samples in the outer loop
    # (presumably matching the plus/minus pileup tracks -- confirm against
    # pileup.smk).
    prefix = [
        sample + strand
        for sample in SAMPLE_NAMES
        for strand in ("_plus", "_minus")
    ]
elif ASSAY == "ChIP":
    prefix = SAMPLE_NAMES_IP
else:
    prefix = SAMPLE_NAMES

Expand Down Expand Up @@ -34,10 +36,7 @@ rule heatmap_plot:
mem_mb=lambda wildcards, attempt: 2000 * 2**attempt,
log:
"seqnado_output/logs/heatmap/heatmap.log",
shell: """plotHeatmap -m {input.matrix} \
-out {output.heatmap} \
--colorMap {params.colormap} \
--boxAroundHeatmaps no"""
shell: """plotHeatmap -m {input.matrix} -out {output.heatmap} --colorMap {params.colormap} --boxAroundHeatmaps no"""


rule heatmap_metaplot:
Expand All @@ -51,7 +50,4 @@ rule heatmap_metaplot:
mem_mb=lambda wildcards, attempt: 2000 * 2**attempt,
log:
"seqnado_output/logs/heatmap/metaplot.log",
shell: """plotProfile -m {input.matrix} \
-out {output.metaplot} \
--colorMap {params.colormap} \
--perGroup"""
shell: """plotProfile -m {input.matrix} -out {output.metaplot} --perGroup"""
1 change: 1 addition & 0 deletions seqnado/workflow/snakefile_rna
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ include: "rules/alignment_counts.smk"
include: "rules/pileup.smk"
include: "rules/deseq2_rna.smk"
include: "rules/hub.smk"
include: "rules/heatmap.smk"


# Define output files
Expand Down

0 comments on commit 4a5a23f

Please sign in to comment.