Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix hub and edit docs #142

Merged
merged 4 commits into from
Feb 9, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
bug fixes for conda release
CChahrour committed Feb 8, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
commit ce2422a1718be6e2b559022f3263c51fa2209dd8
211 changes: 143 additions & 68 deletions seqnado/config.py
Original file line number Diff line number Diff line change
@@ -6,33 +6,40 @@
package_dir = os.path.dirname(os.path.abspath(__file__))
template_dir = os.path.join(package_dir, "workflow/config")


# Helper Functions
def get_user_input(prompt, default=None, is_boolean=False, choices=None):
    """Prompt on stdin until a usable answer is given, and return it.

    Args:
        prompt: Question text shown to the user.
        default: Value used when the user just presses Enter. Shown in
            square brackets after the prompt unless ``choices`` is given.
        is_boolean: When true, interpret the answer as yes/no and return a
            ``bool`` instead of the raw string.
        choices: Optional list of accepted answers. When given, the choices
            (not the default) are shown in the square brackets and any other
            answer is rejected and asked again.

    Returns:
        ``True``/``False`` when ``is_boolean`` is set; otherwise the stripped
        answer string, or ``default`` (possibly ``None``) for an empty reply.
    """
    affirmative = ("yes", "y", "true")
    negative = ("no", "n", "false")
    hint = "/".join(choices) if choices else default

    while True:
        # Strip so that stray spaces do not defeat the yes/no or choice checks.
        user_input = input(f"{prompt} [{hint}]: ").strip() or default

        if is_boolean:
            # user_input is None only when there is no default and the reply
            # was empty; treat that as "no answer" instead of calling
            # .lower() on None.
            answer = "" if user_input is None else str(user_input).lower()
            if answer in affirmative:
                return True
            if answer in negative:
                return False
            # Ask again rather than silently reading a typo as "no".
            print("Invalid choice. Please answer yes or no.")
            continue

        if choices and user_input not in choices:
            print(f"Invalid choice. Please choose from {', '.join(choices)}.")
            continue

        return user_input


def setup_configuration(assay, genome, template_data):
username = os.getenv('USER', 'unknown_user')
today = datetime.datetime.now().strftime('%Y-%m-%d')
project_name = get_user_input("What is your project name?", default=f"{username}_project")
username = os.getenv("USER", "unknown_user")
today = datetime.datetime.now().strftime("%Y-%m-%d")
project_name = get_user_input(
"What is your project name?", default=f"{username}_project"
)
project_name = project_name.replace(" ", "_")

common_config = {
'username': username,
'project_date': today,
'project_name': project_name,
'genome': genome
"username": username,
"project_date": today,
"project_name": project_name,
"genome": genome,
}

template_data.update(common_config)

with open(os.path.join(template_dir, 'preset_genomes.json'), 'r') as f:
with open(os.path.join(template_dir, "preset_genomes.json"), "r") as f:
genome_values = json.load(f)

genome_dict = {}
@@ -41,72 +48,135 @@ def setup_configuration(assay, genome, template_data):
genome = get_user_input("What is your genome name?", default="other")
genome_dict = {
genome: {
"indices": get_user_input("Path to Bowtie2 genome indices:") if assay in ["chip", "atac"] else get_user_input("Path to STAR v2.7.10b genome indices:"),
"indices": (
get_user_input("Path to Bowtie2 genome indices:")
if assay in ["chip", "atac"]
else get_user_input("Path to STAR v2.7.10b genome indices:")
),
"chromosome_sizes": get_user_input("Path to chromosome sizes file:"),
"gtf": get_user_input("Path to GTF file:"),
"blacklist": get_user_input("Path to blacklist bed file:")
"blacklist": get_user_input("Path to blacklist bed file:"),
}
}
else:
if genome in genome_values:
genome_dict[genome] = {
"indices": genome_values[genome].get('bt2_indices' if assay in ["chip", "atac"] else 'star_indices', ''),
"chromosome_sizes": genome_values[genome].get('chromosome_sizes', ''),
"gtf": genome_values[genome].get('gtf', ''),
"blacklist": genome_values[genome].get('blacklist', '')
"indices": genome_values[genome].get(
"bt2_indices" if assay in ["chip", "atac"] else "star_indices", ""
),
"chromosome_sizes": genome_values[genome].get("chromosome_sizes", ""),
"gtf": genome_values[genome].get("gtf", ""),
"blacklist": genome_values[genome].get("blacklist", ""),
}


genome_config = {
'genome': genome,
'indices': genome_dict[genome]['indices'],
'chromosome_sizes': genome_dict[genome]['chromosome_sizes'],
'gtf': genome_dict[genome]['gtf'],
"genome": genome,
"indices": genome_dict[genome]["indices"],
"chromosome_sizes": genome_dict[genome]["chromosome_sizes"],
"gtf": genome_dict[genome]["gtf"],
}
template_data.update(genome_config)


template_data['remove_blacklist'] = get_user_input("Do you want to remove blacklist regions? (yes/no)", default="yes", is_boolean=True)
if template_data['remove_blacklist']:
template_data['blacklist'] = genome_dict[genome]['blacklist']

template_data['remove_pcr_duplicates'] = get_user_input("Remove PCR duplicates? (yes/no)", default= "yes" if assay in ["chip", "atac"] else "no", is_boolean=True)
if template_data['remove_pcr_duplicates']:
template_data['remove_pcr_duplicates_method'] = get_user_input("Remove PCR duplicates method:", default="picard", choices=["picard"])
template_data["remove_blacklist"] = get_user_input(
"Do you want to remove blacklist regions? (yes/no)",
default="yes",
is_boolean=True,
)
if template_data["remove_blacklist"]:
template_data["blacklist"] = genome_dict[genome]["blacklist"]

template_data["remove_pcr_duplicates"] = get_user_input(
"Remove PCR duplicates? (yes/no)",
default="yes" if assay in ["chip", "atac"] else "no",
is_boolean=True,
)
if template_data["remove_pcr_duplicates"]:
template_data["remove_pcr_duplicates_method"] = get_user_input(
"Remove PCR duplicates method:", default="picard", choices=["picard"]
)

else:
template_data['remove_pcr_duplicates_method'] = "False"
template_data["remove_pcr_duplicates_method"] = "False"

if assay == "atac":
template_data['shift_atac_reads'] = get_user_input("Shift ATAC-seq reads? (yes/no)", default="yes", is_boolean=True) if assay == "atac" else "False"
template_data["shift_atac_reads"] = (
get_user_input(
"Shift ATAC-seq reads? (yes/no)", default="yes", is_boolean=True
)
if assay == "atac"
else "False"
)

if assay == "chip":
template_data['spikein'] = get_user_input("Do you have spikein? (yes/no)", default="no", is_boolean=True)
if template_data['spikein']:
template_data['normalisation_method'] = get_user_input("Normalisation method:", default="orlando", choices=["orlando", "with_input"])
template_data['reference_genome'] = get_user_input("Reference genome:", default="hg38")
template_data['spikein_genome'] = get_user_input("Spikein genome:", default="dm6")
template_data['fastq_screen_config'] = get_user_input("Path to fastqscreen config:", default="/ceph/project/milne_group/shared/seqnado_reference/fastqscreen_reference/fastq_screen.conf")

template_data['make_bigwigs'] = get_user_input("Do you want to make bigwigs? (yes/no)", default="no", is_boolean=True)
if template_data['make_bigwigs']:
template_data['pileup_method'] = get_user_input("Pileup method:", default="deeptools", choices=["deeptools", "homer"])
template_data['make_heatmaps'] = get_user_input("Do you want to make heatmaps? (yes/no)", default="no", is_boolean=True)

if assay in ["chip", "atac"]:
template_data['call_peaks'] = get_user_input("Do you want to call peaks? (yes/no)", default="no", is_boolean=True)
if template_data['call_peaks']:
template_data['peak_calling_method'] = get_user_input("Peak caller:", default="lanceotron", choices=["lanceotron", "macs", "homer"])

template_data['run_deseq2'] = get_user_input("Run DESeq2? (yes/no)", default="no", is_boolean=True) if assay == "rna" else "False"
template_data["spikein"] = get_user_input(
"Do you have spikein? (yes/no)", default="no", is_boolean=True
)
if template_data["spikein"]:
template_data["normalisation_method"] = get_user_input(
"Normalisation method:",
default="orlando",
choices=["orlando", "with_input"],
)
template_data["reference_genome"] = get_user_input(
"Reference genome:", default="hg38"
)
template_data["spikein_genome"] = get_user_input(
"Spikein genome:", default="dm6"
)
template_data["fastq_screen_config"] = get_user_input(
"Path to fastqscreen config:",
default="/ceph/project/milne_group/shared/seqnado_reference/fastqscreen_reference/fastq_screen.conf",
)

template_data["make_bigwigs"] = get_user_input(
"Do you want to make bigwigs? (yes/no)", default="no", is_boolean=True
)
if template_data["make_bigwigs"]:
template_data["pileup_method"] = get_user_input(
"Pileup method:", default="deeptools", choices=["deeptools", "homer"]
)
template_data["make_heatmaps"] = get_user_input(
"Do you want to make heatmaps? (yes/no)", default="no", is_boolean=True
)

template_data['make_ucsc_hub'] = get_user_input("Do you want to make a UCSC hub? (yes/no)", default="no", is_boolean=True)

template_data['UCSC_hub_directory'] = get_user_input("UCSC hub directory:", default="/path/to/ucsc_hub/") if template_data['make_ucsc_hub'] else "."
template_data['email'] = get_user_input("What is your email address?", default=f"{username}@example.com") if template_data['make_ucsc_hub'] else f"{username}@example.com"
template_data['color_by'] = get_user_input("Color by (for UCSC hub):", default="samplename") if template_data['make_ucsc_hub'] else "samplename"

template_data['options'] = TOOL_OPTIONS_RNA if assay == "rna" else TOOL_OPTIONS
if assay in ["chip", "atac"]:
template_data["call_peaks"] = get_user_input(
"Do you want to call peaks? (yes/no)", default="no", is_boolean=True
)
if template_data["call_peaks"]:
template_data["peak_calling_method"] = get_user_input(
"Peak caller:",
default="lanceotron",
choices=["lanceotron", "macs", "homer"],
)

template_data["run_deseq2"] = (
get_user_input("Run DESeq2? (yes/no)", default="no", is_boolean=True)
if assay == "rna"
else "False"
)

template_data["make_ucsc_hub"] = get_user_input(
"Do you want to make a UCSC hub? (yes/no)", default="no", is_boolean=True
)

template_data["UCSC_hub_directory"] = (
get_user_input("UCSC hub directory:", default="/path/to/ucsc_hub/")
if template_data["make_ucsc_hub"]
else "."
)
template_data["email"] = (
get_user_input("What is your email address?", default=f"{username}@example.com")
if template_data["make_ucsc_hub"]
else f"{username}@example.com"
)
template_data["color_by"] = (
get_user_input("Color by (for UCSC hub):", default="samplename")
if template_data["make_ucsc_hub"]
else "samplename"
)

template_data["options"] = TOOL_OPTIONS_RNA if assay == "rna" else TOOL_OPTIONS


# Tool Specific Options
@@ -178,35 +248,40 @@ def setup_configuration(assay, genome, template_data):
colormap: RdYlBu_r
"""


# NOTE(review): this function is shown as a rendered diff with indentation
# stripped. Several statements appear twice (old quoting/layout followed by
# the reformatted version); each such pair is the removed and the added side
# of the same statement, not two separate calls.
#
# create_config(assay, genome, rerun): interactively build a SeqNado config.
# - Loads the "config.yaml.jinja" and "deseq2.qmd.jinja" templates from
#   template_dir.
# - Calls setup_configuration(), which prompts the user and fills
#   template_data in place (it adds "project_date" and "project_name",
#   which are used below to name the output).
# - Writes the rendered config to config_{assay}.yml.
# - For the "rna" assay, also renders a DESeq2 .qmd report.
def create_config(assay, genome, rerun):
# Environment / FileSystemLoader are presumably jinja2's; the import is
# outside this view -- TODO confirm.
env = Environment(loader=FileSystemLoader(template_dir), auto_reload=False)

template = env.get_template("config.yaml.jinja")
template = env.get_template("config.yaml.jinja")
template_deseq2 = env.get_template("deseq2.qmd.jinja")

# Initialize template data
template_data = {'assay': assay, 'genome': genome}
template_data = {"assay": assay, "genome": genome}

# Setup configuration
setup_configuration(assay, genome, template_data)

# Create directory and render template
# rerun: write the config into the current working directory; no new
# project directory is created.
if rerun:
dir_name = os.getcwd()
with open(os.path.join(dir_name, f"config_{assay}.yml"), 'w') as file:
with open(os.path.join(dir_name, f"config_{assay}.yml"), "w") as file:
file.write(template.render(template_data))
else:
# Fresh project: create "<project_date>_<assay>_<project_name>/" with an
# empty "fastq" subdirectory, then write the config inside it.
dir_name = f"{template_data['project_date']}_{template_data['assay']}_{template_data['project_name']}"
os.makedirs(dir_name, exist_ok=True)
fastq_dir = os.path.join(dir_name, "fastq")
os.makedirs(fastq_dir, exist_ok=True)
with open(os.path.join(dir_name, f"config_{assay}.yml"), 'w') as file:

with open(os.path.join(dir_name, f"config_{assay}.yml"), "w") as file:
file.write(template.render(template_data))

# NOTE(review): with indentation lost it is not possible to tell from here
# whether the DESeq2 block and the final print belong to the else-branch
# only or run for both branches -- confirm against the repository before
# editing.
# add deseq2 qmd file if rna
if assay == "rna":
with open(os.path.join(dir_name, f"deseq2_{template_data['project_name']}.qmd"), 'w') as file:
with open(
os.path.join(dir_name, f"deseq2_{template_data['project_name']}.qmd"), "w"
) as file:
file.write(template_deseq2.render(template_data))

print(f"Directory '{dir_name}' has been created with the 'config_{assay}.yml' file.")

print(
f"Directory '{dir_name}' has been created with the 'config_{assay}.yml' file."
)
21 changes: 12 additions & 9 deletions seqnado/workflow/rules/peak_call_chip.smk
Original file line number Diff line number Diff line change
@@ -43,16 +43,17 @@ rule macs2_with_input:
params:
options=seqnado.utils.check_options(config["macs"]["callpeak"]),
narrow=lambda wc, output: output.peaks.replace(".bed", "_peaks.narrowPeak"),
basename=lambda wc, output: output.peaks.replace(".bed", ""),
threads: 1
resources:
mem_mb=2000,
time="0-02:00:00",
log:
"seqnado_output/logs/macs/{sample}_{treatment}.bed",
"seqnado_output/logs/macs/{sample}_{treatment}.log",
shell:
"""
macs2 callpeak -t {input.treatment} -c {input.control} -n seqnado_output/peaks/macs/{wildcards.treatment} -f BAMPE {params.options} > {log} 2>&1 &&
cat {params.narrow} | cut -f 1-3 > {output.peaks} || touch {output.peaks}
macs2 callpeak -t {input.treatment} -c {input.control} -n {params.basename} -f BAMPE {params.options} > {log} 2>&1 &&
cat {params.narrow} | cut -f 1-3 > {output.peaks}
"""


@@ -70,7 +71,7 @@ rule macs2_no_input:
mem_mb=2000,
time="0-02:00:00",
log:
"seqnado_output/logs/macs/{sample}_{treatment}.bed",
"seqnado_output/logs/macs/{sample}_{treatment}.log",
shell:
"""
macs2 callpeak -t {input.treatment} -n {params.basename} -f BAMPE {params.options} > {log} 2>&1 &&
@@ -85,7 +86,7 @@ rule homer_with_input:
output:
peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed",
log:
"seqnado_output/logs/homer/{sample}_{treatment}.bed",
"seqnado_output/logs/homer/{sample}_{treatment}.log",
params:
options=seqnado.utils.check_options(config["homer"]["findpeaks"]),
threads: 1
@@ -106,7 +107,7 @@ rule homer_no_input:
output:
peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed",
log:
"seqnado_output/logs/homer/{sample}_{treatment}.bed",
"seqnado_output/logs/homer/{sample}_{treatment}.log",
params:
options=seqnado.utils.check_options(config["homer"]["findpeaks"]),
threads: 1
@@ -128,10 +129,11 @@ rule lanceotron_with_input:
output:
peaks="seqnado_output/peaks/lanceotron/{sample}_{treatment}.bed",
log:
"seqnado_output/logs/lanceotron/{sample}_{treatment}.bed",
"seqnado_output/logs/lanceotron/{sample}_{treatment}.log",
params:
threshold=get_lanceotron_threshold,
outdir=lambda wc, output: os.path.dirname(output.peaks),
basename=lambda wc, output: output.peaks.replace(".bed", ""),
container:
"library://asmith151/seqnado/seqnado_extra:latest"
threads: 1
@@ -141,7 +143,7 @@ rule lanceotron_with_input:
shell:
"""
lanceotron callPeaksInput {input.treatment} -i {input.control} -f {params.outdir} --skipheader > {log} 2>&1 &&
cat {params.outdir}/{wildcards.treatment}_L-tron.bed | awk 'BEGIN{{OFS="\\t"}} $4 >= {params.threshold} {{print $1, $2, $3}}' > {output.peaks} || touch {output.peaks}
cat {params.basename}_L-tron.bed | awk 'BEGIN{{OFS="\\t"}} $4 >= {params.threshold} {{print $1, $2, $3}}' > {output.peaks}
"""


@@ -155,6 +157,7 @@ rule lanceotron_no_input:
params:
options=seqnado.utils.check_options(config["lanceotron"]["callpeak"]),
outdir=lambda wc, output: os.path.dirname(output.peaks),
basename=lambda wc, output: output.peaks.replace(".bed", ""),
threads: 1
container:
"library://asmith151/seqnado/seqnado_extra:latest"
@@ -164,7 +167,7 @@ rule lanceotron_no_input:
shell:
"""
lanceotron callPeaks {input.treatment} -f {params.outdir} --skipheader {params.options} > {log} 2>&1 &&
cat {params.outdir}/{wildcards.sample}_{wildcards.treatment}_L-tron.bed | cut -f 1-3 > {output.peaks}
cat {params.basename}_L-tron.bed | cut -f 1-3 > {output.peaks}
"""


109 changes: 0 additions & 109 deletions seqnado/workflow/scripts/split_bam.py

This file was deleted.

88 changes: 0 additions & 88 deletions seqnado/workflow/scripts/split_bam2.py

This file was deleted.