Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: bug fixes for conda release #139

Merged
merged 2 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 143 additions & 68 deletions seqnado/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,33 +6,40 @@
package_dir = os.path.dirname(os.path.abspath(__file__))
template_dir = os.path.join(package_dir, "workflow/config")


# Helper Functions
def get_user_input(prompt, default=None, is_boolean=False, choices=None):
    """Ask a question on stdin and return the (validated) answer.

    Args:
        prompt: Question text shown to the user.
        default: Value used when the user just presses Enter. May be ``None``.
        is_boolean: When true, the answer is interpreted as yes/no and a
            ``bool`` is returned instead of the raw text.
        choices: Optional list of accepted answers. The question is repeated
            until the answer (or the default) is one of them.

    Returns:
        ``bool`` when ``is_boolean`` is set: ``True`` for "y"/"yes" in any
        letter case, ``False`` for anything else, including an empty answer
        with no default. Otherwise the stripped answer, or ``default`` when
        the answer is empty (which is ``None`` if no default was given).
    """
    # Build the prompt once. Only show a bracketed hint when there is
    # something to show, so the user never sees a literal "[None]".
    hint = "/".join(choices) if choices else default
    label = f"{prompt} [{hint}]: " if hint is not None else f"{prompt}: "

    while True:
        # Strip so that stray spaces do not defeat the yes/no or choices
        # checks and do not end up inside file paths.
        answer = input(label).strip() or default

        if is_boolean:
            # Guard against a missing default: an empty answer means "no"
            # rather than an AttributeError on None.lower().
            return answer is not None and answer.lower() in ("y", "yes")

        if choices and answer not in choices:
            print(f"Invalid choice. Please choose from {', '.join(choices)}.")
            continue

        return answer


def setup_configuration(assay, genome, template_data):
username = os.getenv('USER', 'unknown_user')
today = datetime.datetime.now().strftime('%Y-%m-%d')
project_name = get_user_input("What is your project name?", default=f"{username}_project")
username = os.getenv("USER", "unknown_user")
today = datetime.datetime.now().strftime("%Y-%m-%d")
project_name = get_user_input(
"What is your project name?", default=f"{username}_project"
)
project_name = project_name.replace(" ", "_")

common_config = {
'username': username,
'project_date': today,
'project_name': project_name,
'genome': genome
"username": username,
"project_date": today,
"project_name": project_name,
"genome": genome,
}

template_data.update(common_config)

with open(os.path.join(template_dir, 'preset_genomes.json'), 'r') as f:
with open(os.path.join(template_dir, "preset_genomes.json"), "r") as f:
genome_values = json.load(f)

genome_dict = {}
Expand All @@ -41,72 +48,135 @@ def setup_configuration(assay, genome, template_data):
genome = get_user_input("What is your genome name?", default="other")
genome_dict = {
genome: {
"indices": get_user_input("Path to Bowtie2 genome indices:") if assay in ["chip", "atac"] else get_user_input("Path to STAR v2.7.10b genome indices:"),
"indices": (
get_user_input("Path to Bowtie2 genome indices:")
if assay in ["chip", "atac"]
else get_user_input("Path to STAR v2.7.10b genome indices:")
),
"chromosome_sizes": get_user_input("Path to chromosome sizes file:"),
"gtf": get_user_input("Path to GTF file:"),
"blacklist": get_user_input("Path to blacklist bed file:")
"blacklist": get_user_input("Path to blacklist bed file:"),
}
}
else:
if genome in genome_values:
genome_dict[genome] = {
"indices": genome_values[genome].get('bt2_indices' if assay in ["chip", "atac"] else 'star_indices', ''),
"chromosome_sizes": genome_values[genome].get('chromosome_sizes', ''),
"gtf": genome_values[genome].get('gtf', ''),
"blacklist": genome_values[genome].get('blacklist', '')
"indices": genome_values[genome].get(
"bt2_indices" if assay in ["chip", "atac"] else "star_indices", ""
),
"chromosome_sizes": genome_values[genome].get("chromosome_sizes", ""),
"gtf": genome_values[genome].get("gtf", ""),
"blacklist": genome_values[genome].get("blacklist", ""),
}


genome_config = {
'genome': genome,
'indices': genome_dict[genome]['indices'],
'chromosome_sizes': genome_dict[genome]['chromosome_sizes'],
'gtf': genome_dict[genome]['gtf'],
"genome": genome,
"indices": genome_dict[genome]["indices"],
"chromosome_sizes": genome_dict[genome]["chromosome_sizes"],
"gtf": genome_dict[genome]["gtf"],
}
template_data.update(genome_config)


template_data['remove_blacklist'] = get_user_input("Do you want to remove blacklist regions? (yes/no)", default="yes", is_boolean=True)
if template_data['remove_blacklist']:
template_data['blacklist'] = genome_dict[genome]['blacklist']

template_data['remove_pcr_duplicates'] = get_user_input("Remove PCR duplicates? (yes/no)", default= "yes" if assay in ["chip", "atac"] else "no", is_boolean=True)
if template_data['remove_pcr_duplicates']:
template_data['remove_pcr_duplicates_method'] = get_user_input("Remove PCR duplicates method:", default="picard", choices=["picard"])
template_data["remove_blacklist"] = get_user_input(
"Do you want to remove blacklist regions? (yes/no)",
default="yes",
is_boolean=True,
)
if template_data["remove_blacklist"]:
template_data["blacklist"] = genome_dict[genome]["blacklist"]

template_data["remove_pcr_duplicates"] = get_user_input(
"Remove PCR duplicates? (yes/no)",
default="yes" if assay in ["chip", "atac"] else "no",
is_boolean=True,
)
if template_data["remove_pcr_duplicates"]:
template_data["remove_pcr_duplicates_method"] = get_user_input(
"Remove PCR duplicates method:", default="picard", choices=["picard"]
)

else:
template_data['remove_pcr_duplicates_method'] = "False"
template_data["remove_pcr_duplicates_method"] = "False"

if assay == "atac":
template_data['shift_atac_reads'] = get_user_input("Shift ATAC-seq reads? (yes/no)", default="yes", is_boolean=True) if assay == "atac" else "False"
template_data["shift_atac_reads"] = (
get_user_input(
"Shift ATAC-seq reads? (yes/no)", default="yes", is_boolean=True
)
if assay == "atac"
else "False"
)

if assay == "chip":
template_data['spikein'] = get_user_input("Do you have spikein? (yes/no)", default="no", is_boolean=True)
if template_data['spikein']:
template_data['normalisation_method'] = get_user_input("Normalisation method:", default="orlando", choices=["orlando", "with_input"])
template_data['reference_genome'] = get_user_input("Reference genome:", default="hg38")
template_data['spikein_genome'] = get_user_input("Spikein genome:", default="dm6")
template_data['fastq_screen_config'] = get_user_input("Path to fastqscreen config:", default="/ceph/project/milne_group/shared/seqnado_reference/fastqscreen_reference/fastq_screen.conf")

template_data['make_bigwigs'] = get_user_input("Do you want to make bigwigs? (yes/no)", default="no", is_boolean=True)
if template_data['make_bigwigs']:
template_data['pileup_method'] = get_user_input("Pileup method:", default="deeptools", choices=["deeptools", "homer"])
template_data['make_heatmaps'] = get_user_input("Do you want to make heatmaps? (yes/no)", default="no", is_boolean=True)

if assay in ["chip", "atac"]:
template_data['call_peaks'] = get_user_input("Do you want to call peaks? (yes/no)", default="no", is_boolean=True)
if template_data['call_peaks']:
template_data['peak_calling_method'] = get_user_input("Peak caller:", default="lanceotron", choices=["lanceotron", "macs", "homer"])

template_data['run_deseq2'] = get_user_input("Run DESeq2? (yes/no)", default="no", is_boolean=True) if assay == "rna" else "False"
template_data["spikein"] = get_user_input(
"Do you have spikein? (yes/no)", default="no", is_boolean=True
)
if template_data["spikein"]:
template_data["normalisation_method"] = get_user_input(
"Normalisation method:",
default="orlando",
choices=["orlando", "with_input"],
)
template_data["reference_genome"] = get_user_input(
"Reference genome:", default="hg38"
)
template_data["spikein_genome"] = get_user_input(
"Spikein genome:", default="dm6"
)
template_data["fastq_screen_config"] = get_user_input(
"Path to fastqscreen config:",
default="/ceph/project/milne_group/shared/seqnado_reference/fastqscreen_reference/fastq_screen.conf",
)

template_data["make_bigwigs"] = get_user_input(
"Do you want to make bigwigs? (yes/no)", default="no", is_boolean=True
)
if template_data["make_bigwigs"]:
template_data["pileup_method"] = get_user_input(
"Pileup method:", default="deeptools", choices=["deeptools", "homer"]
)
template_data["make_heatmaps"] = get_user_input(
"Do you want to make heatmaps? (yes/no)", default="no", is_boolean=True
)

template_data['make_ucsc_hub'] = get_user_input("Do you want to make a UCSC hub? (yes/no)", default="no", is_boolean=True)

template_data['UCSC_hub_directory'] = get_user_input("UCSC hub directory:", default="/path/to/ucsc_hub/") if template_data['make_ucsc_hub'] else "."
template_data['email'] = get_user_input("What is your email address?", default=f"{username}@example.com") if template_data['make_ucsc_hub'] else f"{username}@example.com"
template_data['color_by'] = get_user_input("Color by (for UCSC hub):", default="samplename") if template_data['make_ucsc_hub'] else "samplename"

template_data['options'] = TOOL_OPTIONS_RNA if assay == "rna" else TOOL_OPTIONS
if assay in ["chip", "atac"]:
template_data["call_peaks"] = get_user_input(
"Do you want to call peaks? (yes/no)", default="no", is_boolean=True
)
if template_data["call_peaks"]:
template_data["peak_calling_method"] = get_user_input(
"Peak caller:",
default="lanceotron",
choices=["lanceotron", "macs", "homer"],
)

template_data["run_deseq2"] = (
get_user_input("Run DESeq2? (yes/no)", default="no", is_boolean=True)
if assay == "rna"
else "False"
)

template_data["make_ucsc_hub"] = get_user_input(
"Do you want to make a UCSC hub? (yes/no)", default="no", is_boolean=True
)

template_data["UCSC_hub_directory"] = (
get_user_input("UCSC hub directory:", default="/path/to/ucsc_hub/")
if template_data["make_ucsc_hub"]
else "."
)
template_data["email"] = (
get_user_input("What is your email address?", default=f"{username}@example.com")
if template_data["make_ucsc_hub"]
else f"{username}@example.com"
)
template_data["color_by"] = (
get_user_input("Color by (for UCSC hub):", default="samplename")
if template_data["make_ucsc_hub"]
else "samplename"
)

template_data["options"] = TOOL_OPTIONS_RNA if assay == "rna" else TOOL_OPTIONS


# Tool Specific Options
Expand Down Expand Up @@ -178,35 +248,40 @@ def setup_configuration(assay, genome, template_data):
colormap: RdYlBu_r
"""


# Render the seqnado YAML configuration for one assay and write it to disk.
#
# Parameters (as used in the body below):
#   assay  - assay name; it is stored in the template data, used in the output
#            file name "config_<assay>.yml", and compared against "rna" to
#            decide whether the DESeq2 report template is written.
#   genome - genome name; stored in the template data and forwarded to
#            setup_configuration.
#   rerun  - when truthy, the config is written into the current working
#            directory; otherwise a new project directory is created.
#
# Returns nothing; the results are the files written and the final message.
#
# NOTE(review): this listing is a diff rendering, so several statements appear
# twice (pre- and post-change form, differing only in quote style or line
# wrapping) and indentation is not shown. Read each such pair as one statement.
def create_config(assay, genome, rerun):
# Templates are loaded from the package's template_dir.
env = Environment(loader=FileSystemLoader(template_dir), auto_reload=False)

template = env.get_template("config.yaml.jinja")
template = env.get_template("config.yaml.jinja")
template_deseq2 = env.get_template("deseq2.qmd.jinja")

# Initialize template data
template_data = {'assay': assay, 'genome': genome}
template_data = {"assay": assay, "genome": genome}

# Setup configuration
# setup_configuration fills template_data in place (it calls
# template_data.update(...)); the 'project_date' and 'project_name' keys read
# further down are added there.
setup_configuration(assay, genome, template_data)

# Create directory and render template
if rerun:
# Rerun: no new directory is created, the config goes into the cwd.
dir_name = os.getcwd()
with open(os.path.join(dir_name, f"config_{assay}.yml"), 'w') as file:
with open(os.path.join(dir_name, f"config_{assay}.yml"), "w") as file:
file.write(template.render(template_data))
else:
# Fresh project: directory is named <date>_<assay>_<project name> and gets
# an empty "fastq" sub-directory; exist_ok=True makes re-creation harmless.
dir_name = f"{template_data['project_date']}_{template_data['assay']}_{template_data['project_name']}"
os.makedirs(dir_name, exist_ok=True)
fastq_dir = os.path.join(dir_name, "fastq")
os.makedirs(fastq_dir, exist_ok=True)
with open(os.path.join(dir_name, f"config_{assay}.yml"), 'w') as file:

with open(os.path.join(dir_name, f"config_{assay}.yml"), "w") as file:
file.write(template.render(template_data))

# add deseq2 qmd file if rna
# NOTE(review): with indentation lost it is not visible whether this section
# belongs to the else-branch above or runs for both branches - confirm against
# the real file before relying on rerun behaviour for RNA projects.
if assay == "rna":
with open(os.path.join(dir_name, f"deseq2_{template_data['project_name']}.qmd"), 'w') as file:
with open(
os.path.join(dir_name, f"deseq2_{template_data['project_name']}.qmd"), "w"
) as file:
file.write(template_deseq2.render(template_data))

# NOTE(review): the message says the directory "has been created"; if this
# print is also reached when rerun is truthy, dir_name is the existing cwd.
print(f"Directory '{dir_name}' has been created with the 'config_{assay}.yml' file.")

print(
f"Directory '{dir_name}' has been created with the 'config_{assay}.yml' file."
)
21 changes: 12 additions & 9 deletions seqnado/workflow/rules/peak_call_chip.smk
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,17 @@ rule macs2_with_input:
params:
options=seqnado.utils.check_options(config["macs"]["callpeak"]),
narrow=lambda wc, output: output.peaks.replace(".bed", "_peaks.narrowPeak"),
basename=lambda wc, output: output.peaks.replace(".bed", ""),
threads: 1
resources:
mem_mb=2000,
time="0-02:00:00",
log:
"seqnado_output/logs/macs/{sample}_{treatment}.bed",
"seqnado_output/logs/macs/{sample}_{treatment}.log",
shell:
"""
macs2 callpeak -t {input.treatment} -c {input.control} -n seqnado_output/peaks/macs/{wildcards.treatment} -f BAMPE {params.options} > {log} 2>&1 &&
cat {params.narrow} | cut -f 1-3 > {output.peaks} || touch {output.peaks}
macs2 callpeak -t {input.treatment} -c {input.control} -n {params.basename} -f BAMPE {params.options} > {log} 2>&1 &&
cat {params.narrow} | cut -f 1-3 > {output.peaks}
"""


Expand All @@ -70,7 +71,7 @@ rule macs2_no_input:
mem_mb=2000,
time="0-02:00:00",
log:
"seqnado_output/logs/macs/{sample}_{treatment}.bed",
"seqnado_output/logs/macs/{sample}_{treatment}.log",
shell:
"""
macs2 callpeak -t {input.treatment} -n {params.basename} -f BAMPE {params.options} > {log} 2>&1 &&
Expand All @@ -85,7 +86,7 @@ rule homer_with_input:
output:
peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed",
log:
"seqnado_output/logs/homer/{sample}_{treatment}.bed",
"seqnado_output/logs/homer/{sample}_{treatment}.log",
params:
options=seqnado.utils.check_options(config["homer"]["findpeaks"]),
threads: 1
Expand All @@ -106,7 +107,7 @@ rule homer_no_input:
output:
peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed",
log:
"seqnado_output/logs/homer/{sample}_{treatment}.bed",
"seqnado_output/logs/homer/{sample}_{treatment}.log",
params:
options=seqnado.utils.check_options(config["homer"]["findpeaks"]),
threads: 1
Expand All @@ -128,10 +129,11 @@ rule lanceotron_with_input:
output:
peaks="seqnado_output/peaks/lanceotron/{sample}_{treatment}.bed",
log:
"seqnado_output/logs/lanceotron/{sample}_{treatment}.bed",
"seqnado_output/logs/lanceotron/{sample}_{treatment}.log",
params:
threshold=get_lanceotron_threshold,
outdir=lambda wc, output: os.path.dirname(output.peaks),
basename=lambda wc, output: output.peaks.replace(".bed", ""),
container:
"library://asmith151/seqnado/seqnado_extra:latest"
threads: 1
Expand All @@ -141,7 +143,7 @@ rule lanceotron_with_input:
shell:
"""
lanceotron callPeaksInput {input.treatment} -i {input.control} -f {params.outdir} --skipheader > {log} 2>&1 &&
cat {params.outdir}/{wildcards.treatment}_L-tron.bed | awk 'BEGIN{{OFS="\\t"}} $4 >= {params.threshold} {{print $1, $2, $3}}' > {output.peaks} || touch {output.peaks}
cat {params.basename}_L-tron.bed | awk 'BEGIN{{OFS="\\t"}} $4 >= {params.threshold} {{print $1, $2, $3}}' > {output.peaks}
"""


Expand All @@ -155,6 +157,7 @@ rule lanceotron_no_input:
params:
options=seqnado.utils.check_options(config["lanceotron"]["callpeak"]),
outdir=lambda wc, output: os.path.dirname(output.peaks),
basename=lambda wc, output: output.peaks.replace(".bed", ""),
threads: 1
container:
"library://asmith151/seqnado/seqnado_extra:latest"
Expand All @@ -164,7 +167,7 @@ rule lanceotron_no_input:
shell:
"""
lanceotron callPeaks {input.treatment} -f {params.outdir} --skipheader {params.options} > {log} 2>&1 &&
cat {params.outdir}/{wildcards.sample}_{wildcards.treatment}_L-tron.bed | cut -f 1-3 > {output.peaks}
cat {params.basename}_L-tron.bed | cut -f 1-3 > {output.peaks}
"""


Expand Down
Loading
Loading