Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: chip input issue #136

Merged
merged 24 commits into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
905046e
Fix slurm preset (#118)
CChahrour Jan 24, 2024
1a637c1
Delete setup.cfg file
alsmith151 Jan 25, 2024
ce2a8a5
fix: split peak call rules
alsmith151 Jan 25, 2024
761fe04
tests: added all peak call methods to atac test
alsmith151 Jan 25, 2024
1567b04
Update output file paths in
alsmith151 Jan 25, 2024
f6b5cb3
Update file paths in hub.smk
alsmith151 Jan 25, 2024
2ad12a7
fix: updated wildcards for bigBed files
alsmith151 Jan 25, 2024
f2fea5b
Refactor test_seqnado_config_creation function and
alsmith151 Jan 25, 2024
10a5de2
Update config file for chip sequencing
alsmith151 Jan 25, 2024
e8b61bf
fix: seqnado-design
alsmith151 Jan 25, 2024
76b4918
chore: removed commented code
alsmith151 Jan 25, 2024
4214dc7
Fix file path in lanceotron_no_input rule
alsmith151 Jan 25, 2024
f6a4774
Fix metadata and experiment creation in DesignIP
alsmith151 Jan 25, 2024
f8750fc
Fix symlink_files function to handle both paired
alsmith151 Jan 25, 2024
2979457
Add log and wrapper for fastqc_raw_single rule
alsmith151 Jan 25, 2024
abee85a
update config if ucsc is null
CChahrour Jan 25, 2024
1a5f082
Fix config (#119)
CChahrour Jan 26, 2024
1b16b5f
add entrypoint and chmod profile (#122)
CChahrour Jan 29, 2024
3b6defa
Feature add config rerun (#126)
CChahrour Feb 3, 2024
a66e207
move sigularity fix to faq in docs (#127)
CChahrour Feb 3, 2024
4407145
Merge branch 'master' into develop
CChahrour Feb 3, 2024
eb5f39a
feat(pipeline): handle failed peak calls (#131)
alsmith151 Feb 5, 2024
1f2d363
fix(pipeline): inputs not used for peak call (#132)
alsmith151 Feb 5, 2024
b26c33c
Fix: chip input issue (#135)
CChahrour Feb 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

Pipeline based on snakemake to process ChIP-seq, ATAC-seq, RNA-seq and short read WGS data for SNP calling.

See the [SeqNado documentation](alsmith151.github.io/SeqNado/) for more information.
See the SeqNado documentation https://alsmith151.github.io/SeqNado/ for more information.
14 changes: 14 additions & 0 deletions docs/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,17 @@
### Workflow defines configfile config_chip.yml but it is not present or accessible.

This error occurs when the pipeline is run without a config file present in the working directory. Follow the [Pipeline Setup](pipeline.md#create-a-design-file) instructions to create a config file.


## Singularity configuration

### Workflow Error

Failed to pull singularity image from library://asmith151/seqnado/seqnado_pipeline:latest:
FATAL: Unable to get library client configuration:
remote has no library client (see https://apptainer.org/docs/user/latest/endpoint.html#no-default-remote)

Fix:

apptainer remote add --no-login SylabsCloud cloud.sylabs.io
apptainer remote use SylabsCloud
5 changes: 5 additions & 0 deletions docs/pipeline.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ The following command will generate the working directory and configuration file

```bash
seqnado-config chip

# options
-r, --rerun # Re-run the config
-g, --genome [dm6|hg19|hg38|hg38_dm6|hg38_mm39|hg38_spikein|mm10|mm39|other] # Genome to use if genome preset is configured

```

You should get something like this:
Expand Down
5 changes: 3 additions & 2 deletions seqnado/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

@click.command(context_settings=dict(ignore_unknown_options=True))
@click.argument("method", type=click.Choice(["atac", "chip", "rna", "snp"]))
@click.option("-r", "--rerun", is_flag=True, help="Re-run the config")
@click.option(
"-g",
"--genome",
Expand All @@ -28,13 +29,13 @@
]
),
)
def cli_config(method, help=False, genome="other"):
def cli_config(method, help=False, genome="other", rerun=False):
"""
Runs the config for the data processing pipeline.
"""
import seqnado.config as config

config.create_config(method, genome)
config.create_config(method, genome, rerun)


@click.command()
Expand Down
23 changes: 14 additions & 9 deletions seqnado/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def setup_configuration(assay, genome, template_data):
colormap: RdYlBu_r
"""

def create_config(assay, genome):
def create_config(assay, genome, rerun):
env = Environment(loader=FileSystemLoader(template_dir), auto_reload=False)

template = env.get_template("config.yaml.jinja")
Expand All @@ -189,15 +189,20 @@ def create_config(assay, genome):

# Setup configuration
setup_configuration(assay, genome, template_data)

# Create directory and render template
dir_name = f"{template_data['project_date']}_{template_data['assay']}_{template_data['project_name']}"
os.makedirs(dir_name, exist_ok=True)
fastq_dir = os.path.join(dir_name, "fastq")
os.makedirs(fastq_dir, exist_ok=True)

with open(os.path.join(dir_name, f"config_{assay}.yml"), 'w') as file:
file.write(template.render(template_data))
# Create directory and render template
if rerun:
dir_name = os.getcwd()
with open(os.path.join(dir_name, f"config_{assay}.yml"), 'w') as file:
file.write(template.render(template_data))
else:
dir_name = f"{template_data['project_date']}_{template_data['assay']}_{template_data['project_name']}"
os.makedirs(dir_name, exist_ok=True)
fastq_dir = os.path.join(dir_name, "fastq")
os.makedirs(fastq_dir, exist_ok=True)

with open(os.path.join(dir_name, f"config_{assay}.yml"), 'w') as file:
file.write(template.render(template_data))

# add deseq2 qmd file if rna
if assay == "rna":
Expand Down
73 changes: 24 additions & 49 deletions seqnado/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,7 @@ def to_dataframe(self, simplify: bool = True):

@classmethod
def from_dataframe(cls, df: pd.DataFrame, simplified: bool = True, **kwargs):

experiments = {}
for experiment_name, row in df.iterrows():
if simplified:
Expand Down Expand Up @@ -682,71 +683,45 @@ def from_dataframe(cls, df: pd.DataFrame, simplified: bool = True, **kwargs):

return cls(assays=experiments, **kwargs)


def symlink_files_paired(
output_dir: pathlib.Path, assay: Union[AssayNonIP, AssayIP], assay_name: str
):
r1_path_new = pathlib.Path(f"{output_dir}/{assay_name}_1.fastq.gz")
r2_path_new = pathlib.Path(f"{output_dir}/{assay_name}_2.fastq.gz")

if not r1_path_new.exists():
try:
r1_path_new.symlink_to(assay.r1.path.resolve())
except FileExistsError:
logger.warning(f"Symlink for {r1_path_new} already exists.")

if assay.r2 and not r2_path_new.exists():
try:
r2_path_new.symlink_to(assay.r2.path.resolve())
except FileExistsError:
logger.warning(f"Symlink for {r2_path_new} already exists.")


def symlink_files_single(
output_dir: pathlib.Path, assay: Union[AssayNonIP, AssayIP], assay_name: str
):
r1_path_new = pathlib.Path(f"{output_dir}/{assay_name}.fastq.gz")

if not r1_path_new.exists():
def symlink_file(output_dir: pathlib.Path, source_path: pathlib.Path, new_file_name: str):
"""
Create a symlink in the output directory with the new file name.
"""
new_path = output_dir / new_file_name
if not new_path.exists():
try:
r1_path_new.symlink_to(assay.r1.path.resolve())
new_path.symlink_to(source_path.resolve())
except FileExistsError:
logger.warning(f"Symlink for {r1_path_new} already exists.")
logger.warning(f"Symlink for {new_path} already exists.")


def symlink_fastq_files(
design: Union[Design, DesignIP], output_dir: str = "seqnado_output/fastqs/"
) -> None:
def symlink_fastq_files(design: Union[Design, DesignIP], output_dir: str = "seqnado_output/fastqs/") -> None:
"""
Symlink the fastq files to the output directory.
"""
output_dir = pathlib.Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)

if isinstance(design, Design):
for assay_name, assay in design.assays.items():
symlink_file(output_dir, assay.r1.path, f"{assay_name}_1.fastq.gz")
if assay.is_paired:
symlink_files_paired(output_dir, assay, assay_name)
else:
symlink_files_single(output_dir, assay, assay_name)
symlink_file(output_dir, assay.r2.path, f"{assay_name}_2.fastq.gz")

elif isinstance(design, DesignIP):
for experiment_name, experiment in design.assays.items():
assay = experiment.ip_files
assay_name = assay.name

if assay.is_paired:
symlink_files_paired(output_dir, assay, assay_name)
else:
symlink_files_single(output_dir, assay, assay_name)
# IP files
ip_assay = experiment.ip_files
symlink_file(output_dir, ip_assay.r1.path, f"{ip_assay.name}_1.fastq.gz")
if ip_assay.is_paired:
symlink_file(output_dir, ip_assay.r2.path, f"{ip_assay.name}_2.fastq.gz")

if experiment.control_files:
assay = experiment.control_files
assay_name = assay.name
if assay.is_paired:
symlink_files_paired(output_dir, assay, assay_name)
else:
symlink_files_single(output_dir, assay, assay_name)
control_assay = experiment.control_files
control_r1_name = control_assay.r1.path.name
symlink_file(output_dir, control_assay.r1.path, control_r1_name)
if control_assay.is_paired:
control_r2_name = control_assay.r2.path.name
symlink_file(output_dir, control_assay.r2.path, control_r2_name)


def define_output_files(
Expand Down
49 changes: 49 additions & 0 deletions seqnado/workflow/rules/hub.smk
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,30 @@ def get_hub_input(wildcards):
return input_files


def get_peak_files(wildcards):
    """
    Snakemake input function listing the peak BED files for this run.

    ``wildcards`` is accepted because Snakemake passes it to input
    functions; it is not read here. An empty list is returned when peak
    calling is switched off in the config, or when the assay is neither
    "ChIP" nor "ATAC".
    """
    if not config["call_peaks"]:
        return []

    # Only the set of sample names differs between the two assays.
    if ASSAY == "ChIP":
        samples = SAMPLE_NAMES_IP
    elif ASSAY == "ATAC":
        samples = SAMPLE_NAMES
    else:
        return []

    # One BED path per (peak calling method, sample) combination.
    return list(
        expand(
            "seqnado_output/peaks/{method}/{sample}.bed",
            method=config["peak_calling_method"],
            sample=samples,
        )
    )


rule save_design:
output:
"seqnado_output/design.csv",
Expand All @@ -102,9 +126,33 @@ rule save_design:
DESIGN.to_dataframe().to_csv("seqnado_output/design.csv", index=False)


# Checks every requested peak file and pads empty ones with a single
# placeholder interval, then writes a sentinel file to mark the check as done.
# NOTE(review): indentation is not visible in this view, so the nesting
# described in the comments below is the presumed one — confirm against the
# real hub.smk.
rule validate_peaks:
input:
# Peak BED files, resolved by the get_peak_files input function.
peaks=get_peak_files,
output:
# Marker file written by the last statement of this rule.
sentinel="seqnado_output/peaks/.validated",
container:
# No container image is set for this rule.
None
log:
# Declared log path; nothing in the visible run block writes to it explicitly.
"seqnado_output/logs/validate_peaks.log",
run:
from loguru import logger

# Errors raised inside this context are reported through loguru;
# presumably they are not re-raised (loguru's default) — TODO confirm.
with logger.catch():
for peak_file in input.peaks:
# "r+" opens for reading and writing without truncating the file.
with open(peak_file, "r+") as p:
peak_entries = p.readlines()
# A file with no lines gets one placeholder interval (chr21:1-2),
# presumably so downstream BED conversion has at least one record.
if len(peak_entries) < 1:
p.write("chr21\t1\t2\n")

# Presumably outside the logger.catch() block, so the sentinel would be
# written even if a peak file could not be processed — verify.
with open(output.sentinel, "w") as s:
s.write("validated")


rule bed_to_bigbed:
input:
bed="seqnado_output/peaks/{directory}/{sample}.bed",
sentinel="seqnado_output/peaks/.validated",
output:
bigbed="seqnado_output/peaks/{directory}/{sample}.bigBed",
params:
Expand Down Expand Up @@ -149,3 +197,4 @@ rule generate_hub:

localrules:
generate_hub,
validate_peaks,
23 changes: 17 additions & 6 deletions seqnado/workflow/rules/peak_call_chip.smk
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,26 @@ def get_lanceotron_threshold(wildcards):

def get_control_bam(wildcards):
exp = DESIGN.query(sample_name=wildcards.sample, ip=wildcards.treatment)
return "seqnado_output/aligned/{sample}_{exp.control}.bam"
control = f"seqnado_output/aligned/{wildcards.sample}_{exp.control}.bam".replace(
" ", ""
)
return control


def get_control_tag(wildcards):
exp = DESIGN.query(sample_name=wildcards.sample, ip=wildcards.treatment)
return "seqnado_output/tag_dirs/{sample}_{exp.control}"
control = f"seqnado_output/tag_dirs/{wildcards.sample}_{exp.control}".replace(
" ", ""
)
return control


def get_control_bigwig(wildcards):
exp = DESIGN.query(sample_name=wildcards.sample, ip=wildcards.treatment)
return "seqnado_output/bigwigs/deeptools/{sample}_{exp.control}.bigWig"
control = f"seqnado_output/bigwigs/deeptools/{wildcards.sample}_{exp.control}.bigWig".replace(
" ", ""
)
return control


rule macs2_with_input:
Expand All @@ -43,7 +52,7 @@ rule macs2_with_input:
shell:
"""
macs2 callpeak -t {input.treatment} -c {input.control} -n seqnado_output/peaks/macs/{wildcards.treatment} -f BAMPE {params.options} > {log} 2>&1 &&
cat {params.narrow} | cut -f 1-3 > {output.peaks}
cat {params.narrow} | cut -f 1-3 > {output.peaks} || touch {output.peaks}
"""


Expand Down Expand Up @@ -132,7 +141,7 @@ rule lanceotron_with_input:
shell:
"""
lanceotron callPeaksInput {input.treatment} -i {input.control} -f {params.outdir} --skipheader > {log} 2>&1 &&
cat {params.outdir}/{wildcards.treatment}_L-tron.bed | awk 'BEGIN{{OFS="\\t"}} $4 >= {params.threshold} {{print $1, $2, $3}}' > {output.peaks}
cat {params.outdir}/{wildcards.treatment}_L-tron.bed | awk 'BEGIN{{OFS="\\t"}} $4 >= {params.threshold} {{print $1, $2, $3}}' > {output.peaks} || touch {output.peaks}
"""


Expand All @@ -159,4 +168,6 @@ rule lanceotron_no_input:
"""


ruleorder: lanceotron_with_input > lanceotron_no_input > homer_with_input > homer_no_input > macs2_with_input > macs2_no_input
ruleorder: lanceotron_with_input > lanceotron_no_input
ruleorder: homer_with_input > homer_no_input
ruleorder: macs2_with_input > macs2_no_input
Loading