Skip to content

Commit

Permalink
Update paths for consensus peaks and bigwigs
Browse files Browse the repository at this point in the history
  • Loading branch information
alsmith151 committed Feb 15, 2024
1 parent c20eb77 commit efade68
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 25 deletions.
22 changes: 15 additions & 7 deletions seqnado/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,10 @@ def from_dataframe(cls, df: pd.DataFrame, simplified: bool = True, **kwargs):

return cls(assays=experiments, **kwargs)

def symlink_file(output_dir: pathlib.Path, source_path: pathlib.Path, new_file_name: str):

def symlink_file(
output_dir: pathlib.Path, source_path: pathlib.Path, new_file_name: str
):
"""
Create a symlink in the output directory with the new file name.
"""
Expand All @@ -694,13 +697,16 @@ def symlink_file(output_dir: pathlib.Path, source_path: pathlib.Path, new_file_n
except FileExistsError:
logger.warning(f"Symlink for {new_path} already exists.")

def symlink_fastq_files(design: Union[Design, DesignIP], output_dir: str = "seqnado_output/fastqs/") -> None:

def symlink_fastq_files(
design: Union[Design, DesignIP], output_dir: str = "seqnado_output/fastqs/"
) -> None:
"""
Symlink the fastq files to the output directory.
"""
output_dir = pathlib.Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)

if isinstance(design, Design):
for assay_name, assay in design.assays.items():
symlink_file(output_dir, assay.r1.path, f"{assay_name}_1.fastq.gz")
Expand All @@ -713,14 +719,16 @@ def symlink_fastq_files(design: Union[Design, DesignIP], output_dir: str = "seqn
ip_assay = experiment.ip_files
symlink_file(output_dir, ip_assay.r1.path, f"{ip_assay.name}_1.fastq.gz")
if ip_assay.is_paired:
symlink_file(output_dir, ip_assay.r2.path, f"{ip_assay.name}_2.fastq.gz")
symlink_file(
output_dir, ip_assay.r2.path, f"{ip_assay.name}_2.fastq.gz"
)

if experiment.control_files:
control_assay = experiment.control_files
control_r1_name = control_assay.r1.path.name
control_r1_name = control_assay.r1.path.name
symlink_file(output_dir, control_assay.r1.path, control_r1_name)
if control_assay.is_paired:
control_r2_name = control_assay.r2.path.name
control_r2_name = control_assay.r2.path.name
symlink_file(output_dir, control_assay.r2.path, control_r2_name)


Expand Down Expand Up @@ -810,7 +818,7 @@ def define_output_files(
for group_name, df in snakemake_design.to_dataframe().groupby("merge"):
assay_output.extend(
expand(
"seqnado_output/consensus_peaks/{group_name}.bed",
"seqnado_output/peaks/consensus/{group_name}.bed",
group_name=group_name,
)
)
Expand Down
26 changes: 14 additions & 12 deletions seqnado/workflow/rules/consensus_peaks.smk
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ rule merge_bams:
input:
bams=get_bam_files_for_merge,
output:
"seqnado_output/consensus_peaks/{group}.bam",
"seqnado_output/consensus_peaks/bam/{group}.bam",
threads: 8
log:
"seqnado_output/consensus_peaks/{group}.log",
Expand All @@ -23,28 +23,30 @@ rule merge_bams:

use rule index_bam as index_consensus_bam with:
input:
bam="seqnado_output/consensus_peaks/{sample}.bam",
bam="seqnado_output/consensus_peaks/bam/{group}.bam",
output:
bai="seqnado_output/consensus_peaks/{sample}.bam.bai",
bai="seqnado_output/consensus_peaks/bam/{group}.bam.bai",
threads: 8


use rule deeptools_make_bigwigs as deeptools_make_bigwigs_consensus with:
input:
bam="seqnado_output/consensus_peaks/{sample}.bam",
bai="seqnado_output/consensus_peaks/{sample}.bam.bai",
bam="seqnado_output/consensus_peaks/bam/{group}.bam",
bai="seqnado_output/consensus_peaks/bam/{group}.bam.bai",
output:
bigwig="seqnado_output/consensus_peaks/{sample}.bigWig",
bigwig="seqnado_output/bigwigs/consensus/{group}.bigWig",
threads: 8
log:
"seqnado_output/logs/consensus_peaks/bigwigs/{group}.log",


rule lanceotron_no_input_consensus:
input:
group="seqnado_output/consensus_peaks/{group}.bigWig",
group="seqnado_output/bigwigs/consensus/{group}.bigWig",
output:
peaks="seqnado_output/consensus_peaks/{group}.bed",
peaks="seqnado_output/peaks/consensus/{group}.bed",
log:
"seqnado_output/consensus_peaks/{group}.log",
"seqnado_output/logs/consensus_peaks/peaks/{group}.log",
params:
options=seqnado.utils.check_options(config["lanceotron"]["callpeak"]),
outdir=lambda wc, output: os.path.dirname(output.peaks),
Expand All @@ -63,9 +65,9 @@ rule lanceotron_no_input_consensus:

use rule bed_to_bigbed as bed_to_bigbed_consensus with:
input:
bed="seqnado_output/consensus_peaks/{sample}.bed",
bed="seqnado_output/peaks/consensus/{group}.bed",
output:
bigbed="seqnado_output/consensus_peaks/{sample}.bigBed",
bigbed="seqnado_output/peaks/consensus/{group}.bigBed",
threads: 1
log:
"seqnado_output/consensus_peaks/{sample}.log",
"seqnado_output/logs/consensus_peaks/peaks/{group}.log",
5 changes: 3 additions & 2 deletions seqnado/workflow/rules/hub.smk
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def get_hub_input(wildcards):
if "merge" in DESIGN.to_dataframe().columns:
input_files.extend(
expand(
"seqnado_output/consensus_peaks/{group}.bigWig",
"seqnado_output/bigwigs/consensus/{group}.bigWig",
group=DESIGN.to_dataframe()["merge"].unique(),
)
)
Expand Down Expand Up @@ -125,7 +125,7 @@ def get_peak_files(wildcards):
if "merge" in DESIGN.to_dataframe().columns:
peak_files.extend(
expand(
"seqnado_output/consensus_peaks/{group}.bigBed",
"seqnado_output/consensus_peaks/peaks/{group}.bigBed",
group=DESIGN.to_dataframe().merge.unique(),
)
)
Expand Down Expand Up @@ -207,6 +207,7 @@ rule generate_hub:
params:
**get_hub_params(config),
assay=ASSAY,
has_consensus_peaks="merge" in DESIGN.to_dataframe().columns,
script:
"../scripts/create_hub.py"

Expand Down
14 changes: 10 additions & 4 deletions seqnado/workflow/scripts/create_hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from loguru import logger
import tracknado


def get_rna_samplename(path: str) -> str:
    """
    Return the sample name encoded in a stranded track file name.

    Only the final path component is inspected. Everything before the first
    ``_plus`` or ``_minus`` marker in that file name is returned; if neither
    marker is present, the whole file name is returned unchanged.
    """
    file_name = pathlib.Path(path).name
    # Use a non-capturing group for the alternation. The earlier pattern
    # ``_[plus|minus]`` was a character class, so it split at the first
    # underscore followed by any of the letters p/l/u/s/m/i/n (or "|"),
    # truncating sample names such as "wt_s1_plus.bigWig" to "wt".
    return re.split(r"_(?:plus|minus)", file_name)[0]
Expand All @@ -17,6 +18,12 @@ def get_rna_samplename(path: str):
snakemake.input.data,
columns=["fn"],
)


# Use the TrackFiles class to deduplicate files and add metadata
df = tracknado.TrackFiles(files=df, deduplicate=True).files


if snakemake.params.assay == "ChIP":
df[["samplename", "antibody"]] = df["fn"].str.extract(
r".*/(.*)_(.*)\.(?:bigBed|bigWig)"
Expand All @@ -32,14 +39,13 @@ def get_rna_samplename(path: str):
df["method"] = df["fn"].apply(lambda x: x.split("/")[-2])
df["strand"] = np.where(df["fn"].str.contains("_plus.bigWig"), "plus", "minus")

# remove duplicates from design
df.drop_duplicates()

# Create hub design
design = tracknado.TrackDesign.from_design(
df,
color_by=snakemake.params.color_by,
subgroup_by=snakemake.params.subgroup_by,
subgroup_by=snakemake.params.subgroup_by
if any(snakemake.params.subgroup_by)
else None,
supergroup_by=snakemake.params.supergroup_by,
overlay_by=snakemake.params.overlay_by,
)
Expand Down

0 comments on commit efade68

Please sign in to comment.