fix: corrected statistics.smk rules
alsmith151 committed Oct 19, 2023
1 parent b32d17b commit 1c0018f
Showing 1 changed file with 40 additions and 151 deletions.

capcruncher/pipeline/workflow/rules/statistics.smk
@@ -1,148 +1,32 @@
# from collections import defaultdict
# import capcruncher.pipeline.utils
# from typing import List
from collections import defaultdict
import capcruncher.pipeline.utils
from typing import List


# # def get_digestion_statistics(wc, sample_names: List[str]):
# #     stat_types = {
# #         "read_level_stats": "digestion.read.summary.csv",
# #         "histogram_unfiltered": "digestion.unfilt.histogram.csv",
# #         "histogram_filtered": "digestion.filt.histogram.csv",
# #     }

# #     stat_prefixes = []
# #     for sample in sample_names:
# #         for combined in ["flashed", "pe"]:
# #             for part in get_rebalanced_parts(wc, combined=combined, sample=sample):
# #                 stat_prefixes.append(
# #                     f"capcruncher_output/interim/statistics/digestion/data/{sample}_part{part}_{combined}."
# #                 )

# #     stat_files = defaultdict(list)
# #     for stat_type, stat_suffix in stat_types.items():
# #         for stat_prefix in stat_prefixes:
# #             stat_files[stat_type].append(stat_prefix + stat_suffix)

# #     return stat_files


# # def get_filtering_statistics(wc, sample_names: List[str]):
# #     stat_types = {
# #         "read_level_stats": "read.stats.csv",
# #         "slice_level_stats": "slice.stats.csv",
# #     }

# #     stat_prefixes = []
# #     for sample in sample_names:
# #         for combined in ["flashed", "pe"]:
# #             for part in get_rebalanced_parts(wc, combined=combined, sample=sample):
# #                 stat_prefixes.append(
# #                     f"capcruncher_output/interim/statistics/filtering/data/{sample}_part{part}_{combined}."
# #                 )

# #     stat_files = defaultdict(list)
# #     for stat_type, stat_suffix in stat_types.items():
# #         for stat_prefix in stat_prefixes:
# #             stat_files[stat_type].append(stat_prefix + stat_suffix)

# #     return stat_files


# # def get_stat_parts(wc, sample_names: List[str]):
# #     files = []
# #     for sample in sample_names:
# #         for part in get_fastq_split_1(wc):
# #             files.append(
# #                 f"capcruncher_output/interim/statistics/deduplication/data/{sample}_part{part}.deduplication.csv"
# #             )
# #     return files


# # rule combine_stats_fastq_deduplication:
# #     input:
# #         fastq_deduplication=expand(
# #             "capcruncher_output/interim/statistics/deduplication/data/{sample}.deduplication.json",
# #             sample=SAMPLE_NAMES,
# #         ),
# #     output:
# #         "capcruncher_output/interim/statistics/deduplication/fastq_deduplication.csv",
# #     script:
# #         "../scripts/combine_deduplication_stats.py"


# # rule combine_stats_digestion:
# #     input:
# #         unpack(lambda wc: get_digestion_statistics(wc, SAMPLE_NAMES)),
# #     output:
# #         read_data="capcruncher_output/interim/statistics/digestion/fastq_digestion.csv",
# #         histogram="capcruncher_output/interim/statistics/digestion/fastq_digestion.histogram.csv",
# #     script:
# #         "../scripts/combine_digestion_stats.py"


# # rule combine_stats_filtering:
# #     input:
# #         unpack(lambda wc: get_filtering_statistics(wc, SAMPLE_NAMES)),
# #     output:
# #         read_data="capcruncher_output/interim/statistics/filtering/alignment_filtering.csv",
# #         slice_data="capcruncher_output/interim/statistics/filtering/alignment_filtering_slice.csv",
# #     script:
# #         "../scripts/combine_filtering_stats.py"


# # rule combine_stats_alignment_deduplication:
# #     input:
# #         read_level_stats=expand(
# #             "capcruncher_output/interim/statistics/deduplication_by_coordinate/data/{sample}_{combined}.read.stats.csv",
# #             sample=SAMPLE_NAMES,
# #             combined=["flashed", "pe"],
# #         ),
# #     output:
# #         read_data="capcruncher_output/interim/statistics/deduplication_by_coordinate/alignment_deduplication.csv",
# #     script:
# #         "../scripts/combine_alignment_deduplication_stats.py"


# # rule merge_stats_filtering_and_alignment_deduplication:
# #     input:
# #         filtering=rules.combine_stats_filtering.output.read_data,
# #         alignment_deduplication=rules.combine_stats_alignment_deduplication.output.read_data,
# #     output:
# #         "capcruncher_output/interim/statistics/filtering_and_alignment_deduplication.csv",
# #     log:
# #         "capcruncher_output/logs/merge_stats_filtering_and_alignment_deduplication.log",
# #     shell:
# #         """
# #         cat {input.filtering} > {output} 2> {log};
# #         cat {input.alignment_deduplication} | sed '1d' >> {output} 2>> {log};
# #         """

def get_digestion_statistics(wc, sample_names: List[str]):
    """Collect the per-part digestion statistics JSON files for every sample."""
    stat_files = []
    for sample in sample_names:
        for combined in ["flashed", "pe"]:
            for part in get_rebalanced_parts(wc, combined=combined, sample=sample):
                stat_files.append(
                    f"capcruncher_output/interim/statistics/digestion/data/{sample}_part{part}_{combined}.json"
                )

    return stat_files
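
For orientation, here is a minimal, self-contained sketch of what this helper returns, using a hypothetical stand-in for get_rebalanced_parts (the real helper is defined elsewhere in the workflow and derives part numbers from the rebalanced FASTQ splits):

from typing import List


# Hypothetical stand-in: assume every sample/read-state combination was
# rebalanced into two parts. The real get_rebalanced_parts lives elsewhere
# in the pipeline and inspects the actual split files.
def get_rebalanced_parts(wc, combined: str, sample: str) -> List[int]:
    return [0, 1]


def get_digestion_statistics(wc, sample_names: List[str]):
    stat_files = []
    for sample in sample_names:
        for combined in ["flashed", "pe"]:
            for part in get_rebalanced_parts(wc, combined=combined, sample=sample):
                stat_files.append(
                    f"capcruncher_output/interim/statistics/digestion/data/{sample}_part{part}_{combined}.json"
                )
    return stat_files


# Two parts x two read states -> four JSON paths for one sample:
for path in get_digestion_statistics(None, ["sampleA"]):
    print(path)
# capcruncher_output/interim/statistics/digestion/data/sampleA_part0_flashed.json
# capcruncher_output/interim/statistics/digestion/data/sampleA_part1_flashed.json
# capcruncher_output/interim/statistics/digestion/data/sampleA_part0_pe.json
# capcruncher_output/interim/statistics/digestion/data/sampleA_part1_pe.json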

# # rule combine_stats_cis_and_trans:
# #     input:
# #         cis_and_trans_stats=expand(
# #             "capcruncher_output/interim/statistics/cis_and_trans_reporters/data/{sample}.reporter.stats.csv",
# #             sample=SAMPLE_NAMES,
# #         ),
# #     output:
# #         cis_and_trans_stats="capcruncher_output/interim/statistics/cis_and_trans_reporters/cis_and_trans_reporters.csv",
# #     log:
# #         "capcruncher_output/logs/statistics/combine_stats_cis_and_trans_stats.log",
# #     script:
# #         "../scripts/combine_cis_and_trans_stats.py"
def get_filtering_statistics(wc, sample_names: List[str]):
    """Collect the per-part alignment-filtering statistics JSON files for every sample."""
    stat_files = []
    for sample in sample_names:
        for combined in ["flashed", "pe"]:
            for part in get_rebalanced_parts(wc, combined=combined, sample=sample):
                stat_files.append(
                    f"capcruncher_output/interim/statistics/filtering/data/{sample}_part{part}_{combined}.json"
                )

    return stat_files

# # rule combine_stats_read_level:
# #     input:
# #         [
# #             rules.combine_stats_fastq_deduplication.output[0],
# #             rules.combine_stats_digestion.output.read_data,
# #             rules.merge_stats_filtering_and_alignment_deduplication.output[0],
# #         ],
# #     output:
# #         "capcruncher_output/interim/statistics/run_statistics.csv",
# #     script:
# #         "../scripts/combine_stats_read_level.py"

rule copy_report_template:
    input:
@@ -182,15 +66,20 @@ rule make_report:
        ),
        fastq_trimming=rules.extract_trimming_data.output[0],
        fastq_flash=rules.extract_flash_data.output[0],
-       digestion_read=rules.combine_stats_digestion.output.read_data,
-       digestion_histogram=rules.combine_stats_digestion.output.histogram,
-       reporters=rules.merge_stats_filtering_and_alignment_deduplication.output[0],
-       cis_and_trans_stats=rules.combine_stats_cis_and_trans.output.cis_and_trans_stats,
-       read_level_stats=rules.combine_stats_read_level.output[0],
+       fastq_digestion=lambda wc: get_digestion_statistics(wc, SAMPLE_NAMES),
+       reporters=lambda wc: get_filtering_statistics(wc, SAMPLE_NAMES),
+       cis_and_trans_stats=expand(
+           "capcruncher_output/interim/statistics/cis_and_trans_reporters/data/{sample}.json",
+           sample=SAMPLE_NAMES,
+       ),
    output:
        "capcruncher_output/results/capcruncher_report.html",
    params:
        outdir=lambda wildcards, output: pathlib.Path(output[0]).parent,
+       fastq_deduplication_path="capcruncher_output/interim/statistics/deduplication/data/",
+       fastq_digestion_path="capcruncher_output/interim/statistics/digestion/data/",
+       reporter_filtering_path="capcruncher_output/interim/statistics/filtering/data/",
+       reporter_cis_trans_path="capcruncher_output/interim/statistics/cis_and_trans_reporters/data/",
    log:
        "capcruncher_output/logs/make_report.log",
    shell:
@@ -201,12 +90,12 @@ rule make_report:
        {params.outdir}/capcruncher_report.qmd \
        --to html \
        --execute \
-       -P fastq_deduplication_path:$(realpath {input.fastq_deduplication}) \
-       -P fastq_digestion_read_path:$(realpath {input.digestion_read}) \
-       -P fastq_digestion_hist_path:$(realpath {input.digestion_histogram}) \
-       -P reporter_read_path:$(realpath {input.reporters}) \
-       -P reporter_cis_trans_path:$(realpath {input.cis_and_trans_stats}) \
-       -P run_stats_path:$(realpath {input.read_level_stats}) \
+       -P fastq_deduplication_path:$(realpath {params.fastq_deduplication_path}) \
+       -P fastq_trimming_path:$(realpath {input.fastq_trimming}) \
+       -P fastq_flash_path:$(realpath {input.fastq_flash}) \
+       -P fastq_digestion_path:$(realpath {params.fastq_digestion_path}) \
+       -P reporter_filtering_path:$(realpath {params.reporter_filtering_path}) \
+       -P reporter_cis_trans_path:$(realpath {params.reporter_cis_trans_path}) \
        --log {log} \
        2> {log}.err;
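For context on the -P flags: quarto render resolves each -P name:value pair against parameters declared in the report template. Assuming capcruncher_report.qmd executes with Quarto's Jupyter engine (a reasonable guess for this Python pipeline; the template itself is not part of this diff), it would declare defaults at the top of a python code cell tagged parameters, roughly:

#| tags: [parameters]
# Defaults in the template's parameters cell; each value is overridden at
# render time by the corresponding -P flag above. Names mirror the flags;
# the empty-string defaults are placeholders, not the real template's values.
fastq_deduplication_path = ""
fastq_trimming_path = ""
fastq_flash_path = ""
fastq_digestion_path = ""
reporter_filtering_path = ""
reporter_cis_trans_path = ""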
