Skip to content

Commit

Permalink
Merge pull request #12 from nf-core/promoters
Browse files Browse the repository at this point in the history
Add promoter prediction
  • Loading branch information
LeonHafner authored Jun 27, 2024
2 parents ad4a112 + d22124c commit 0e69270
Show file tree
Hide file tree
Showing 39 changed files with 1,066 additions and 821 deletions.
71 changes: 59 additions & 12 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,65 @@ process {
ext.suffix = "bed"
}

// FILTER_CONVERT_GTF: awk program (passed as ext.args) that keeps only rows whose
// 3rd column equals "transcript" and prints six tab-separated fields:
// column 1, column 4 minus 1, column 4, a "<col1>:<col4-1>-<col4>" label,
// column 6 and column 7. The output file suffix is set to "bed".
// NOTE(review): only column 4 is used for the position regardless of the value
// in column 7 — confirm this is intended for the input at hand.
withName: FILTER_CONVERT_GTF {
ext.args = {"'BEGIN {OFS = \"\\t\"} \$3 == \"transcript\" {print \$1, \$4-1, \$4, \$1 \":\" \$4-1 \"-\" \$4, \$6, \$7}'"}
ext.suffix = "bed"
}

// SORT_BED: passes the key options "-k1,1 -k2,2n" to the process
// (presumably a sort on field 1, then numerically on field 2 — confirm against
// the module that consumes ext.args). Output is named "<meta.id>_sorted" with
// suffix "bed".
withName: SORT_BED {
ext.args = "-k1,1 -k2,2n"
ext.prefix = {"${meta.id}_sorted"}
ext.suffix = "bed"
}

// CONSTRUCT_TSS: passes the window size from params.rose_tss_window via "-b"
// and names the output with the fixed prefix "tss".
// ext.args is a closure so the parameter is resolved lazily when the task is
// evaluated, instead of being frozen into the string while the config file is
// parsed; this matches the other param-dependent ext.args closures in this file.
withName: CONSTRUCT_TSS {
ext.args = {"-b ${params.rose_tss_window}"}
ext.prefix = "tss"
}

// FILTER_PREDICTIONS: passes the flags "-A -f 1" verbatim to the process and
// names the output "<meta.id>_filtered".
// NOTE(review): the tool that interprets these flags is determined where the
// process is included, which is not visible in this file — confirm there.
withName: FILTER_PREDICTIONS {
ext.args = "-A -f 1"
ext.prefix = {"${meta.id}_filtered"}
}

// STITCHING: passes the distance from params.rose_stitching_window via "-d"
// and names the output "<meta.id>_stitched".
// ext.args is a closure so the parameter is resolved lazily when the task is
// evaluated, instead of being frozen into the string while the config file is
// parsed; this matches the other param-dependent ext.args closures in this file.
withName: STITCHING {
ext.args = {"-d ${params.rose_stitching_window}"}
ext.prefix = {"${meta.id}_stitched"}
}

// TSS_OVERLAP: passes the flag "-c" to the process and names the output
// "<meta.id>_tss-overlap-counts".
// NOTE(review): presumably a per-region overlap count is appended as the last
// column (FILTER_OVERLAPS filters on the last field) — confirm against the module.
withName: TSS_OVERLAP {
ext.args = "-c"
ext.prefix = {"${meta.id}_tss-overlap-counts"}
}

// FILTER_OVERLAPS: awk program that keeps rows whose last field is >= 2 and
// prints only the first three columns, tab-separated.
// Output is named "<meta.id>_overlap".
withName: FILTER_OVERLAPS {
ext.args = {"'BEGIN {OFS = \"\\t\"} \$NF >= 2 {print \$1, \$2, \$3}'"}
ext.prefix = {"${meta.id}_overlap"}
}

// UNSTITCHED_REGIONS: passes the flag "-F 1" verbatim to the process and names
// the output "<meta.id>_original_regions".
// NOTE(review): the tool that interprets "-F 1" is determined where the process
// is included, which is not visible in this file — confirm there.
withName: UNSTITCHED_REGIONS {
ext.args = "-F 1"
ext.prefix = {"${meta.id}_original_regions"}
}

// CONCAT_AND_SORT (every process with this name): passes the key options
// "-k1,1 -k2,2n" and sets the output suffix to "bed". The output prefix is set
// separately by the scope-specific selectors for this process further down.
withName: CONCAT_AND_SORT {
ext.args = "-k1,1 -k2,2n"
ext.suffix = "bed"
}

// CONCAT_AND_SORT when its fully qualified name matches ".*:ROSE:CONCAT_AND_SORT":
// names the output "<meta.id>_stitched".
withName: ".*:ROSE:CONCAT_AND_SORT" {
ext.prefix = {"${meta.id}_stitched"}
}

// ADD_MISSING_COLUMNS when its fully qualified name matches
// ".*:FIMO:ADD_MISSING_COLUMNS": awk program that sets any empty field among the
// first six to "." and then prints exactly those six fields, tab-separated.
// Output is named "<meta.id>_unified".
withName: ".*:FIMO:ADD_MISSING_COLUMNS" {
ext.args = "'BEGIN {OFS = \"\\t\"} {for (i = 1; i <= 6; i++) if (\$i == \"\") \$i = \".\"; print \$1, \$2, \$3, \$4, \$5, \$6}'"
ext.prefix = {"${meta.id}_unified"}
}

// CONCAT_AND_SORT when its fully qualified name matches ".*:FIMO:CONCAT_AND_SORT":
// names the output "<meta.id>_sorted".
withName: ".*:FIMO:CONCAT_AND_SORT" {
ext.prefix = {"${meta.id}_sorted"}
}

// BEDTOOLS_SORT: names the output "<meta.id>.sorted".
withName: BEDTOOLS_SORT {
ext.prefix = {"${meta.id}.sorted"}
}
Expand All @@ -54,18 +113,6 @@ process {
ext.prefix = {"${meta.id}.merged"}
}

withName: ".*:CHROMHMM:REHEADER.*" {
ext.args = "-c 'sed -e \"s/SN:\\([0-9XY]*\\)/SN:chr\\\\1/\" -e \"s/SN:MT/SN:chrM/\"'"
}

withName: ".*:CHROMHMM:REHEADER_CONTROL" {
ext.prefix = {"${meta.id}_control"}
}

withName: UCSC_GTFTOGENEPRED {
ext.args = "-genePredExt"
}

withName: ".*DYNAMITE:FILTER" {
ext.args = {"'BEGIN{OFS=\"\\t\"} NR==1 || (\$2 >= ${params.dynamite_min_regression} || \$2 <= -${params.dynamite_min_regression} )'"}
ext.prefix = {"${meta.id}.filtered"}
Expand Down
3 changes: 2 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ workflow NFCORE_TFACTIVITY {
samplesheet_bam,
params.chromhmm_states,
params.chromhmm_threshold,
params.chromhmm_marks.split(','),
params.chromhmm_enhancer_marks.split(','),
params.chromhmm_promoter_marks.split(','),

// Peaks
params.window_size,
Expand Down
25 changes: 20 additions & 5 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,31 @@
"git_sha": "04bc484c987b523ea5420ed6bbc1fdc6d8aef751",
"installed_by": ["modules"]
},
"bedtools/complement": {
"branch": "master",
"git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83",
"installed_by": ["modules"]
},
"bedtools/getfasta": {
"branch": "master",
"git_sha": "cdcdd5e3d806f0ff3983c40c69e0b07bb44ec299",
"installed_by": ["modules"]
},
"bedtools/intersect": {
"branch": "master",
"git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83",
"installed_by": ["modules"]
},
"bedtools/merge": {
"branch": "master",
"git_sha": "a5377837fe9013bde89de8689829e83e84086536",
"installed_by": ["modules"]
},
"bedtools/slop": {
"branch": "master",
"git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83",
"installed_by": ["modules"]
},
"bedtools/sort": {
"branch": "master",
"git_sha": "571a5feac4c9ce0a8df0bc15b94230e7f3e8db47",
Expand All @@ -45,6 +60,11 @@
"git_sha": "cf3ed075695639b0a0924eb0901146df1996dc08",
"installed_by": ["modules"]
},
"gnu/sort": {
"branch": "master",
"git_sha": "ca199cfe5aa4f1ea3c41302158f0af2cfaa58957",
"installed_by": ["modules"]
},
"gunzip": {
"branch": "master",
"git_sha": "3a5fef109d113b4997c9822198664ca5f2716208",
Expand All @@ -59,11 +79,6 @@
"branch": "master",
"git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
"installed_by": ["modules"]
},
"ucsc/gtftogenepred": {
"branch": "master",
"git_sha": "acb0880789a6ebc2168d3b2d3d42b5bce6a62431",
"installed_by": ["modules"]
}
}
},
Expand Down
8 changes: 7 additions & 1 deletion modules/local/chromhmm/binarize_bams/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ process BINARIZE_BAMS {
tuple val(meta3), path(chromsizes)

output:
tuple val(meta), path("output")
tuple val(meta), path("output"), emit: binarized_bams
path "versions.yml", emit: versions

script:
"""
Expand All @@ -22,5 +23,10 @@ process BINARIZE_BAMS {
input \\
$table \\
output
cat <<-END_VERSIONS > versions.yml
"${task.process}":
chromhmm: \$(ChromHMM.sh Version | cut -f4 -d" ")
END_VERSIONS
"""
}
5 changes: 3 additions & 2 deletions modules/local/chromhmm/get_results/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ process GET_RESULTS {
val(marks)

output:
tuple val(meta), path("$output_file")
tuple val(meta), path("$output_file"), emit: regions
path "versions.yml", emit: versions

script:
output_file = "enhancers_${meta.id}.bed"
output_file = "${meta.id}.bed"
template "get_results.py"
}
35 changes: 35 additions & 0 deletions modules/local/chromhmm/get_results/templates/get_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,28 @@

import pandas as pd
import numpy as np
import platform


def format_yaml_like(data: dict, indent: int = 0) -> str:
    """Render a (possibly nested) dictionary as YAML-style text.

    Args:
        data (dict): Mapping to render; values that are dicts become nested sections.
        indent (int): Nesting depth of this mapping; used to build the leading padding.

    Returns:
        str: One "key: value" line per non-dict entry and, for each dict entry,
            a "key:" header line followed by its rendered children one level deeper.
    """
    # The padding depends only on the depth, so build it once per call.
    pad = " " * indent
    chunks = []
    for name, entry in data.items():
        if not isinstance(entry, dict):
            # NOTE(review): the doubled backslash is kept exactly as in the file;
            # presumably the template engine unescapes it to a newline before
            # Python runs — confirm.
            chunks.append(f"{pad}{name}: {entry}\\n")
            continue
        # Nested mapping: emit the header, then recurse one level deeper.
        chunks.append(f"{pad}{name}:\\n{format_yaml_like(entry, indent + 1)}")
    return "".join(chunks)


marks = "${marks.join(' ')}".split()

Expand Down Expand Up @@ -30,3 +52,16 @@

# Write output
bed.to_csv("$output_file", index=False, sep="\\t", header=False)


# Record the Python, pandas and numpy versions used by this task in versions.yml
tool_versions = {
    "python": platform.python_version(),
    "pandas": pd.__version__,
    "numpy": np.__version__,
}
# Top-level key is the process name substituted in by the template
versions = {"${task.process}": tool_versions}

with open("versions.yml", "w") as handle:
    handle.write(format_yaml_like(versions))
8 changes: 7 additions & 1 deletion modules/local/chromhmm/learn_model/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ process LEARN_MODEL {
val states

output:
tuple val(meta), path("output/emissions_${states}.txt"), path("output/*_${states}_dense.bed")
tuple val(meta), path("output/emissions_${states}.txt"), path("output/*_${states}_dense.bed"), emit: model
path "versions.yml", emit: versions

script:
"""
Expand All @@ -24,5 +25,10 @@ process LEARN_MODEL {
output \\
$states \\
PLACEHOLDER
cat <<-END_VERSIONS > versions.yml
"${task.process}":
chromhmm: \$(ChromHMM.sh Version | cut -f4 -d" ")
END_VERSIONS
"""
}
27 changes: 0 additions & 27 deletions modules/local/rose/main.nf

This file was deleted.

Loading

0 comments on commit 0e69270

Please sign in to comment.