Skip to content

Commit

Permalink
Merge pull request #5 from nf-core/simplify-rose
Browse files: browse the repository at this point in the history
Simplify rose
  • Loading branch information
nictru authored May 1, 2024
2 parents cb4cb9e + a6c2925 commit df1a03f
Show file tree
Hide file tree
Showing 14 changed files with 210 additions and 181 deletions.
6 changes: 0 additions & 6 deletions conf/igenomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ params {
macs_gsize = "2.7e9"
blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed"
pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt"
rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/hg19_refseq.ucsc"
}
'GRCh38' {
fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa"
Expand All @@ -38,7 +37,6 @@ params {
macs_gsize = "2.7e9"
blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed"
pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt"
rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/hg38_refseq.ucsc"
}
'CHM13' {
fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa"
Expand All @@ -61,7 +59,6 @@ params {
macs_gsize = "1.87e9"
blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed"
pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_mouse_PSEMs.txt"
rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/mm10_refseq.ucsc"
}
'TAIR10' {
fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa"
Expand Down Expand Up @@ -297,7 +294,6 @@ params {
macs_gsize = "2.7e9"
blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed"
pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt"
rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/hg38_refseq.ucsc"
}
'hg19' {
fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa"
Expand All @@ -312,7 +308,6 @@ params {
macs_gsize = "2.7e9"
blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed"
pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt"
rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/hg19_refseq.ucsc"
}
'mm10' {
fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa"
Expand All @@ -327,7 +322,6 @@ params {
macs_gsize = "1.87e9"
blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed"
pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_mouse_PSEMs.txt"
rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/mm10_refseq.ucsc"
}
'bosTau8' {
fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa"
Expand Down
20 changes: 3 additions & 17 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ process {
}

withName: CLEAN_BED {
ext.args = {"'{print \$1 \"\\t\" \$2 \"\\t\" \$3}'"}
ext.args = {"'{print \$1 \"\\t\" \$2 \"\\t\" \$3 \"\\t\" \$4 \"\\t\" \$5 \"\\t\" \$6}'"}
ext.prefix = {"${meta.id}.clean"}
ext.suffix = "bed"
}
Expand Down Expand Up @@ -62,22 +62,8 @@ process {
ext.prefix = {"${meta.id}_control"}
}

withName: BED_TO_GFF {
ext.args = {"'BEGIN{FS=\"\\t\";OFS=\"\\t\"} { print \$1, \"bed2gff\", \"region\", \$2+1, \$3, \".\", \".\", \".\", \".\"}'"}
ext.prefix = {"$meta.id"}
ext.suffix = "gff"
}

withName: REFORMAT_GFF {
ext.args = {"'BEGIN{FS=\"\\t\";OFS=\"\\t\"} {if(!match(\$1, /^chr/)) \$1=\"chr\"\$1; \$2=\"seq_\"NR; print \$1, \$2, \"\", \$4, \$5, \"\", \$7, \"\", \$2}'"}
ext.prefix = {"${meta.id}_reformatted"}
ext.suffix = "gff"
}

withName: ROSE_OUTPUT_TO_BED {
ext.args = {"'BEGIN{FS=\"\\t\";OFS=\"\\t\"} {print \$1, \$4-1, \$5}'"}
ext.prefix = {"$meta.id"}
ext.suffix = "bed"
withName: UCSC_GTFTOGENEPRED {
ext.args = "-genePredExt"
}

withName: ".*DYNAMITE:FILTER" {
Expand Down
2 changes: 0 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ params.fasta = getGenomeAttribute('fasta')
params.gtf = getGenomeAttribute('gtf')
params.blacklist = getGenomeAttribute('blacklist')
params.pwms = getGenomeAttribute('pwms')
params.rose_ucsc = getGenomeAttribute('rose_ucsc')

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -94,7 +93,6 @@ workflow NFCORE_TFACTIVITY {
params.chromhmm_states,
params.chromhmm_threshold,
params.chromhmm_marks.split(','),
params.rose_ucsc,
params.window_size,
params.decay,
params.merge_samples,
Expand Down
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@
"branch": "master",
"git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62",
"installed_by": ["modules"]
},
"ucsc/gtftogenepred": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"installed_by": ["modules"]
}
}
},
Expand Down
13 changes: 8 additions & 5 deletions modules/local/chromhmm/get_results/templates/get_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,23 @@
emissions = pd.read_csv("$emissions", sep = "\\t")[["State (Emission order)"] + marks].rename(columns={"State (Emission order)": "State"})


# Read input bed file and remove unnecessary columns
# Read input bed file
bed = pd.read_csv("$bed",
sep="\\t",
skiprows=1,
names=["chr", "start", "end", "state", "score", "strand", "start_1", "end_1", "rgb"]
).drop(columns=["strand", "score", "start_1", "end_1", "rgb"])
)


# Keep state if any of the marks is enriched > threshold for this state
states = emissions[np.any([emissions[mark] >= $threshold for mark in marks], axis=0)]["State"].tolist()
states = emissions[np.any([emissions[mark] >= float("$threshold") for mark in marks], axis=0)]["State"].tolist()


# Subset bed file for selected states
out_bed = bed[np.isin(bed["state"], states)].drop(columns=["state"])
bed = bed[np.isin(bed["state"], states)].drop(columns=["state"])
bed["name"] = bed["chr"] + ":" + bed["start"].astype(str) + "-" + bed["end"].astype(str)

bed = bed[["chr", "start", "end", "name", "score", "strand"]]

# Write output
out_bed.to_csv("$output_file", index=False, sep="\\t", header=False)
bed.to_csv("$output_file", index=False, sep="\\t", header=False)
9 changes: 5 additions & 4 deletions modules/local/rose/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ process ROSE {
'biocontainers/mulled-v2-2076f4a3fb468a04063c9e6b7747a630abb457f6:fccb0c41a243c639e11dd1be7b74f563e624fcca-0' }"

input:
tuple val(meta), path(gff)
path ucsc_file
tuple val(meta), path(bed)
tuple val(meta2), path(genepred)

output:
tuple val(meta), path("${gff.baseName}_STITCHED.gff")
tuple val(meta), path("${meta.id}.rose.bed"), emit: stitched
path("versions.yml") , emit: versions

script:
stitch = 12500
Expand All @@ -21,6 +22,6 @@ process ROSE {

stub:
"""
touch "${gff.baseName}_STITCHED.gff"
touch "${meta.id}.rose.bed"
"""
}
Loading

0 comments on commit df1a03f

Please sign in to comment.