Skip to content

Commit

Permalink
Use standard genepred file in ROSE
Browse files: browse the repository at this point in the history
  • Loading branch information
nictru committed Apr 25, 2024
1 parent 7c63762 commit cd1cb9a
Show file tree
Hide file tree
Showing 9 changed files with 154 additions and 35 deletions.
4 changes: 4 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ process {
ext.prefix = {"${meta.id}_control"}
}

withName: UCSC_GTFTOGENEPRED {
ext.args = "-genePredExt"
}

withName: ".*DYNAMITE:FILTER" {
ext.args = {"'BEGIN{OFS=\"\\t\"} NR==1 || (\$2 >= ${params.dynamite_min_regression} || \$2 <= -${params.dynamite_min_regression} )'"}
ext.prefix = {"${meta.id}.filtered"}
Expand Down
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@
"branch": "master",
"git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62",
"installed_by": ["modules"]
},
"ucsc/gtftogenepred": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"installed_by": ["modules"]
}
}
},
Expand Down
2 changes: 1 addition & 1 deletion modules/local/rose/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ process ROSE {

input:
tuple val(meta), path(bed)
path ucsc_file
tuple val(meta2), path(genepred)

output:
tuple val(meta), path("${meta.id}.rose.bed"), emit: stitched
Expand Down
61 changes: 30 additions & 31 deletions modules/local/rose/templates/rose.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,33 +166,31 @@ def format_folder(folder_name, create=False):
# ==================================================================


def make_start_dict(annot_file, gene_list=[]):
def make_start_dict(annot_file):
"""
makes a dictionary keyed by refseq ID that contains information about
chrom/start/stop/strand/common name
"""

if type(gene_list) == str:
gene_list = parse_table(gene_list, '\\t')
gene_list = [line[0] for line in gene_list]

if annot_file.upper().count('REFSEQ') == 1:
refseq_table, refseq_dict = import_refseq(annot_file)
if len(gene_list) == 0:
gene_list = list(refseq_dict.keys())
start_dict = {}
for gene in gene_list:
if gene not in refseq_dict:
continue
start_dict[gene] = {}
start_dict[gene]['sense'] = refseq_table[refseq_dict[gene][0]][3]
start_dict[gene]['chr'] = refseq_table[refseq_dict[gene][0]][2]
start_dict[gene]['start'] = get_tsss([gene], refseq_table, refseq_dict)
if start_dict[gene]['sense'] == '+':
start_dict[gene]['end'] = [int(refseq_table[refseq_dict[gene][0]][5])]
else:
start_dict[gene]['end'] = [int(refseq_table[refseq_dict[gene][0]][4])]
start_dict[gene]['name'] = refseq_table[refseq_dict[gene][0]][12]
transcripts = []

refseq_table, refseq_dict = import_refseq(annot_file)
if len(transcripts) == 0:
transcripts = list(refseq_dict.keys())
start_dict = {}
for transcript in transcripts:
if transcript not in refseq_dict:
continue
start_dict[transcript] = {}
start_dict[transcript]['sense'] = refseq_table[refseq_dict[transcript][0]][2]
start_dict[transcript]['chr'] = refseq_table[refseq_dict[transcript][0]][1]
start_dict[transcript]['start'] = get_tsss([transcript], refseq_table, refseq_dict)
if start_dict[transcript]['sense'] == '+':
start_dict[transcript]['end'] = [int(refseq_table[refseq_dict[transcript][0]][4])]
else:
start_dict[transcript]['end'] = [int(refseq_table[refseq_dict[transcript][0]][3])]
start_dict[transcript]['name'] = refseq_table[refseq_dict[transcript][0]][11]

return start_dict


Expand All @@ -204,10 +202,10 @@ def get_tsss(gene_list, refseq_table, refseq_dict):
refseq = refseq_from_key(gene_list, refseq_dict, refseq_table)
tss = []
for line in refseq:
if line[3] == '+':
if line[2] == '+':
tss.append(line[3])
if line[2] == '-':
tss.append(line[4])
if line[3] == '-':
tss.append(line[5])
tss = list(map(int, tss))

return tss
Expand All @@ -234,12 +232,13 @@ def import_refseq(refseq_file, return_multiples=False):
"""
refseq_table = parse_table(refseq_file, '\\t')
refseq_dict = {}
ticker = 1
for line in refseq_table[1:]:
if line[1] in refseq_dict:
refseq_dict[line[1]].append(ticker)
ticker = 0
for line in refseq_table:
transcript = line[0]
if transcript in refseq_dict:
refseq_dict[transcript].append(ticker)
else:
refseq_dict[line[1]] = [ticker]
refseq_dict[transcript] = [ticker]
ticker = ticker + 1

multiples = []
Expand Down Expand Up @@ -616,7 +615,7 @@ def idfun(x): return x
return result


start_dict = make_start_dict("$ucsc_file")
start_dict = make_start_dict("$genepred")
locus_collection = bed_to_locus_collection("$bed")
stitched_collection = region_stitching(locus_collection, int("$stitch"), int("$tss_dist"), start_dict)
stitched = locus_collection_to_bed(stitched_collection)
Expand Down
7 changes: 7 additions & 0 deletions modules/nf-core/ucsc/gtftogenepred/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 54 additions & 0 deletions modules/nf-core/ucsc/gtftogenepred/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 47 additions & 0 deletions modules/nf-core/ucsc/gtftogenepred/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion subworkflows/local/peaks.nf
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ workflow PEAKS {
}

CHROMHMM(ch_samplesheet_bam, chrom_sizes, chromhmm_states, chromhmm_threshold, chromhmm_marks)
ROSE(CHROMHMM.out.enhancers, rose_ucsc)
ROSE(CHROMHMM.out.enhancers, gtf.map{gtf -> [[id: "gtf"], gtf]})

ch_versions = ch_versions.mix(CHROMHMM.out.versions)
ch_versions = ch_versions.mix(ROSE.out.versions)
Expand Down
7 changes: 5 additions & 2 deletions subworkflows/local/rose.nf
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
include { ROSE as RUN_ROSE } from "../../modules/local/rose"
include { UCSC_GTFTOGENEPRED } from "../../modules/nf-core/ucsc/gtftogenepred"

workflow ROSE {
take:
ch_bed
ucsc_file
ch_gtf

main:

ch_versions = Channel.empty()

RUN_ROSE(ch_bed, ucsc_file)
UCSC_GTFTOGENEPRED(ch_gtf)
RUN_ROSE(ch_bed, UCSC_GTFTOGENEPRED.out.genepred)

ch_versions = ch_versions.mix(RUN_ROSE.out.versions)
ch_versions = ch_versions.mix(UCSC_GTFTOGENEPRED.out.versions)

emit:
enhancers = RUN_ROSE.out.stitched
Expand Down

0 comments on commit cd1cb9a

Please sign in to comment.