diff --git a/conf/modules.config b/conf/modules.config index 97b7d44..1af2222 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -28,6 +28,65 @@ process { ext.suffix = "bed" } + withName: FILTER_CONVERT_GTF { + ext.args = {"'BEGIN {OFS = \"\\t\"} \$3 == \"transcript\" {print \$1, \$4-1, \$4, \$1 \":\" \$4-1 \"-\" \$4, \$6, \$7}'"} + ext.suffix = "bed" + } + + withName: SORT_BED { + ext.args = "-k1,1 -k2,2n" + ext.prefix = {"${meta.id}_sorted"} + ext.suffix = "bed" + } + + withName: CONSTRUCT_TSS { + ext.args = "-b ${params.rose_tss_window}" + ext.prefix = "tss" + } + + withName: FILTER_PREDICTIONS { + ext.args = "-A -f 1" + ext.prefix = {"${meta.id}_filtered"} + } + + withName: STITCHING { + ext.args = "-d ${params.rose_stitching_window}" + ext.prefix = {"${meta.id}_stitched"} + } + + withName: TSS_OVERLAP { + ext.args = "-c" + ext.prefix = {"${meta.id}_tss-overlap-counts"} + } + + withName: FILTER_OVERLAPS { + ext.args = {"'BEGIN {OFS = \"\\t\"} \$NF >= 2 {print \$1, \$2, \$3}'"} + ext.prefix = {"${meta.id}_overlap"} + } + + withName: UNSTITCHED_REGIONS { + ext.args = "-F 1" + ext.prefix = {"${meta.id}_original_regions"} + } + + withName: CONCAT_AND_SORT { + ext.args = "-k1,1 -k2,2n" + ext.suffix = "bed" + } + + withName: ".*:ROSE:CONCAT_AND_SORT" { + ext.prefix = {"${meta.id}_stitched"} + } + + withName: ".*:FIMO:ADD_MISSING_COLUMNS" { + ext.args = "'BEGIN {OFS = \"\\t\"} {for (i = 1; i <= 6; i++) if (\$i == \"\") \$i = \".\"; print \$1, \$2, \$3, \$4, \$5, \$6}'" + ext.prefix = {"${meta.id}_unified"} + } + + withName: ".*:FIMO:CONCAT_AND_SORT" { + ext.prefix = {"${meta.id}_sorted"} + } + withName: BEDTOOLS_SORT { ext.prefix = {"${meta.id}.sorted"} } @@ -54,18 +113,6 @@ process { ext.prefix = {"${meta.id}.merged"} } - withName: ".*:CHROMHMM:REHEADER.*" { - ext.args = "-c 'sed -e \"s/SN:\\([0-9XY]*\\)/SN:chr\\\\1/\" -e \"s/SN:MT/SN:chrM/\"'" - } - - withName: ".*:CHROMHMM:REHEADER_CONTROL" { - ext.prefix = {"${meta.id}_control"} - } - - withName: UCSC_GTFTOGENEPRED { - ext.args = "-genePredExt" - } - withName: ".*DYNAMITE:FILTER" { ext.args = {"'BEGIN{OFS=\"\\t\"} NR==1 || (\$2 >= ${params.dynamite_min_regression} || \$2 <= -${params.dynamite_min_regression} )'"} ext.prefix = {"${meta.id}.filtered"} diff --git a/main.nf b/main.nf index 4ba5a3e..1ee3f8a 100644 --- a/main.nf +++ b/main.nf @@ -96,7 +96,8 @@ workflow NFCORE_TFACTIVITY { samplesheet_bam, params.chromhmm_states, params.chromhmm_threshold, - params.chromhmm_marks.split(','), + params.chromhmm_enhancer_marks.split(','), + params.chromhmm_promoter_marks.split(','), // Peaks params.window_size, diff --git a/modules.json b/modules.json index bc5dd00..ba3b6d5 100644 --- a/modules.json +++ b/modules.json @@ -10,16 +10,31 @@ "git_sha": "04bc484c987b523ea5420ed6bbc1fdc6d8aef751", "installed_by": ["modules"] }, + "bedtools/complement": { + "branch": "master", + "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", + "installed_by": ["modules"] + }, "bedtools/getfasta": { "branch": "master", "git_sha": "cdcdd5e3d806f0ff3983c40c69e0b07bb44ec299", "installed_by": ["modules"] }, + "bedtools/intersect": { + "branch": "master", + "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", + "installed_by": ["modules"] + }, "bedtools/merge": { "branch": "master", "git_sha": "a5377837fe9013bde89de8689829e83e84086536", "installed_by": ["modules"] }, + "bedtools/slop": { + "branch": "master", + "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", + "installed_by": ["modules"] + }, "bedtools/sort": { "branch": "master", "git_sha": 
"571a5feac4c9ce0a8df0bc15b94230e7f3e8db47", @@ -45,6 +60,11 @@ "git_sha": "cf3ed075695639b0a0924eb0901146df1996dc08", "installed_by": ["modules"] }, + "gnu/sort": { + "branch": "master", + "git_sha": "ca199cfe5aa4f1ea3c41302158f0af2cfaa58957", + "installed_by": ["modules"] + }, "gunzip": { "branch": "master", "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", @@ -59,11 +79,6 @@ "branch": "master", "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", "installed_by": ["modules"] - }, - "ucsc/gtftogenepred": { - "branch": "master", - "git_sha": "acb0880789a6ebc2168d3b2d3d42b5bce6a62431", - "installed_by": ["modules"] } } }, diff --git a/modules/local/chromhmm/binarize_bams/main.nf b/modules/local/chromhmm/binarize_bams/main.nf index f61558f..901d3dc 100644 --- a/modules/local/chromhmm/binarize_bams/main.nf +++ b/modules/local/chromhmm/binarize_bams/main.nf @@ -13,7 +13,8 @@ process BINARIZE_BAMS { tuple val(meta3), path(chromsizes) output: - tuple val(meta), path("output") + tuple val(meta), path("output"), emit: binarized_bams + path "versions.yml", emit: versions script: """ @@ -22,5 +23,10 @@ process BINARIZE_BAMS { input \\ $table \\ output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + chromhmm: \$(ChromHMM.sh Version | cut -f4 -d" ") + END_VERSIONS """ } diff --git a/modules/local/chromhmm/get_results/main.nf b/modules/local/chromhmm/get_results/main.nf index f6b34c9..fc85232 100644 --- a/modules/local/chromhmm/get_results/main.nf +++ b/modules/local/chromhmm/get_results/main.nf @@ -13,9 +13,10 @@ process GET_RESULTS { val(marks) output: - tuple val(meta), path("$output_file") + tuple val(meta), path("$output_file"), emit: regions + path "versions.yml", emit: versions script: - output_file = "enhancers_${meta.id}.bed" + output_file = "${meta.id}.bed" template "get_results.py" } diff --git a/modules/local/chromhmm/get_results/templates/get_results.py b/modules/local/chromhmm/get_results/templates/get_results.py index 28fcf20..210972f 100755 --- a/modules/local/chromhmm/get_results/templates/get_results.py +++ b/modules/local/chromhmm/get_results/templates/get_results.py @@ -3,6 +3,28 @@ import pandas as pd import numpy as np +import platform + + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. 
+ """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + marks = "${marks.join(' ')}".split() @@ -30,3 +52,16 @@ # Write output bed.to_csv("$output_file", index=False, sep="\\t", header=False) + + +# Create version file +versions = { + "${task.process}" : { + "python": platform.python_version(), + "pandas": pd.__version__, + "numpy": np.__version__, + } +} + +with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) diff --git a/modules/local/chromhmm/learn_model/main.nf b/modules/local/chromhmm/learn_model/main.nf index eac4221..73d9ead 100644 --- a/modules/local/chromhmm/learn_model/main.nf +++ b/modules/local/chromhmm/learn_model/main.nf @@ -12,7 +12,8 @@ process LEARN_MODEL { val states output: - tuple val(meta), path("output/emissions_${states}.txt"), path("output/*_${states}_dense.bed") + tuple val(meta), path("output/emissions_${states}.txt"), path("output/*_${states}_dense.bed"), emit: model + path "versions.yml", emit: versions script: """ @@ -24,5 +25,10 @@ process LEARN_MODEL { output \\ $states \\ PLACEHOLDER + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + chromhmm: \$(ChromHMM.sh Version | cut -f4 -d" ") + END_VERSIONS """ } diff --git a/modules/local/rose/main.nf b/modules/local/rose/main.nf deleted file mode 100644 index e9c99f3..0000000 --- a/modules/local/rose/main.nf +++ /dev/null @@ -1,27 +0,0 @@ -process ROSE { - tag "$meta.id" - label 'process_single' - - conda "conda-forge::mulled-v2-2076f4a3fb468a04063c9e6b7747a630abb457f6==fccb0c41a243c639e11dd1be7b74f563e624fcca-0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-2076f4a3fb468a04063c9e6b7747a630abb457f6:fccb0c41a243c639e11dd1be7b74f563e624fcca-0': - 'biocontainers/mulled-v2-2076f4a3fb468a04063c9e6b7747a630abb457f6:fccb0c41a243c639e11dd1be7b74f563e624fcca-0' }" - - input: - tuple val(meta), path(bed) - tuple val(meta2), path(genepred) - - output: - tuple val(meta), path("${meta.id}.rose.bed"), emit: stitched - path("versions.yml") , emit: versions - - script: - stitch = 12500 - tss_dist = 2500 - template "rose.py" - - stub: - """ - touch "${meta.id}.rose.bed" - """ -} diff --git a/modules/local/rose/templates/rose.py b/modules/local/rose/templates/rose.py deleted file mode 100755 index 407f825..0000000 --- a/modules/local/rose/templates/rose.py +++ /dev/null @@ -1,621 +0,0 @@ -#!/usr/bin/env python3 - -import os -import platform - -def format_yaml_like(data: dict, indent: int = 0) -> str: - """Formats a dictionary to a YAML-like string. - - Args: - data (dict): The dictionary to format. - indent (int): The current indentation level. - - Returns: - str: A string formatted as YAML. 
- """ - yaml_str = "" - for key, value in data.items(): - spaces = " " * indent - if isinstance(value, dict): - yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" - else: - yaml_str += f"{spaces}{key}: {value}\\n" - return yaml_str - -def region_stitching(bound_collection, stitch_window, tss_window, start_dict): - print('Performing region stitching...') - - remove_tss = tss_window != 0 - - # filter out all bound regions that overlap the TSS of an ACTIVE GENE - if remove_tss: - # now makeTSS loci for active genes - remove_ticker = 0 - # this loop makes a locus centered around +/- tss_window of transcribed genes - # then adds it to the list tss_loci - tss_loci = [] - for gene_id in list(start_dict.keys()): - tss_loci.append(make_tss_locus(gene_id, start_dict, tss_window, tss_window)) - - # this turns the tss_loci list into a LocusCollection - # 50 is the internal parameter for LocusCollection and doesn't really matter - tss_collection = LocusCollection(tss_loci, 50) - - # gives all the loci in bound_collection - bound_loci = bound_collection.get_loci() - - # this loop will check if each bound region is contained by the TSS exclusion zone - # this will drop out a lot of the promoter only regions that are tiny - # typical exclusion window is around 2kb - for locus in bound_loci: - if len(tss_collection.get_containers(locus, 'both')) > 0: - # if true, the bound locus overlaps an active gene - bound_collection.remove(locus) - remove_ticker += 1 - print(f'Removed {remove_ticker} loci because they were contained by a TSS') - - # bound_collection is now all enriched region loci that don't overlap an active TSS - stitched_collection = bound_collection.stitch_collection(stitch_window, 'both') - - if remove_tss: - # now replace any stitched region that overlap 2 distinct genes - # with the original loci that were there - fixed_loci = [] - tss_loci = [] - for gene_id in list(start_dict.keys()): - tss_loci.append(make_tss_locus(gene_id, start_dict, 50, 50)) - - # this turns the tss_loci list into a LocusCollection - # 50 is the internal parameter for LocusCollection and doesn't really matter - tss_collection = LocusCollection(tss_loci, 50) - remove_ticker = 0 - original_ticker = 0 - for stitched_locus in stitched_collection.get_loci(): - overlapping_tss_loci = tss_collection.get_overlap(stitched_locus, 'both') - tss_names = [start_dict[tssLocus.id()]['name'] for tssLocus in overlapping_tss_loci] - tss_names = uniquify(tss_names) - if len(tss_names) > 2: - original_loci = bound_collection.get_overlap(stitched_locus, 'both') - original_ticker += len(original_loci) - fixed_loci += original_loci - remove_ticker += 1 - else: - fixed_loci.append(stitched_locus) - - print(f'Removed {remove_ticker} stitched loci because they overlapped multiple TSSs') - print(f'Added back {original_ticker} original loci') - fixed_collection = LocusCollection(fixed_loci, 50) - return fixed_collection - else: - return stitched_collection - - -# ================================================================== -# ==========================I/O FUNCTIONS=========================== -# ================================================================== - -# unparse_table 4/14/08 -# takes in a table generated by parse_table and writes it to an output file -# takes as parameters (table, output, sep), where sep is how the file is delimited -# example call unparse_table(table, 'table.txt', '\t') for a tab del file - -def unparse_table(table, output, sep): - fh_out = open(output, 'w') - if len(sep) == 0: - for i in 
table: - fh_out.write(str(i)) - fh_out.write('\\n') - else: - for line in table: - line = [str(x) for x in line] - line = sep.join(line) - - fh_out.write(line) - fh_out.write('\\n') - - fh_out.close() - - -# parse_table 4/14/08 -# takes in a table where columns are separated by a given symbol and outputs -# a nested list such that list[row][col] -# example call: -# table = parse_table('file.txt','\t') -def parse_table(fn, sep, header=False, excel=False): - fh = open(fn) - lines = fh.readlines() - fh.close() - if excel: - lines = lines[0].split('\\r') - if lines[0].count('\\r') > 0: - lines = lines[0].split('\\r') - table = [] - if header: - lines = lines[1:] - for i in lines: - table.append(i[:-1].split(sep)) - - return table - - -def format_folder(folder_name, create=False): - """ - makes sure a folder exists and if not makes it - returns a bool for folder - """ - - if folder_name[-1] != '/': - folder_name += '/' - - try: - foo = os.listdir(folder_name) - return folder_name - except OSError: - print(f'folder {folder_name} does not exist') - if create: - os.system(f'mkdir {folder_name}') - return folder_name - else: - - return False - - # ================================================================== - - -# ===================ANNOTATION FUNCTIONS=========================== -# ================================================================== - - -def make_start_dict(annot_file): - transcripts = [] - - genepred_table, genepred_dict = import_genepred(annot_file) - if len(transcripts) == 0: - transcripts = list(genepred_dict.keys()) - start_dict = {} - for transcript in transcripts: - if transcript not in genepred_dict: - continue - start_dict[transcript] = {} - start_dict[transcript]['sense'] = genepred_table[genepred_dict[transcript][0]][2] - start_dict[transcript]['chr'] = genepred_table[genepred_dict[transcript][0]][1] - start_dict[transcript]['start'] = get_tsss([transcript], genepred_table, genepred_dict) - if start_dict[transcript]['sense'] == '+': - start_dict[transcript]['end'] = [int(genepred_table[genepred_dict[transcript][0]][4])] - else: - start_dict[transcript]['end'] = [int(genepred_table[genepred_dict[transcript][0]][3])] - start_dict[transcript]['name'] = genepred_table[genepred_dict[transcript][0]][11] - - return start_dict - - -# generic function to get the TSS of any gene -def get_tsss(gene_list, genepred_table, genepred_dict): - if len(gene_list) == 0: - genepred = genepred_table - else: - genepred = genepred_from_key(gene_list, genepred_dict, genepred_table) - tss = [] - for line in genepred: - if line[2] == '+': - tss.append(line[3]) - if line[2] == '-': - tss.append(line[4]) - tss = list(map(int, tss)) - - return tss - - -# 12/29/08 -# genepred_from_key(genepredKeyList,genepred_dict,genepred_table) -# function that grabs genepred lines from genepred IDs -def genepred_from_key(genepred_key_list, genepred_dict, genepred_table): - type_genepred = [] - for name in genepred_key_list: - if name in genepred_dict: - type_genepred.append(genepred_table[genepred_dict[name][0]]) - return type_genepred - - - -def import_genepred(genepred_file, return_multiples=False): - genepred_table = parse_table(genepred_file, '\\t') - genepred_dict = {} - ticker = 0 - for line in genepred_table: - transcript = line[0] - if transcript in genepred_dict: - genepred_dict[transcript].append(ticker) - else: - genepred_dict[transcript] = [ticker] - ticker = ticker + 1 - - multiples = [] - for i in genepred_dict: - if len(genepred_dict[i]) > 1: - multiples.append(i) - - if return_multiples: - 
return genepred_table, genepred_dict, multiples - else: - return genepred_table, genepred_dict - - -# ================================================================== -# ========================LOCUS INSTANCE============================ -# ================================================================== - -# Locus and LocusCollection instances courtesy of Graham Ruby - - -class Locus: - # this may save some space by reducing the number of chromosome strings - # that are associated with Locus instances (see __init__). - __chrDict = dict() - __senseDict = {'+': '+', '-': '-', '.': '.'} - - # chr = chromosome name (string) - # sense = '+' or '-' (or '.' for an ambidextrous locus) - # start,end = ints of the start and end coords of the locus - # end coord is the coord of the last nucleotide. - def __init__(self, chr, start, end, sense, id='', score=0): - coords = [int(start), int(end)] - coords.sort() - # this method for assigning chromosome should help avoid storage of - # redundant strings. - if chr not in self.__chrDict: - self.__chrDict[chr] = chr - self._chr = self.__chrDict[chr] - self._sense = self.__senseDict[sense] - self._start = int(coords[0]) - self._end = int(coords[1]) - self._id = id - self._score = score - - def id(self): - return self._id - - def chr(self): - return self._chr - - def start(self): - return self._start # returns the smallest coordinate - - def end(self): - return self._end # returns the biggest coordinate - - def len(self): - return self._end - self._start + 1 - - def get_antisense_locus(self): - if self._sense == '.': - return self - else: - switch = {'+': '-', '-': '+'} - return Locus(self._chr, self._start, self._end, switch[self._sense]) - - def coords(self): - return [self._start, self._end] # returns a sorted list of the coordinates - - def sense(self): - return self._sense - - def score(self): - return self._score - - # returns boolean; True if two loci share any coordinates in common - def overlaps(self, other_locus): - if self.chr() != other_locus.chr(): - return False - elif not (self._sense == '.' or other_locus.sense() == '.' or self.sense() == other_locus.sense()): - return False - elif self.start() > other_locus.end() or other_locus.start() > self.end(): - return False - else: - return True - - # returns boolean; True if all the nucleotides of the given locus overlap - # with the self locus - def contains(self, other_locus): - if self.chr() != other_locus.chr(): - return False - elif not (self._sense == '.' or other_locus.sense() == '.' 
or self.sense() == other_locus.sense()): - return False - elif self.start() > other_locus.start() or other_locus.end() > self.end(): - return False - else: - return True - - # same as overlaps, but considers the opposite strand - def overlaps_antisense(self, other_locus): - return self.get_antisense_locus().overlaps(other_locus) - - # same as contains, but considers the opposite strand - def contains_antisense(self, other_locus): - return self.get_antisense_locus().contains(other_locus) - - def __hash__(self): - return self._start + self._end - - def __eq__(self, other): - if self.__class__ != other.__class__: - return False - if self.chr() != other.chr(): - return False - if self.start() != other.start(): - return False - if self.end() != other.end(): - return False - if self.sense() != other.sense(): - return False - return True - - def __ne__(self, other): - return not (self.__eq__(other)) - - def __str__(self): - return self.chr() + '(' + self.sense() + '):' + '-'.join(map(str, self.coords())) - - def check_rep(self): - pass - - -class LocusCollection: - def __init__(self, loci, window_size): - self.__chr_to_coord_to_loci = dict() - self.__loci = dict() - self.__win_size = window_size - for lcs in loci: - self.__add_locus(lcs) - - def __add_locus(self, lcs): - if lcs not in self.__loci: - self.__loci[lcs] = None - if lcs.sense() == '.': - chr_key_list = [lcs.chr() + '+', lcs.chr() + '-'] - else: - chr_key_list = [lcs.chr() + lcs.sense()] - for chr_key in chr_key_list: - if chr_key not in self.__chr_to_coord_to_loci: - self.__chr_to_coord_to_loci[chr_key] = dict() - for n in self.__get_key_range(lcs): - if n not in self.__chr_to_coord_to_loci[chr_key]: - self.__chr_to_coord_to_loci[chr_key][n] = [] - self.__chr_to_coord_to_loci[chr_key][n].append(lcs) - - def __get_key_range(self, locus): - start = locus.start() // self.__win_size - # add 1 because of the range - end = locus.end() // self.__win_size + 1 - return range(start, end) - - def __len__(self): - return len(self.__loci) - - def append(self, new): - self.__add_locus(new) - - def extend(self, new_list): - for lcs in new_list: - self.__add_locus(lcs) - - def has_locus(self, locus): - return locus in self.__loci - - def remove(self, old): - if old not in self.__loci: - raise ValueError("requested locus isn't in collection") - del self.__loci[old] - if old.sense() == '.': - sense_list = ['+', '-'] - else: - sense_list = [old.sense()] - for k in self.__get_key_range(old): - for sense in sense_list: - self.__chr_to_coord_to_loci[old.chr() + sense][k].remove(old) - - def get_window_size(self): - return self.__win_size - - def get_loci(self): - return list(self.__loci.keys()) - - def get_chr_list(self): - # i need to remove the strand info from the chromosome keys and make - # them non-redundant. - temp_keys = dict() - for k in list(self.__chr_to_coord_to_loci.keys()): - temp_keys[k[:-1]] = None - return list(temp_keys.keys()) - - def __subset_helper(self, locus, sense): - sense = sense.lower() - if ['sense', 'antisense', 'both'].count(sense) != 1: - raise ValueError("sense command invalid: '" + sense + "'.") - matches = dict() - senses = ['+', '-'] - if locus.sense() == '.' 
or sense == 'both': - lamb = lambda s: True - elif sense == 'sense': - lamb = lambda s: s == locus.sense() - elif sense == 'antisense': - lamb = lambda s: s != locus.sense() - else: - raise ValueError("sense value was inappropriate: '" + sense + "'.") - for s in filter(lamb, senses): - chr_key = locus.chr() + s - if chr_key in self.__chr_to_coord_to_loci: - for n in self.__get_key_range(locus): - if n in self.__chr_to_coord_to_loci[chr_key]: - for lcs in self.__chr_to_coord_to_loci[chr_key][n]: - matches[lcs] = None - return list(matches.keys()) - - # sense can be 'sense' (default), 'antisense', or 'both' - # returns all members of the collection that overlap the locus - def get_overlap(self, locus, sense='sense'): - matches = self.__subset_helper(locus, sense) - # now, get rid of the ones that don't really overlap - real_matches = dict() - if sense == 'sense' or sense == 'both': - for i in [lcs for lcs in matches if lcs.overlaps(locus)]: - real_matches[i] = None - if sense == 'antisense' or sense == 'both': - for i in [lcs for lcs in matches if lcs.overlaps_antisense(locus)]: - real_matches[i] = None - return list(real_matches.keys()) - - # sense can be 'sense' (default), 'antisense', or 'both' - # returns all members of the collection that are contained by the locus - def get_contained(self, locus, sense='sense'): - matches = self.__subset_helper(locus, sense) - # now, get rid of the ones that don't really overlap - real_matches = dict() - if sense == 'sense' or sense == 'both': - for i in [lcs for lcs in matches if locus.contains(lcs)]: - real_matches[i] = None - if sense == 'antisense' or sense == 'both': - for i in [lcs for lcs in matches if locus.contains_antisense(lcs)]: - real_matches[i] = None - return list(real_matches.keys()) - - # sense can be 'sense' (default), 'antisense', or 'both' - # returns all members of the collection that contain the locus - def get_containers(self, locus, sense='sense'): - matches = self.__subset_helper(locus, sense) - # now, get rid of the ones that don't really overlap - real_matches = dict() - if sense == 'sense' or sense == 'both': - for i in [lcs for lcs in matches if lcs.contains(locus)]: - real_matches[i] = None - if sense == 'antisense' or sense == 'both': - for i in [lcs for lcs in matches if lcs.contains_antisense(locus)]: - real_matches[i] = None - return list(real_matches.keys()) - - def stitch_collection(self, stitch_window=1, sense='both'): - - """ - reduces the collection by stitching together overlapping loci - returns a new collection - """ - - # initializing stitch_window to 1 - # this helps collect directly adjacent loci - - locus_list = self.get_loci() - old_collection = LocusCollection(locus_list, 500) - - stitched_collection = LocusCollection([], 500) - - for locus in locus_list: - if old_collection.has_locus(locus): - old_collection.remove(locus) - overlapping_loci = old_collection.get_overlap( - Locus(locus.chr(), locus.start() - stitch_window, locus.end() + stitch_window, locus.sense(), - locus.id()), sense) - - stitch_ticker = 1 - while len(overlapping_loci) > 0: - stitch_ticker += len(overlapping_loci) - overlap_coords = locus.coords() - - for overlapping_locus in overlapping_loci: - overlap_coords += overlapping_locus.coords() - old_collection.remove(overlapping_locus) - if sense == 'both': - locus = Locus(locus.chr(), min(overlap_coords), max(overlap_coords), '.', locus.id()) - else: - locus = Locus(locus.chr(), min(overlap_coords), max(overlap_coords), locus.sense(), locus.id()) - overlapping_loci = 
old_collection.get_overlap( - Locus(locus.chr(), locus.start() - stitch_window, locus.end() + stitch_window, locus.sense()), - sense) - locus._id = f'{stitch_ticker}_{locus.id()}_lociStitched' - - stitched_collection.append(locus) - - else: - continue - return stitched_collection - - -# ================================================================== -# ========================LOCUS FUNCTIONS=========================== -# ================================================================== -# 06/11/09 -# turns a locusCollection into a bed -# does not write to disk though -def locus_collection_to_bed(locus_collection): - loci_list = locus_collection.get_loci() - bed = [] - for locus in loci_list: - new_line = [locus.chr(), locus.coords()[0], locus.coords()[1], locus.id(), locus.score(), locus.sense()] - bed.append(new_line) - return bed - - -def bed_to_locus_collection(bed, window=500): - """ - opens up a bed file and turns it into a LocusCollection instance - """ - - loci_list = [Locus(line[0], line[1], line[2], line[5], line[3]) - for line in parse_table(bed, '\\t')] - - return LocusCollection(loci_list, window) - - -def make_tss_locus(gene, start_dict, upstream, downstream): - """ - given a start_dict, make a locus for any gene's TSS w/ upstream and downstream windows - """ - - start = start_dict[gene]['start'][0] - if start_dict[gene]['sense'] == '-': - return Locus(start_dict[gene]['chr'], start - downstream, start + upstream, '-', gene) - else: - return Locus(start_dict[gene]['chr'], start - upstream, start + downstream, '+', gene) - - -# ================================================================== -# ========================MISC FUNCTIONS============================ -# ================================================================== - - -# uniquify function -# by Peter Bengtsson -# Used under a creative commons license -# sourced from here: http://www.peterbe.com/plog/uniqifiers-benchmark - -def uniquify(seq, idfun=None): - # order preserving - if idfun is None: - def idfun(x): return x - seen = {} - result = [] - for item in seq: - marker = idfun(item) - # in old Python versions: - # if seen.has_key(marker) - # but in new ones: - if marker in seen: continue - seen[marker] = 1 - result.append(item) - return result - - -start_dict = make_start_dict("$genepred") -locus_collection = bed_to_locus_collection("$bed") -stitched_collection = region_stitching(locus_collection, int("$stitch"), int("$tss_dist"), start_dict) -stitched = locus_collection_to_bed(stitched_collection) -unparse_table(stitched, "${meta.id}.rose.bed", '\\t') - -# Create version file -versions = { - "${task.process}" : { - "python": platform.python_version() - } -} - -with open("versions.yml", "w") as f: - f.write(format_yaml_like(versions)) diff --git a/modules/nf-core/ucsc/gtftogenepred/environment.yml b/modules/nf-core/bedtools/complement/environment.yml similarity index 51% rename from modules/nf-core/ucsc/gtftogenepred/environment.yml rename to modules/nf-core/bedtools/complement/environment.yml index 5216fc8..396f324 100644 --- a/modules/nf-core/ucsc/gtftogenepred/environment.yml +++ b/modules/nf-core/bedtools/complement/environment.yml @@ -1,7 +1,7 @@ -name: ucsc_gtftogenepred +name: bedtools_complement channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::ucsc-gtftogenepred=447 + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/complement/main.nf b/modules/nf-core/bedtools/complement/main.nf new file mode 100644 index 0000000..305b860 --- /dev/null +++ 
b/modules/nf-core/bedtools/complement/main.nf @@ -0,0 +1,38 @@ +process BEDTOOLS_COMPLEMENT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(bed) + path sizes + + output: + tuple val(meta), path('*.bed'), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bed" == "${prefix}.bed") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + bedtools \\ + complement \\ + -i $bed \\ + -g $sizes \\ + $args \\ + > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/complement/meta.yml b/modules/nf-core/bedtools/complement/meta.yml new file mode 100644 index 0000000..ee84bd7 --- /dev/null +++ b/modules/nf-core/bedtools/complement/meta.yml @@ -0,0 +1,51 @@ +name: bedtools_complement +description: Returns all intervals in a genome that are not covered by at least one interval in the input BED/GFF/VCF file. +keywords: + - bed + - gff + - vcf + - complement + - bedtools + - intervals +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/complement.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: Input BED file + pattern: "*.{bed}" + - sizes: + type: file + description: File which defines the chromosome lengths for a given genome + pattern: "*.{sizes}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: Bed file with all genomic intervals that are not covered by at least one record from the input file. 
+ pattern: "*.{bed}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" +maintainers: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" diff --git a/modules/nf-core/bedtools/complement/tests/main.nf.test b/modules/nf-core/bedtools/complement/tests/main.nf.test new file mode 100644 index 0000000..e85c396 --- /dev/null +++ b/modules/nf-core/bedtools/complement/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + name "Test Process BEDTOOLS_COMPLEMENT" + script "../main.nf" + process "BEDTOOLS_COMPLEMENT" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/complement" + + test("sarscov2") { + when { + process { + """ + input[0] = [ + [ id:'test_out' ], // meta map + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bedtools/complement/tests/main.nf.test.snap b/modules/nf-core/bedtools/complement/tests/main.nf.test.snap new file mode 100644 index 0000000..31e016f --- /dev/null +++ b/modules/nf-core/bedtools/complement/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "sarscov2": { + "content": [ + { + "0": [ + [ + { + "id": "test_out" + }, + "test_out.bed:md5,d71df7e47aec0661c27b71e483e727f9" + ] + ], + "1": [ + "versions.yml:md5,ed0aaf16b8ea118771c2095bdba20ad7" + ], + "bed": [ + [ + { + "id": "test_out" + }, + "test_out.bed:md5,d71df7e47aec0661c27b71e483e727f9" + ] + ], + "versions": [ + "versions.yml:md5,ed0aaf16b8ea118771c2095bdba20ad7" + ] + } + ], + "timestamp": "2023-12-05T17:35:44.213921" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/complement/tests/tags.yml b/modules/nf-core/bedtools/complement/tests/tags.yml new file mode 100644 index 0000000..bc36c35 --- /dev/null +++ b/modules/nf-core/bedtools/complement/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/complement: + - "modules/nf-core/bedtools/complement/**" diff --git a/modules/nf-core/bedtools/intersect/environment.yml b/modules/nf-core/bedtools/intersect/environment.yml new file mode 100644 index 0000000..2a34305 --- /dev/null +++ b/modules/nf-core/bedtools/intersect/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_intersect +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/intersect/main.nf b/modules/nf-core/bedtools/intersect/main.nf new file mode 100644 index 0000000..d9e79e7 --- /dev/null +++ b/modules/nf-core/bedtools/intersect/main.nf @@ -0,0 +1,59 @@ +process BEDTOOLS_INTERSECT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(intervals1), path(intervals2) + tuple val(meta2), path(chrom_sizes) + + output: + tuple val(meta), path("*.${extension}"), emit: intersect + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + //Extension of the output file. 
It is set by the user via "ext.suffix" in the config. Corresponds to the file format which depends on arguments (e. g., ".bed", ".bam", ".txt", etc.). + extension = task.ext.suffix ?: "${intervals1.extension}" + def sizes = chrom_sizes ? "-g ${chrom_sizes}" : '' + if ("$intervals1" == "${prefix}.${extension}" || + "$intervals2" == "${prefix}.${extension}") + error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + bedtools \\ + intersect \\ + -a $intervals1 \\ + -b $intervals2 \\ + $args \\ + $sizes \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + extension = task.ext.suffix ?: "bed" + if ("$intervals1" == "${prefix}.${extension}" || + "$intervals2" == "${prefix}.${extension}") + error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/intersect/meta.yml b/modules/nf-core/bedtools/intersect/meta.yml new file mode 100644 index 0000000..0939cb5 --- /dev/null +++ b/modules/nf-core/bedtools/intersect/meta.yml @@ -0,0 +1,59 @@ +name: bedtools_intersect +description: Allows one to screen for overlaps between two sets of genomic features. +keywords: + - bed + - intersect + - overlap +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals1: + type: file + description: BAM/BED/GFF/VCF + pattern: "*.{bam|bed|gff|vcf}" + - intervals2: + type: file + description: BAM/BED/GFF/VCF + pattern: "*.{bam|bed|gff|vcf}" + - meta2: + type: map + description: | + Groovy Map containing reference chromosome sizes + e.g. [ id:'test' ] + - chrom_sizes: + type: file + description: Chromosome sizes file + pattern: "*{.sizes,.txt}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - intersect: + type: file + description: File containing the description of overlaps found between the two features + pattern: "*.${extension}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@sidorov-si" +maintainers: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@sidorov-si" diff --git a/modules/nf-core/bedtools/slop/environment.yml b/modules/nf-core/bedtools/slop/environment.yml new file mode 100644 index 0000000..e03ee96 --- /dev/null +++ b/modules/nf-core/bedtools/slop/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_slop +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/slop/main.nf b/modules/nf-core/bedtools/slop/main.nf new file mode 100644 index 0000000..e5b8e1e --- /dev/null +++ b/modules/nf-core/bedtools/slop/main.nf @@ -0,0 +1,49 @@ +process BEDTOOLS_SLOP { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(bed) + path sizes + + output: + tuple val(meta), path("*.bed"), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bed" == "${prefix}.bed") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + bedtools \\ + slop \\ + -i $bed \\ + -g $sizes \\ + $args \\ + > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/slop/meta.yml b/modules/nf-core/bedtools/slop/meta.yml new file mode 100644 index 0000000..6911671 --- /dev/null +++ b/modules/nf-core/bedtools/slop/meta.yml @@ -0,0 +1,44 @@ +name: bedtools_slop +description: Adds a specified number of bases in each direction (unique values may be specified for either -l or -r) +keywords: + - bed + - slopBed + - bedtools +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/slop.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: Input BED file + pattern: "*.{bed}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bed: + type: file + description: Slopped BED file + pattern: "*.{bed}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" +maintainers: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" diff --git a/modules/nf-core/gnu/sort/environment.yml b/modules/nf-core/gnu/sort/environment.yml new file mode 100644 index 0000000..5c4eb9d --- /dev/null +++ b/modules/nf-core/gnu/sort/environment.yml @@ -0,0 +1,7 @@ +name: gnu_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::coreutils=8.25 diff --git a/modules/nf-core/gnu/sort/main.nf b/modules/nf-core/gnu/sort/main.nf new file mode 100644 index 0000000..d0560ee --- /dev/null +++ b/modules/nf-core/gnu/sort/main.nf @@ -0,0 +1,51 @@ +process GNU_SORT { + tag "${meta.id}" + label "process_low" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/coreutils:8.25--1' : + 'biocontainers/coreutils:8.25--1' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), file( "${output_file}" ) , emit: sorted + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.extension}" + output_file = "${prefix}.${suffix}" + def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + if ("$input" == "$output_file") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + sort ${args} ${input} > ${output_file} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + coreutils: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.extension}" + output_file = "${prefix}.${suffix}" + def VERSION = "9.1" + + if ("$input" == "$output_file") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${output_file} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + coreutils: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/gnu/sort/meta.yml b/modules/nf-core/gnu/sort/meta.yml new file mode 100644 index 0000000..9d96175 --- /dev/null +++ b/modules/nf-core/gnu/sort/meta.yml @@ -0,0 +1,41 @@ +name: "gnu_sort" +description: | + Writes a sorted concatenation of file/s +keywords: + - GNU + - sort + - merge compare +tools: + - sort: + description: "Writes a sorted concatenation of file/s" + homepage: "https://github.com/vgl-hub/gfastats" + documentation: "https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html" + licence: ["GPL"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Draft assembly file + pattern: "*.{txt,bed,interval,genome,bins}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - sorted: + type: file + description: The sorted txt file generated by sort + pattern: "*.{txt,bed,interval,genome,bins}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@DLBPointon" +maintainers: + - "@DLBPointon" diff --git a/modules/nf-core/gnu/sort/tests/main.nf.test b/modules/nf-core/gnu/sort/tests/main.nf.test new file mode 100644 index 0000000..e403018 --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/main.nf.test @@ -0,0 +1,120 @@ +nextflow_process { + + name "Test Process GNU_SORT" + script "modules/nf-core/gnu/sort/main.nf" + process "GNU_SORT" + + tag "modules" + tag "modules_nfcore" + tag "gnu" + tag "gnu/sort" + + test("unsorted_genome_sort") { + config "./sort_simple_bed.config" + + when { + process { + """ + input[0] = [ + [id:'genome_test'], + file(params.test_data['generic']['unsorted_data']['unsorted_text']['genome_file'], + checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.sorted[0][1]).name + ).match("genome_sort") + } + ) + } + + } + + test("unsorted_intervals_sort") { + config "./sort_simple_bed.config" + when { + process { + """ + input[0] = [ + [id:'test'], + file(params.test_data['generic']['unsorted_data']['unsorted_text']['intervals'], + checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.sorted[0][1]).name + ).match("interval_sort") + } + ) + } + + } + + test("unsorted_csv_sort") { + config "./sort_complex.config" + + when { + process { + """ + input[0] = [ + [id:'test'], + file(params.test_data['generic']['unsorted_data']['unsorted_text']['numbers_csv'], + checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.sorted[0][1]).name + ).match("csv_sort") + } + ) + } + + } + + test("unsorted_csv_sort_stub") { + config "./sort_complex.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [id:'test'], + file(params.test_data['generic']['unsorted_data']['unsorted_text']['numbers_csv'], + checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + +} diff --git a/modules/nf-core/gnu/sort/tests/main.nf.test.snap b/modules/nf-core/gnu/sort/tests/main.nf.test.snap new file mode 100644 index 0000000..fc20d87 --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/main.nf.test.snap @@ -0,0 +1,136 @@ +{ + "unsorted_csv_sort": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.csv.sorted:md5,0b52d1b4c4a0c6e972c6f94aafd75a1d" + ] + ], + "1": [ + "versions.yml:md5,8ebec31a85721157399cb4faab6e0d26" + ], + "sorted": [ + [ + { + "id": "test" + }, + "test.csv.sorted:md5,0b52d1b4c4a0c6e972c6f94aafd75a1d" + ] + ], + "versions": [ + "versions.yml:md5,8ebec31a85721157399cb4faab6e0d26" + ] + } + ], + "timestamp": "2024-02-22T13:57:33.598003" + }, + "interval_sort": { + "content": [ + "test.bed.sorted" + ], + "timestamp": "2024-02-22T13:57:25.5442" + }, + "unsorted_csv_sort_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.csv.sorted:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,8ebec31a85721157399cb4faab6e0d26" + ], + "sorted": [ + [ + { + 
"id": "test" + }, + "test.csv.sorted:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,8ebec31a85721157399cb4faab6e0d26" + ] + } + ], + "timestamp": "2024-02-22T13:57:41.490986" + }, + "csv_sort": { + "content": [ + "test.csv.sorted" + ], + "timestamp": "2024-02-22T13:57:33.62444" + }, + "unsorted_genome_sort": { + "content": [ + { + "0": [ + [ + { + "id": "genome_test" + }, + "genome_test.bed.sorted:md5,fd97f7efafdbbfa71d9b560f10b4b048" + ] + ], + "1": [ + "versions.yml:md5,8ebec31a85721157399cb4faab6e0d26" + ], + "sorted": [ + [ + { + "id": "genome_test" + }, + "genome_test.bed.sorted:md5,fd97f7efafdbbfa71d9b560f10b4b048" + ] + ], + "versions": [ + "versions.yml:md5,8ebec31a85721157399cb4faab6e0d26" + ] + } + ], + "timestamp": "2024-02-22T13:57:17.281092" + }, + "genome_sort": { + "content": [ + "genome_test.bed.sorted" + ], + "timestamp": "2024-02-22T13:57:17.340463" + }, + "unsorted_intervals_sort": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed.sorted:md5,abbce903ef263d38b2f71856387799ab" + ] + ], + "1": [ + "versions.yml:md5,8ebec31a85721157399cb4faab6e0d26" + ], + "sorted": [ + [ + { + "id": "test" + }, + "test.bed.sorted:md5,abbce903ef263d38b2f71856387799ab" + ] + ], + "versions": [ + "versions.yml:md5,8ebec31a85721157399cb4faab6e0d26" + ] + } + ], + "timestamp": "2024-02-22T13:57:25.514206" + } +} \ No newline at end of file diff --git a/modules/nf-core/gnu/sort/tests/sort_complex.config b/modules/nf-core/gnu/sort/tests/sort_complex.config new file mode 100644 index 0000000..103eaaf --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/sort_complex.config @@ -0,0 +1,6 @@ +process { + withName: GNU_SORT { + ext.args = { "-t ';' -g -k 1,1 -k 2,2" } + ext.suffix = { "csv.sorted" } + } +} \ No newline at end of file diff --git a/modules/nf-core/gnu/sort/tests/sort_simple_bed.config b/modules/nf-core/gnu/sort/tests/sort_simple_bed.config new file mode 100644 index 0000000..d7d52e0 --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/sort_simple_bed.config @@ -0,0 +1,6 @@ +process { + withName: GNU_SORT { + ext.args = { "-k1,1 -k2,2n" } + ext.suffix = { "bed.sorted" } + } +} \ No newline at end of file diff --git a/modules/nf-core/gnu/sort/tests/sort_simple_genome.config b/modules/nf-core/gnu/sort/tests/sort_simple_genome.config new file mode 100644 index 0000000..4dcec38 --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/sort_simple_genome.config @@ -0,0 +1,6 @@ +process { + withName: GNU_SORT { + ext.args = { "-k1,1 -k2,2n" } + ext.suffix = { "genome.sorted" } + } +} \ No newline at end of file diff --git a/modules/nf-core/gnu/sort/tests/tags.yml b/modules/nf-core/gnu/sort/tests/tags.yml new file mode 100644 index 0000000..ac40e37 --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/tags.yml @@ -0,0 +1,2 @@ +gnu/sort: + - "modules/nf-core/gnu/sort/**" diff --git a/modules/nf-core/ucsc/gtftogenepred/main.nf b/modules/nf-core/ucsc/gtftogenepred/main.nf deleted file mode 100644 index afbb5f3..0000000 --- a/modules/nf-core/ucsc/gtftogenepred/main.nf +++ /dev/null @@ -1,54 +0,0 @@ -process UCSC_GTFTOGENEPRED { - tag "${meta.id}" - label 'process_low' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
-        'https://depot.galaxyproject.org/singularity/ucsc-gtftogenepred:447--h954228d_0':
-        'biocontainers/ucsc-gtftogenepred:447--h954228d_0' }"
-
-    input:
-    tuple val(meta), path(gtf)
-
-    output:
-    tuple val(meta), path("*.genepred"), emit: genepred
-    tuple val(meta), path("*.refflat") , emit: refflat , optional: true
-    path "versions.yml"                , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def gen_refflat = args.contains("-genePredExt") && args.contains("-geneNameAsName2") ? "true" : "false"
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def VERSION = '447' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
-    """
-    gtfToGenePred \\
-        $args \\
-        $gtf \\
-        ${prefix}.genepred
-
-    if [ "${gen_refflat}" == "true" ] ; then
-        awk 'BEGIN { OFS="\\t"} {print \$12, \$1, \$2, \$3, \$4, \$5, \$6, \$7, \$8, \$9, \$10}' ${prefix}.genepred > ${prefix}.refflat
-    fi
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        ucsc: $VERSION
-    END_VERSIONS
-    """
-
-    stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def VERSION = '447'
-    """
-    touch ${prefix}.genepred
-    touch ${prefix}.refflat
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        ucsc: $VERSION
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/ucsc/gtftogenepred/meta.yml b/modules/nf-core/ucsc/gtftogenepred/meta.yml
deleted file mode 100644
index 02122e4..0000000
--- a/modules/nf-core/ucsc/gtftogenepred/meta.yml
+++ /dev/null
@@ -1,47 +0,0 @@
-name: ucsc_gtftogenepred
-description: compute average score of bigwig over bed file
-keywords:
-  - gtf
-  - genepred
-  - refflat
-  - ucsc
-  - gtftogenepred
-tools:
-  - ucsc:
-      description: Convert GTF files to GenePred format
-      homepage: http://hgdownload.cse.ucsc.edu/admin/exe/
-      licence: ["varies; see http://genome.ucsc.edu/license"]
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - gtf:
-      type: file
-      description: GTF file
-      pattern: "*.{gtf}"
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - genepred:
-      type: file
-      description: genepred file
-      pattern: "*.{genepred}"
-  - refflat:
-      type: file
-      description: refflat file
-      pattern: "*.{refflat}"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-authors:
-  - "@BarryDigby"
-  - "@anoronh4"
-maintainers:
-  - "@BarryDigby"
-  - "@anoronh4"
diff --git a/nextflow.config b/nextflow.config
index 914d359..d54ec29 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -27,8 +27,11 @@ params {
     expression_aggregation     = 'mean'
     affinity_aggregation       = 'max'
     chromhmm_states            = 10
-    chromhmm_threshold         = 0.9
-    chromhmm_marks             = 'H3K27ac,H3K4me3'
+    chromhmm_threshold         = 0.75
+    chromhmm_enhancer_marks    = 'H3K27ac,H3K4me1'
+    chromhmm_promoter_marks    = 'H3K4me3'
+    rose_tss_window            = 2500
+    rose_stitching_window      = 12500
     dynamite_ofolds            = 3
     dynamite_ifolds            = 6
diff --git a/nextflow_schema.json b/nextflow_schema.json
index c1e74fb..7b92612 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -131,17 +131,38 @@
         },
         "chromhmm_threshold": {
             "type": "number",
-            "default": 0.9,
+            "default": 0.75,
             "description": "Threshold for ChromHMM enhancer detection.",
             "fa_icon": "fas fa-compress-arrows-alt",
-            "help_text": "Threshold for ChromHMM enhancer detection. The default value is 0.9."
+            "help_text": "Threshold for ChromHMM enhancer detection. The default value is 0.75."
         },
-        "chromhmm_marks": {
+        "chromhmm_enhancer_marks": {
             "type": "string",
-            "default": "H3K27ac,H3K4me3",
+            "default": "H3K27ac,H3K4me1",
             "description": "Comma-separated ChromHMM enhancer marks.",
             "fa_icon": "fas fa-compress-arrows-alt",
-            "help_text": "ChromHMM enhancer marks. The default value is `H3K27acH3K4me3`."
+            "help_text": "ChromHMM enhancer marks. The default value is `H3K27ac,H3K4me1`."
         },
+        "chromhmm_promoter_marks": {
+            "type": "string",
+            "default": "H3K4me3",
+            "description": "Comma-separated ChromHMM promoter marks.",
+            "fa_icon": "fas fa-compress-arrows-alt",
+            "help_text": "ChromHMM promoter marks. The default value is `H3K4me3`."
+        },
+        "rose_tss_window": {
+            "type": "integer",
+            "default": 2500,
+            "description": "TSS window in base pairs",
+            "fa_icon": "fas fa-compress-arrows-alt",
+            "help_text": "ROSE window size around transcription start sites"
+        },
+        "rose_stitching_window": {
+            "type": "integer",
+            "default": 12500,
+            "description": "Stitching window in base pairs",
+            "fa_icon": "fas fa-compress-arrows-alt",
+            "help_text": "ROSE window size for stitching two regions together"
+        },
         "min_count": {
             "type": "integer",
diff --git a/subworkflows/local/chromhmm.nf b/subworkflows/local/chromhmm.nf
index e64eaba..ae9294a 100644
--- a/subworkflows/local/chromhmm.nf
+++ b/subworkflows/local/chromhmm.nf
@@ -1,9 +1,7 @@
-// Modules
-include { SAMTOOLS_REHEADER as REHEADER_SIGNAL  } from '../../modules/nf-core/samtools/reheader'
-include { SAMTOOLS_REHEADER as REHEADER_CONTROL } from '../../modules/nf-core/samtools/reheader'
 include { BINARIZE_BAMS } from '../../modules/local/chromhmm/binarize_bams'
 include { LEARN_MODEL } from '../../modules/local/chromhmm/learn_model'
-include { GET_RESULTS } from '../../modules/local/chromhmm/get_results'
+include { GET_RESULTS as GET_ENHANCER_RESULTS } from '../../modules/local/chromhmm/get_results'
+include { GET_RESULTS as GET_PROMOTER_RESULTS } from '../../modules/local/chromhmm/get_results'
 
 workflow CHROMHMM {
 
@@ -12,7 +10,8 @@
     chrom_sizes
     n_states
     threshold
-    marks
+    enhancer_marks
+    promoter_marks
 
     main:
 
@@ -31,40 +30,60 @@
             assay: meta.assay], bam]}
 
-    ch_signal  = REHEADER_SIGNAL (ch_bams.signal ).bam.map{meta, bam -> remove_type(meta, bam)}
-    ch_control = REHEADER_CONTROL(ch_bams.control).bam.map{meta, bam -> remove_type(meta, bam)}
+
+    ch_signal  = ch_bams.signal.map{meta, bam -> remove_type(meta, bam)}
+    ch_control = ch_bams.control.map{meta, bam -> remove_type(meta, bam)}
+
     ch_joined = ch_signal.join(ch_control)
     ch_mixed = ch_signal.mix(ch_control)
 
-    ch_versions = ch_versions.mix(REHEADER_SIGNAL.out.versions)
-    ch_versions = ch_versions.mix(REHEADER_CONTROL.out.versions)
-
     ch_table = ch_joined
         .map{meta, signal, control -> [meta.condition, meta.assay, signal.name, control.name]}
         .collectFile() {
            ["cellmarkfiletable.tsv", it.join("\t") + "\n"]
        }.map{[it.baseName, it]}.collect()
 
+    // drop meta, remove duplicated control bams, add new meta
     BINARIZE_BAMS(
-        ch_mixed.map{meta, bam -> bam}.collect().map{files -> [[id: "chromHMM"], files]},
+        ch_mixed.map{meta, bam -> bam}.unique().collect().map{files -> [[id: "chromHMM"], files]},
         ch_table,
         chrom_sizes
     )
 
     LEARN_MODEL(
-        BINARIZE_BAMS.out.map{meta, files -> files}.flatten().collect().map{files -> [[id: "chromHMM"], files]},
+        BINARIZE_BAMS.out.binarized_bams.map{meta, files -> files}.flatten().collect().map{files -> [[id: "chromHMM"], files]},
         n_states
     )
 
-    GET_RESULTS(LEARN_MODEL.out.transpose()
-        .map{meta, emmisions, bed ->
-            [meta + [id:
-                        emmisions, bed]}, threshold, marks)
+    GET_ENHANCER_RESULTS(LEARN_MODEL.out.model
+        .transpose()
+        .map{meta, emissions, bed -> [[id: bed.simpleName.split("_")[0]], emissions, bed]},
+        threshold,
+        enhancer_marks,
+    )
+
+    GET_PROMOTER_RESULTS(LEARN_MODEL.out.model
+        .transpose()
+        .map{meta, emissions, bed -> [[id: bed.simpleName.split("_")[0]], emissions, bed]},
+        threshold,
+        promoter_marks,
+    )
+
+    ch_enhancers = GET_ENHANCER_RESULTS.out.regions
+        .map{meta, bed -> [[id: meta.id + "_" + "chromHMM_enhancers", condition: meta.id, assay: "chromHMM_enhancers"], bed]}
+
+    ch_promoters = GET_PROMOTER_RESULTS.out.regions
+        .map{meta, bed -> [[id: meta.id + "_" + "chromHMM_promoters", condition: meta.id, assay: "chromHMM_promoters"], bed]}

-    ch_enhancers = GET_RESULTS.out.map{meta, bed -> [[condition: meta.id, assay: "chromHMM_enhancers"], bed]}
-                        .map{meta, bed -> [meta + [id: meta.condition + "_" + meta.assay], bed]}
+    ch_versions = ch_versions.mix(
+        BINARIZE_BAMS.out.versions,
+        LEARN_MODEL.out.versions,
+        GET_ENHANCER_RESULTS.out.versions,
+        GET_PROMOTER_RESULTS.out.versions,
+    )

     emit:
     enhancers = ch_enhancers
+    promoters = ch_promoters
     versions  = ch_versions
 }
diff --git a/subworkflows/local/fimo.nf b/subworkflows/local/fimo.nf
index 0b13d28..3da8310 100644
--- a/subworkflows/local/fimo.nf
+++ b/subworkflows/local/fimo.nf
@@ -1,6 +1,6 @@
 include { FILTER_MOTIFS } from "../../modules/local/fimo/filter_motifs"
-include { CAT_CAT as CONCAT_BEDS } from "../../modules/nf-core/cat/cat"
-include { BEDTOOLS_SORT as SORT_REGIONS } from "../../modules/nf-core/bedtools/sort"
+include { GAWK as ADD_MISSING_COLUMNS } from "../../modules/nf-core/gawk"
+include { GNU_SORT as CONCAT_AND_SORT } from "../../modules/nf-core/gnu/sort"
 include { BEDTOOLS_MERGE as MERGE_REGIONS } from "../../modules/nf-core/bedtools/merge"
 include { BEDTOOLS_GETFASTA as EXTRACT_SEQUENCE } from "../../modules/nf-core/bedtools/getfasta"
 include { RUN_FIMO } from "../../modules/local/fimo/run_fimo"
@@ -19,16 +19,16 @@ workflow FIMO {

     FILTER_MOTIFS(tf_ranking, motifs_meme)

-    ch_cat_input = enhancer_regions
+    ADD_MISSING_COLUMNS(enhancer_regions, [])
+
+    ch_concat_and_sort = ADD_MISSING_COLUMNS.out.output
         .map{meta, file -> file}
         .collect()
         .map{files -> [[id: "enhancer_regions"], files]}

-    CONCAT_BEDS(ch_cat_input)
-
-    SORT_REGIONS(CONCAT_BEDS.out.file_out, [])
+    CONCAT_AND_SORT(ch_concat_and_sort)

-    MERGE_REGIONS(SORT_REGIONS.out.sorted)
+    MERGE_REGIONS(CONCAT_AND_SORT.out.sorted)

     EXTRACT_SEQUENCE(MERGE_REGIONS.out.bed, fasta.map{meta, fasta -> fasta})

@@ -47,8 +47,8 @@ workflow FIMO {

     ch_versions = ch_versions.mix(
         FILTER_MOTIFS.out.versions,
-        CONCAT_BEDS.out.versions,
-        SORT_REGIONS.out.versions,
+        ADD_MISSING_COLUMNS.out.versions,
+        CONCAT_AND_SORT.out.versions,
         MERGE_REGIONS.out.versions,
         EXTRACT_SEQUENCE.out.versions,
         RUN_FIMO.out.versions,
diff --git a/subworkflows/local/peaks.nf b/subworkflows/local/peaks.nf
index a78297d..83ff62b 100644
--- a/subworkflows/local/peaks.nf
+++ b/subworkflows/local/peaks.nf
@@ -31,7 +31,8 @@ workflow PEAKS {
     chrom_sizes
     chromhmm_states
     chromhmm_threshold
-    chromhmm_marks
+    chromhmm_enhancer_marks
+    chromhmm_promoter_marks

     main:

@@ -60,13 +61,13 @@ workflow PEAKS {
         ch_versions = ch_versions.mix(SORT_PEAKS.out.versions)
     }

-    CHROMHMM(ch_samplesheet_bam, chrom_sizes, chromhmm_states, chromhmm_threshold, chromhmm_marks)
-    ROSE(CHROMHMM.out.enhancers, gtf)
+    CHROMHMM(ch_samplesheet_bam, chrom_sizes, chromhmm_states, chromhmm_threshold, chromhmm_enhancer_marks, chromhmm_promoter_marks)
+    ROSE(CHROMHMM.out.enhancers.mix(CHROMHMM.out.promoters), gtf, chrom_sizes)

     ch_versions = ch_versions.mix(CHROMHMM.out.versions)
     ch_versions = ch_versions.mix(ROSE.out.versions)

-    ch_peaks = ch_peaks .mix(ROSE.out.enhancers)
+    ch_peaks = ch_peaks .mix(ROSE.out.stitched)
         .map { meta, peaks -> [[
             id: meta.id,
             condition: meta.condition,
@@ -137,7 +138,7 @@ workflow PEAKS {

     emit:
     affinity_ratio = AFFINITY_RATIO.out.combined
     affinity_sum   = AFFINITY_SUM.out.combined
-    enhancers      = ROSE.out.enhancers
+    candidate_regions = ch_peaks
     versions       = ch_versions // channel: [ versions.yml ]
 }
diff --git a/subworkflows/local/rose.nf b/subworkflows/local/rose.nf
index b7af511..b27b372 100644
--- a/subworkflows/local/rose.nf
+++ b/subworkflows/local/rose.nf
@@ -1,23 +1,105 @@
-include { ROSE as RUN_ROSE } from "../../modules/local/rose"
-include { UCSC_GTFTOGENEPRED } from "../../modules/nf-core/ucsc/gtftogenepred"
+include { GAWK as FILTER_CONVERT_GTF } from '../../modules/nf-core/gawk'
+include { GNU_SORT as SORT_BED } from '../../modules/nf-core/gnu/sort'
+include { BEDTOOLS_SLOP as CONSTRUCT_TSS } from '../../modules/nf-core/bedtools/slop'
+include { BEDTOOLS_SUBTRACT as FILTER_PREDICTIONS } from '../../modules/nf-core/bedtools/subtract'
+include { BEDTOOLS_COMPLEMENT as INVERT_TSS } from '../../modules/nf-core/bedtools/complement'
+include { BEDTOOLS_MERGE as STITCHING } from '../../modules/nf-core/bedtools/merge'
+include { BEDTOOLS_INTERSECT as TSS_OVERLAP } from '../../modules/nf-core/bedtools/intersect'
+include { GAWK as FILTER_OVERLAPS } from '../../modules/nf-core/gawk'
+include { BEDTOOLS_SUBTRACT as SUBTRACT_OVERLAPS } from '../../modules/nf-core/bedtools/subtract'
+include { BEDTOOLS_INTERSECT as UNSTITCHED_REGIONS } from '../../modules/nf-core/bedtools/intersect'
+include { GNU_SORT as CONCAT_AND_SORT } from '../../modules/nf-core/gnu/sort'

 workflow ROSE {
     take:
     ch_bed
     ch_gtf
+    chrom_sizes

     main:

     ch_versions = Channel.empty()

-    UCSC_GTFTOGENEPRED(ch_gtf)
-    RUN_ROSE(ch_bed, UCSC_GTFTOGENEPRED.out.genepred)
+    // Convert GTF to BED format and collapse each transcript to a single base pair at its start position
+    FILTER_CONVERT_GTF(ch_gtf, [])

-    ch_versions = ch_versions.mix(RUN_ROSE.out.versions)
-    ch_versions = ch_versions.mix(UCSC_GTFTOGENEPRED.out.versions)
+    // Downstream methods require sorted inputs
+    SORT_BED(FILTER_CONVERT_GTF.out.output)
+
+    // Construct a window of 2 * params.rose_tss_window bp around each transcription start site (TSS)
+    CONSTRUCT_TSS(SORT_BED.out.sorted, chrom_sizes.map{meta, file -> file})
+
+    INVERT_TSS(CONSTRUCT_TSS.out.bed, chrom_sizes.map{meta, file -> file})
+
+    predicted_regions = ch_bed.branch{
+        meta, file ->
+            enhancers: meta.assay.contains('enhancers')
+            promoters: meta.assay.contains('promoters')
+    }
+
+    ch_filter_predictions = Channel.empty()
+        .mix(
+            predicted_regions.enhancers.combine(CONSTRUCT_TSS.out.bed),
+            predicted_regions.promoters.combine(INVERT_TSS.out.bed),
+        )
+        .map{meta1, pred, meta2, filtering -> [meta1, pred, filtering]}
+
+    // Remove enhancer predictions fully contained in a TSS window and promoter predictions fully outside of one
+    FILTER_PREDICTIONS(ch_filter_predictions)
+
+    // Merge regions closer than params.rose_stitching_window bp to each other
+    STITCHING(FILTER_PREDICTIONS.out.bed)
+
+    // Count how many TSS windows each stitched region overlaps
+    ch_tss_overlap = STITCHING.out.bed
+        .combine(CONSTRUCT_TSS.out.bed)
+        .map{meta1, stitched, meta2, tss -> [meta1, stitched, tss]}
+
+    TSS_OVERLAP(ch_tss_overlap, [[], []])
+
+    // Keep only stitched regions that overlap at least 2 TSS
+    FILTER_OVERLAPS(TSS_OVERLAP.out.intersect, [])
+
+    // Remove regions that overlap at least 2 TSS from the stitched regions
+    ch_subtract_overlaps = STITCHING.out.bed
+        .combine(FILTER_OVERLAPS.out.output)
+        .filter{meta1, stitched, meta2, overlaps -> meta1.id == meta2.id}
+        .map{meta1, stitched, meta2, overlaps -> [meta1, stitched, overlaps]}
+
+    SUBTRACT_OVERLAPS(ch_subtract_overlaps)
+
+    // Recover the original (pre-stitching) regions of stitched regions that overlap at least 2 TSS
+    ch_unstitched_regions = FILTER_OVERLAPS.out.output
+        .combine(ch_bed)
+        .filter{meta1, overlaps, meta2, pred -> meta1.id == meta2.id}
+        .map{meta1, overlaps, meta2, pred -> [meta1, overlaps, pred]}
+
+    UNSTITCHED_REGIONS(ch_unstitched_regions, [[], []])
+
+    // Combine correctly stitched regions (overlapping < 2 TSS) with the recovered unstitched regions and sort
+    ch_concat_and_sort = SUBTRACT_OVERLAPS.out.bed
+        .combine(UNSTITCHED_REGIONS.out.intersect)
+        .filter{meta1, stitched, meta2, unstitched -> meta1.id == meta2.id}
+        .map{meta1, stitched, meta2, unstitched -> [meta1, [stitched, unstitched]]}
+
+    CONCAT_AND_SORT(ch_concat_and_sort)
+
+    ch_versions = ch_versions.mix(
+        FILTER_CONVERT_GTF.out.versions,
+        SORT_BED.out.versions,
+        CONSTRUCT_TSS.out.versions,
+        INVERT_TSS.out.versions,
+        FILTER_PREDICTIONS.out.versions,
+        STITCHING.out.versions,
+        TSS_OVERLAP.out.versions,
+        FILTER_OVERLAPS.out.versions,
+        SUBTRACT_OVERLAPS.out.versions,
+        UNSTITCHED_REGIONS.out.versions,
+        CONCAT_AND_SORT.out.versions,
+    )

     emit:
-    enhancers = RUN_ROSE.out.stitched
+    stitched = CONCAT_AND_SORT.out.sorted
     versions = ch_versions
 }
diff --git a/workflows/tfactivity.nf b/workflows/tfactivity.nf
index e48fd64..b8fa3e6 100644
--- a/workflows/tfactivity.nf
+++ b/workflows/tfactivity.nf
@@ -44,7 +44,8 @@ workflow TFACTIVITY {
     ch_samplesheet_bam
     chromhmm_states
     chromhmm_threshold
-    chromhmm_marks
+    chromhmm_enhancer_marks
+    chromhmm_promoter_marks

     // Peaks
     window_size
@@ -117,7 +118,8 @@ workflow TFACTIVITY {
         chrom_sizes,
         chromhmm_states,
         chromhmm_threshold,
-        chromhmm_marks
+        chromhmm_enhancer_marks,
+        chromhmm_promoter_marks,
     )

     DYNAMITE(
@@ -139,7 +141,7 @@ workflow TFACTIVITY {
     FIMO(
         fasta,
         RANKING.out.tf_total_ranking,
-        PEAKS.out.enhancers,
+        PEAKS.out.candidate_regions,
         MOTIFS.out.meme,
     )
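
For orientation, the bedtools chain introduced in subworkflows/local/rose.nf corresponds roughly to the shell sketch below. This is a minimal illustration, not the pipeline itself: file names are invented, the default windows (rose_tss_window = 2500, rose_stitching_window = 12500) are assumed, input BED files are assumed position-sorted, and only the enhancer branch is shown; the promoter branch filters against the bedtools complement of the TSS windows instead.

# FILTER_CONVERT_GTF + SORT_BED: 1-bp anchors at transcript starts
# (GTF is 1-based, BED half-open; simplified here to three columns)
awk 'BEGIN {OFS = "\t"} $3 == "transcript" {print $1, $4-1, $4}' genes.gtf \
    | sort -k1,1 -k2,2n > tss_anchors.bed

# CONSTRUCT_TSS: extend each anchor by rose_tss_window on both sides
bedtools slop -i tss_anchors.bed -g chrom.sizes -b 2500 > tss_windows.bed

# FILTER_PREDICTIONS: drop enhancer calls fully contained in a TSS window
bedtools subtract -A -f 1 -a enhancers.bed -b tss_windows.bed > filtered.bed

# STITCHING: merge regions closer than rose_stitching_window
bedtools merge -d 12500 -i filtered.bed > stitched.bed

# TSS_OVERLAP + FILTER_OVERLAPS: stitched regions spanning two or more TSS windows
bedtools intersect -a stitched.bed -b tss_windows.bed -c \
    | awk 'BEGIN {OFS = "\t"} $NF >= 2 {print $1, $2, $3}' > multi_tss.bed

# SUBTRACT_OVERLAPS: keep stitched regions spanning fewer than two TSS windows
bedtools subtract -a stitched.bed -b multi_tss.bed > kept_stitched.bed

# UNSTITCHED_REGIONS: restore the original calls inside multi-TSS regions
bedtools intersect -a multi_tss.bed -b enhancers.bed -F 1 > unstitched.bed

# CONCAT_AND_SORT: final candidate regions
cat kept_stitched.bed unstitched.bed | sort -k1,1 -k2,2n > rose_regions.bed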