diff --git a/MACS3/Commands/callvar_cmd.py b/MACS3/Commands/callvar_cmd.py index 7f1a8097..cbd900fc 100644 --- a/MACS3/Commands/callvar_cmd.py +++ b/MACS3/Commands/callvar_cmd.py @@ -1,4 +1,4 @@ -# Time-stamp: <2024-10-02 16:34:23 Tao Liu> +# Time-stamp: <2024-10-11 10:28:07 Tao Liu> """Description: Call variants directly @@ -137,11 +137,11 @@ def run(args): peakio = open(peakbedfile) peaks = PeakIO() - i = 0 + #i = 0 for t_peak in peakio: fs = t_peak.rstrip().split() - i += 1 - peaks.add(fs[0].encode(), int(fs[1]), int(fs[2]), name=b"%d" % i) + # i += 1 + peaks.add(fs[0].encode(), int(fs[1]), int(fs[2])) # , name=b"%d" % i) peaks.sort() # chrs = peaks.get_chr_names() diff --git a/MACS3/Commands/refinepeak_cmd.py b/MACS3/Commands/refinepeak_cmd.py index 47f7610a..ba9a4939 100644 --- a/MACS3/Commands/refinepeak_cmd.py +++ b/MACS3/Commands/refinepeak_cmd.py @@ -1,4 +1,4 @@ -# Time-stamp: <2024-10-02 17:01:42 Tao Liu> +# Time-stamp: <2024-10-11 11:11:00 Tao Liu> """Description: refine peak summits diff --git a/MACS3/IO/PeakIO.py b/MACS3/IO/PeakIO.py index 433dafbf..9ba1496f 100644 --- a/MACS3/IO/PeakIO.py +++ b/MACS3/IO/PeakIO.py @@ -1,6 +1,6 @@ # cython: language_level=3 # cython: profile=True -# Time-stamp: <2024-10-10 17:00:18 Tao Liu> +# Time-stamp: <2024-10-11 11:13:11 Tao Liu> """Module for PeakIO IO classes. @@ -15,7 +15,6 @@ from itertools import groupby from operator import itemgetter import random -import re import sys # ------------------------------------ @@ -75,7 +74,7 @@ def __init__(self, pscore: cython.float, fold_change: cython.float, qscore: cython.float, - name: bytes = b"NA"): + name: bytes = b""): self.chrom = chrom self.start = start self.end = end @@ -163,26 +162,15 @@ def __init__(self): @cython.ccall def add(self, chromosome: bytes, - start: cython.int, - end: cython.int, - summit: cython.int = 0, - peak_score: cython.float = 0, - pileup: cython.float = 0, - pscore: cython.float = 0, - fold_change: cython.float = 0, - qscore: cython.float = 0, - name: bytes = b"NA"): - """items: - start:start - end:end, - length:end-start, - summit:summit, - score:peak_score, - pileup:pileup, - pscore:pscore, - fc:fold_change, - qscore:qscore - """ + start: cython.int, # leftmost position + end: cython.int, # rightmost position + summit: cython.int = 0, # summit position + peak_score: cython.float = 0, # score + pileup: cython.float = 0, # pileup value + pscore: cython.float = 0, # -log10 pvalue + fold_change: cython.float = 0, # fold change + qscore: cython.float = 0, # -log10 qvalue + name: bytes = b""): # peak name if not self.peaks.has_key(chromosome): self.peaks[chromosome] = [] self.peaks[chromosome].append(PeakContent(chromosome, @@ -215,7 +203,7 @@ def get_data_from_chrom(self, chrom: bytes) -> list: return self.peaks[chrom] def get_chr_names(self) -> set: - return set(sorted(self.peaks.keys())) + return set(self.peaks.keys()) def sort(self): chrs: list @@ -342,6 +330,8 @@ def __str__(self): chrs: list n_peak: cython.int ret: str + chrom: bytes + peaks: list ret = "" chrs = list(self.peaks.keys()) @@ -448,7 +438,7 @@ def _to_summits_bed(self, print_func("%s\t%d\t%d\t%s%d\t%.6g\n" % (chrom.decode(), summit_p, summit_p+1, peakprefix.decode(), n_peak, peak[score_column])) def tobed(self): - """Print out peaks in BED5 format. + """Print out (stdout) peaks in BED5 format. Five columns are chromosome, peak start, peak end, peak name, and peak height. @@ -462,19 +452,19 @@ def tobed(self): fc:fold_change, qscore:qvalue """ - return self._to_bed(name_prefix=b"peak_", score_column="score", name=b"", description=b"") + return self._to_bed(name_prefix=b"%s_peak_", score_column="score", name=self.name, description=b"") def to_summits_bed(self): - """Print out peak summits in BED5 format. + """Print out (stdout) peak summits in BED5 format. Five columns are chromosome, summit start, summit end, peak name, and peak height. """ - return self._to_summits_bed(name_prefix=b"peak_", score_column="score", name=b"", description=b"") + return self._to_summits_bed(name_prefix=b"%s_peak_", score_column="score", name=self.name, description=b"") # these methods are very fast, specifying types is unnecessary def write_to_bed(self, fhd, - name_prefix: bytes = b"peak_", + name_prefix: bytes = b"%s_peak_", name: bytes = b"MACS", description: bytes = b"%s", score_column: str = "score", @@ -538,7 +528,7 @@ def write_to_summit_bed(self, fhd, def write_to_narrowPeak(self, fhd, name_prefix: bytes = b"%s_peak_", - name: bytes = b"peak", + name: bytes = b"MACS", score_column: str = "score", trackline: bool = False): """Print out peaks in narrowPeak format. diff --git a/MACS3/Signal/FixWidthTrack.pyx b/MACS3/Signal/FixWidthTrack.pyx index 077d6324..01e5e0f6 100644 --- a/MACS3/Signal/FixWidthTrack.pyx +++ b/MACS3/Signal/FixWidthTrack.pyx @@ -1,6 +1,6 @@ # cython: language_level=3 # cython: profile=True -# Time-stamp: <2022-09-15 17:17:37 Tao Liu> +# Time-stamp: <2024-10-11 11:11:10 Tao Liu> """Module for FWTrack classes. diff --git a/MACS3/Signal/PairedEndTrack.py b/MACS3/Signal/PairedEndTrack.py index 8273495a..848bc55b 100644 --- a/MACS3/Signal/PairedEndTrack.py +++ b/MACS3/Signal/PairedEndTrack.py @@ -1,6 +1,6 @@ # cython: language_level=3 # cython: profile=True -# Time-stamp: <2024-10-10 17:03:45 Tao Liu> +# Time-stamp: <2024-10-11 11:21:30 Tao Liu> """Module for filter duplicate tags from paired-end data @@ -684,6 +684,7 @@ def add_frag(self, if chromosome not in self.__locations: self.__buf_size[chromosome] = self.buffer_size # note: ['l'] is the leftmost end, ['r'] is the rightmost end of fragment. + # ['c'] is the count number of this fragment self.__locations[chromosome] = np.zeros(shape=self.buffer_size, dtype=[('l', 'i4'), ('r', 'i4'), ('c', 'u1')]) self.__barcodes[chromosome] = np.zeros(shape=self.buffer_size, diff --git a/test/test_ScoreTrack.py b/test/test_ScoreTrack.py index 61eadc4e..41eaeb98 100644 --- a/test/test_ScoreTrack.py +++ b/test/test_ScoreTrack.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Time-stamp: <2020-11-30 14:12:58 Tao Liu> +# Time-stamp: <2024-10-11 10:17:53 Tao Liu> import io import unittest @@ -96,11 +96,11 @@ def setUp(self): chrY 160 210 6.40804 """ # for peak calls - self.peak1 = """chrY 0 60 peak_1 60.4891 -chrY 160 210 peak_2 6.40804 + self.peak1 = """chrY 0 60 MACS_peak_1 60.4891 +chrY 160 210 MACS_peak_2 6.40804 """ - self.summit1 = """chrY 5 6 peak_1 60.4891 -chrY 185 186 peak_2 6.40804 + self.summit1 = """chrY 5 6 MACS_peak_1 60.4891 +chrY 185 186 MACS_peak_2 6.40804 """ self.xls1 ="""chr start end length abs_summit pileup -log10(pvalue) fold_enrichment -log10(qvalue) name chrY 1 60 60 6 100 63.2725 9.18182 -1 MACS_peak_1