From 2e5837c1420da880d725f422f66966f6d5efd1f2 Mon Sep 17 00:00:00 2001
From: Tao Liu <vladimir.liu@gmail.com>
Date: Thu, 10 Oct 2024 17:17:23 -0400
Subject: [PATCH] Rewrite PeakIO.pyx and PairedEndTrack.pyx as .py modules

---
 MACS3/IO/{PeakIO.pyx => PeakIO.py} | 1010 +++++++++++++++++-----------
 MACS3/Signal/CallPeakUnit.pyx      |    4 +-
 MACS3/Signal/PairedEndTrack.py     |  730 ++++++++++++++++++++
 MACS3/Signal/PairedEndTrack.pyx    |  584 ----------------
 MACS3/Signal/ScoreTrack.pyx        |    4 +-
 setup.py                           |    4 +-
 6 files changed, 1350 insertions(+), 986 deletions(-)
 rename MACS3/IO/{PeakIO.pyx => PeakIO.py} (60%)
 create mode 100644 MACS3/Signal/PairedEndTrack.py
 delete mode 100644 MACS3/Signal/PairedEndTrack.pyx

diff --git a/MACS3/IO/PeakIO.pyx b/MACS3/IO/PeakIO.py
similarity index 60%
rename from MACS3/IO/PeakIO.pyx
rename to MACS3/IO/PeakIO.py
index e959db25..433dafbf 100644
--- a/MACS3/IO/PeakIO.pyx
+++ b/MACS3/IO/PeakIO.py
@@ -1,6 +1,6 @@
 # cython: language_level=3
 # cython: profile=True
-# Time-stamp: <2024-09-06 14:56:51 Tao Liu>
+# Time-stamp: <2024-10-10 17:00:18 Tao Liu>
 
 """Module for PeakIO IO classes.
 
@@ -22,25 +22,25 @@
 # MACS3 modules
 # ------------------------------------
 
-from MACS3.Utilities.Constants import *
+# from MACS3.Utilities.Constants import *
 
 # ------------------------------------
 # Other modules
 # ------------------------------------
-
-from cpython cimport bool
+import cython
+from cython.cimports.cpython import bool
 
 # ------------------------------------
 # constants
 # ------------------------------------
-__version__ = "PeakIO $Revision$"
-__author__ = "Tao Liu <taoliu@jimmy.harvard.edu>"
-__doc__ = "PeakIO class"
 
 # ------------------------------------
 # Misc functions
 # ------------------------------------
-cdef str subpeak_letters( int i):
+
+
+@cython.cfunc
+def subpeak_letters(i: cython.int) -> str:
     if i < 26:
         return chr(97+i)
     else:
@@ -50,24 +50,32 @@
 # Classes
 # ------------------------------------
 
-cdef class PeakContent:
-    cdef:
-        bytes chrom
-        int start
-        int end
-        int length
-        int summit
-        float score
-        float pileup
-        float pscore
-        float fc
-        float qscore
-        bytes name
-
-    def __init__ ( self, bytes chrom, int start, int end, int summit,
-                   float peak_score, float pileup,
-                   float pscore, float fold_change, float qscore,
-                   bytes name= b"NA" ):
+
+@cython.cclass
+class PeakContent:
+    chrom: bytes
+    start: cython.int
+    end: cython.int
+    length: cython.int
+    summit: cython.int
+    score: cython.float
+    pileup: cython.float
+    pscore: cython.float
+    fc: cython.float
+    qscore: cython.float
+    name: bytes
+
+    def __init__(self,
+                 chrom: bytes,
+                 start: cython.int,
+                 end: cython.int,
+                 summit: cython.int,
+                 peak_score: cython.float,
+                 pileup: cython.float,
+                 pscore: cython.float,
+                 fold_change: cython.float,
+                 qscore: cython.float,
+                 name: bytes = b"NA"):
         self.chrom = chrom
         self.start = start
         self.end = end
@@ -80,7 +88,7 @@ def __init__ ( self, bytes chrom, int start, int end, int summit,
         self.qscore = qscore
         self.name = name
 
-    def __getitem__ ( self, a ):
+    def __getitem__(self, a: str):
         if a == "chrom":
             return self.chrom
         elif a == "start":
@@ -104,7 +112,7 @@ def __getitem__ ( self, a ):
         elif a == "name":
             return self.name
 
-    def __setitem__ ( self, a, v ):
+    def __setitem__(self, a: str, v):
         if a == "chrom":
             self.chrom = v
         elif a == "start":
@@ -128,27 +136,42 @@ def __setitem__ ( self, a, v ):
         elif a == "name":
             self.name = v
 
-    def __str__ (self):
-        return "chrom:%s;start:%d;end:%d;score:%f" % ( self.chrom, self.start, self.end, self.score )
+    def __str__(self):
+        return "chrom:%s;start:%d;end:%d;score:%f" % (self.chrom,
+                                                      self.start,
+                                                      self.end,
+                                                      self.score)
+
 
-cdef class PeakIO:
+@cython.cclass
+class PeakIO:
     """IO for peak information.
 
     """
-    cdef:
-        public dict peaks       # dictionary storing peak contents
-        public bool CO_sorted   # whether peaks have been sorted by coordinations
-        public long total       # total number of peaks
-
-    def __init__ (self):
+    # dictionary storing peak contents
+    peaks = cython.declare(dict, visibility="public")
+    # whether peaks have been sorted by coordinates
+    CO_sorted = cython.declare(bool, visibility="public")
+    # total number of peaks
+    total = cython.declare(cython.long, visibility="public")
+
+    def __init__(self):
         self.peaks = {}
         self.CO_sorted = False
         self.total = 0
 
-    cpdef add (self, bytes chromosome, int start, int end, int summit = 0,
-               float peak_score = 0, float pileup = 0,
-               float pscore = 0, float fold_change = 0, float qscore = 0,
-               bytes name = b"NA"):
+    @cython.ccall
+    def add(self,
+            chromosome: bytes,
+            start: cython.int,
+            end: cython.int,
+            summit: cython.int = 0,
+            peak_score: cython.float = 0,
+            pileup: cython.float = 0,
+            pscore: cython.float = 0,
+            fold_change: cython.float = 0,
+            qscore: cython.float = 0,
+            name: bytes = b"NA"):
         """items:
         start:start
         end:end,
@@ -161,154 +184,165 @@ def __init__ (self):
         qscore:qscore
         """
         if not self.peaks.has_key(chromosome):
-            self.peaks[chromosome]=[]
-        self.peaks[chromosome].append(PeakContent( chromosome, start, end, summit, peak_score, pileup, pscore, fold_change, qscore, name))
+            self.peaks[chromosome] = []
+        self.peaks[chromosome].append(PeakContent(chromosome,
+                                                  start,
+                                                  end,
+                                                  summit,
+                                                  peak_score,
+                                                  pileup,
+                                                  pscore,
+                                                  fold_change,
+                                                  qscore,
+                                                  name))
         self.total += 1
         self.CO_sorted = False
 
-    cpdef add_PeakContent ( self, bytes chromosome, object peakcontent ):
+    @cython.ccall
+    def add_PeakContent(self,
+                        chromosome: bytes,
+                        peakcontent: PeakContent):
         if not self.peaks.has_key(chromosome):
-            self.peaks[chromosome]=[]
+            self.peaks[chromosome] = []
         self.peaks[chromosome].append(peakcontent)
         self.total += 1
         self.CO_sorted = False
 
-    cpdef list get_data_from_chrom (self, bytes chrom):
-        if not self.peaks.has_key( chrom ):
-            self.peaks[chrom]= []
+    @cython.ccall
+    def get_data_from_chrom(self, chrom: bytes) -> list:
+        if not self.peaks.has_key(chrom):
+            self.peaks[chrom] = []
         return self.peaks[chrom]
 
-    cpdef set get_chr_names (self):
+    def get_chr_names(self) -> set:
         return set(sorted(self.peaks.keys()))
 
-    def sort ( self ):
-        cdef:
-            list chrs
-            bytes chrom
+    def sort(self):
+        chrs: list
+        chrom: bytes
+
         # sort by position
         if self.CO_sorted:
             # if already sorted, quit
             return
         chrs = sorted(list(self.peaks.keys()))
         for chrom in sorted(chrs):
-            self.peaks[chrom].sort(key=lambda x:x['start'])
+            self.peaks[chrom].sort(key=lambda x: x['start'])
         self.CO_sorted = True
         return
 
-    cpdef object randomly_pick ( self, int n, int seed = 12345 ):
+    @cython.ccall
+    def randomly_pick(self, n: cython.int, seed: cython.int = 12345):
         """Shuffle the peaks and get n peaks out of it. Return a new
         PeakIO object.
         """
-        cdef:
-            list all_pc
-            list chrs
-            bytes chrom
-            object ret_peakio, p
+        all_pc: list
+        chrs: list
+        chrom: bytes
+        ret_peakio: PeakIO
+        p: PeakContent
+
         assert n > 0
         chrs = sorted(list(self.peaks.keys()))
         all_pc = []
         for chrom in sorted(chrs):
             all_pc.extend(self.peaks[chrom])
-        random.seed( seed )
-        random.shuffle( all_pc )
+        random.seed(seed)
+        random.shuffle(all_pc)
         all_pc = all_pc[:n]
         ret_peakio = PeakIO()
         for p in all_pc:
-            ret_peakio.add_PeakContent ( p["chrom"], p )
+            ret_peakio.add_PeakContent(p["chrom"], p)
         return ret_peakio
-    
-    cpdef void filter_pscore (self, double pscore_cut ):
-        cdef:
-            bytes chrom
-            dict new_peaks
-            list chrs
-            object p
+
+    @cython.ccall
+    def filter_pscore(self, pscore_cut: cython.double):
+        chrom: bytes
+        new_peaks: dict
+        chrs: list
+
         new_peaks = {}
         chrs = sorted(list(self.peaks.keys()))
         self.total = 0
         for chrom in sorted(chrs):
-            new_peaks[chrom]=[p for p in self.peaks[chrom] if p['pscore'] >= pscore_cut]
-            self.total +=  len( new_peaks[chrom] )
+            new_peaks[chrom] = [p for p in self.peaks[chrom] if p['pscore'] >= pscore_cut]
+            self.total += len(new_peaks[chrom])
         self.peaks = new_peaks
         self.CO_sorted = True
         self.sort()
 
-    cpdef void filter_qscore (self, double qscore_cut ):
-        cdef:
-            bytes chrom
-            dict new_peaks
-            list chrs
-            object p
+    @cython.ccall
+    def filter_qscore(self, qscore_cut: cython.double):
+        chrom: bytes
+        new_peaks: dict
+        chrs: list
 
         new_peaks = {}
         chrs = sorted(list(self.peaks.keys()))
         self.total = 0
         for chrom in sorted(chrs):
-            new_peaks[chrom]=[p for p in self.peaks[chrom] if p['qscore'] >= qscore_cut]
-            self.total +=  len( new_peaks[chrom] )
+            new_peaks[chrom] = [p for p in self.peaks[chrom] if p['qscore'] >= qscore_cut]
+            self.total += len(new_peaks[chrom])
         self.peaks = new_peaks
         self.CO_sorted = True
         self.sort()
 
-    cpdef void filter_fc (self, float fc_low, float fc_up = 0 ):
+    @cython.ccall
+    def filter_fc(self, fc_low: cython.float, fc_up: cython.float = 0):
         """Filter peaks in a given fc range.
 
         If fc_low and fc_up is assigned, the peaks with fc in [fc_low,fc_up)
 
         """
-        cdef:
-            bytes chrom
-            dict new_peaks
-            list chrs
-            object p
+        chrom: bytes
+        new_peaks: dict
+        chrs: list
 
         new_peaks = {}
         chrs = list(self.peaks.keys())
         self.total = 0
         if fc_up > 0 and fc_up > fc_low:
             for chrom in sorted(chrs):
-                new_peaks[chrom]=[p for p in self.peaks[chrom] if p['fc'] >= fc_low and p['fc']<fc_up]
-                self.total +=  len( new_peaks[chrom] )
+                new_peaks[chrom] = [p for p in self.peaks[chrom] if p['fc'] >= fc_low and p['fc'] < fc_up]
+                self.total += len(new_peaks[chrom])
         else:
             for chrom in sorted(chrs):
-                new_peaks[chrom]=[p for p in self.peaks[chrom] if p['fc'] >= fc_low]
-                self.total +=  len( new_peaks[chrom] )
+                new_peaks[chrom] = [p for p in self.peaks[chrom] if p['fc'] >= fc_low]
+                self.total += len(new_peaks[chrom])
         self.peaks = new_peaks
         self.CO_sorted = True
         self.sort()
 
-    cpdef void filter_score (self, float lower_score, float upper_score = 0 ):
+    def filter_score(self, lower_score: cython.float, upper_score: cython.float = 0):
         """Filter peaks in a given score range.
 
         """
-        cdef:
-            bytes chrom
-            dict new_peaks
-            list chrs
-            object p
+        chrom: bytes
+        new_peaks: dict
+        chrs: list
 
         new_peaks = {}
         chrs = list(self.peaks.keys())
         self.total = 0
         if upper_score > 0 and upper_score > lower_score:
             for chrom in sorted(chrs):
-                new_peaks[chrom]=[p for p in self.peaks[chrom] if p['score'] >= lower_score and p['score']<upper_score]
-                self.total +=  len( new_peaks[chrom] )
+                new_peaks[chrom] = [p for p in self.peaks[chrom] if p['score'] >= lower_score and p['score'] < upper_score]
+                self.total += len(new_peaks[chrom])
         else:
             for chrom in sorted(chrs):
-                new_peaks[chrom]=[p for p in self.peaks[chrom] if p['score'] >= lower_score]
-                self.total +=  len( new_peaks[chrom] )
+                new_peaks[chrom] = [p for p in self.peaks[chrom] if p['score'] >= lower_score]
+                self.total += len(new_peaks[chrom])
         self.peaks = new_peaks
         self.CO_sorted = True
         self.sort()
 
-    def __str__ (self):
+    def __str__(self):
         """convert to text -- for debug
         """
-        cdef:
-            list chrs
-            int n_peak
-            str ret            
+        chrs: list
+        n_peak: cython.int
+        ret: str
+
         ret = ""
         chrs = list(self.peaks.keys())
         n_peak = 0
@@ -318,38 +352,44 @@ def __str__ (self):
                 peaks = list(group)
                 if len(peaks) > 1:
                     for i, peak in enumerate(peaks):
-                        ret += "chrom:%s\tstart:%d\tend:%d\tname:peak_%d%s\tscore:%.6g\tsummit:%d\n" % (chrom.decode(),peak['start'],peak['end'],n_peak,subpeak_letters(i),peak["score"],peak["summit"])
+                        ret += "chrom:%s\tstart:%d\tend:%d\tname:peak_%d%s\tscore:%.6g\tsummit:%d\n" % (chrom.decode(), peak['start'], peak['end'], n_peak, subpeak_letters(i), peak["score"], peak["summit"])
                 else:
                     peak = peaks[0]
-                    ret += "chrom:%s\tstart:%d\tend:%d\tname:peak_%d\tscore:%.6g\tsummit:%d\n" % (chrom.decode(),peak['start'],peak['end'],n_peak,peak["score"],peak["summit"])                    
-                    
+                    ret += "chrom:%s\tstart:%d\tend:%d\tname:peak_%d\tscore:%.6g\tsummit:%d\n" % (chrom.decode(), peak['start'], peak['end'], n_peak, peak["score"], peak["summit"])
         return ret
 
-    cdef void _to_bed(self, bytes name_prefix=b"%s_peak_", bytes name=b"MACS",
-                      bytes description=b"%s", str score_column="score",
-                      bool trackline=False, print_func=sys.stdout.write):
+    @cython.cfunc
+    def _to_bed(self,
+                name_prefix: bytes = b"%s_peak_",
+                name: bytes = b"MACS",
+                description: bytes = b"%s",
+                score_column: str = "score",
+                trackline: bool = False,
+                print_func=sys.stdout.write):
         """
         generalization of tobed and write_to_bed
         """
-        cdef:
-            list chrs
-            int n_peak
-            bytes peakprefix, desc
+        chrs: list
+        n_peak: cython.int
+        peakprefix: bytes
+        desc: bytes
+
         chrs = list(self.peaks.keys())
         n_peak = 0
         try:
             peakprefix = name_prefix % name
-        except:
+        except Exception:
             peakprefix = name_prefix
         try:
             desc = description % name
-        except:
+        except Exception:
             desc = description
+
         if trackline:
             try:
-                print_func('track name="%s (peaks)" description="%s" visibility=1\n' % ( name.replace(b"\"", b"\\\"").decode(),
-                                                                                         desc.replace(b"\"", b"\\\"").decode() ) )
-            except:
+                print_func('track name="%s (peaks)" description="%s" visibility=1\n' % (name.replace(b"\"", b"\\\"").decode(),
+                                                                                        desc.replace(b"\"", b"\\\"").decode()))
+            except Exception:
                 print_func('track name=MACS description=Unknown\n')
         for chrom in sorted(chrs):
             for end, group in groupby(self.peaks[chrom], key=itemgetter("end")):
@@ -357,27 +397,43 @@ def __str__ (self):
                 peaks = list(group)
                 if len(peaks) > 1:
                     for i, peak in enumerate(peaks):
-                        print_func("%s\t%d\t%d\t%s%d%s\t%.6g\n" % (chrom.decode(),peak['start'],peak['end'],peakprefix.decode(),n_peak,subpeak_letters(i),peak[score_column]))
+                        print_func("%s\t%d\t%d\t%s%d%s\t%.6g\n" % (chrom.decode(), peak['start'], peak['end'], peakprefix.decode(), n_peak, subpeak_letters(i), peak[score_column]))
                 else:
                     peak = peaks[0]
-                    print_func("%s\t%d\t%d\t%s%d\t%.6g\n" % (chrom.decode(),peak['start'],peak['end'],peakprefix.decode(),n_peak,peak[score_column]))
-
-    cdef _to_summits_bed(self, bytes name_prefix=b"%s_peak_", bytes name=b"MACS",
-                        bytes description = b"%s", str score_column="score",
-                        bool trackline=False, print_func=sys.stdout.write):
+                    print_func("%s\t%d\t%d\t%s%d\t%.6g\n" % (chrom.decode(), peak['start'], peak['end'], peakprefix.decode(), n_peak, peak[score_column]))
+
+    @cython.cfunc
+    def _to_summits_bed(self,
+                        name_prefix: bytes = b"%s_peak_",
+                        name: bytes = b"MACS",
+                        description: bytes = b"%s",
+                        score_column: str = "score",
+                        trackline: bool = False,
+                        print_func=sys.stdout.write):
         """
         generalization of to_summits_bed and write_to_summit_bed
         """
+        chrs: list
+        n_peak: cython.int
+        peakprefix: bytes
+        desc: bytes
+
         chrs = list(self.peaks.keys())
         n_peak = 0
-        try: peakprefix = name_prefix % name
-        except: peakprefix = name_prefix
-        try: desc = description % name
-        except: desc = description
+        try:
+            peakprefix = name_prefix % name
+        except Exception:
+            peakprefix = name_prefix
+        try:
+            desc = description % name
+        except Exception:
+            desc = description
         if trackline:
-            try: print_func('track name="%s (summits)" description="%s" visibility=1\n' % ( name.replace(b"\"", b"\\\"").decode(),\
-                                                                                            desc.replace(b"\"", b"\\\"").decode() ) )
-            except: print_func('track name=MACS description=Unknown')
+            try:
+                print_func('track name="%s (summits)" description="%s" visibility=1\n' % (name.replace(b"\"", b"\\\"").decode(),
+                                                                                          desc.replace(b"\"", b"\\\"").decode()))
+            except Exception:
+                print_func('track name=MACS description=Unknown')
         for chrom in sorted(chrs):
             for end, group in groupby(self.peaks[chrom], key=itemgetter("end")):
                 n_peak += 1
@@ -385,13 +441,13 @@ def __str__ (self):
                 if len(peaks) > 1:
                     for i, peak in enumerate(peaks):
                         summit_p = peak['summit']
-                        print_func("%s\t%d\t%d\t%s%d%s\t%.6g\n" % (chrom.decode(),summit_p,summit_p+1,peakprefix.decode(),n_peak,subpeak_letters(i),peak[score_column]))
+                        print_func("%s\t%d\t%d\t%s%d%s\t%.6g\n" % (chrom.decode(), summit_p, summit_p+1, peakprefix.decode(), n_peak, subpeak_letters(i), peak[score_column]))
                 else:
                     peak = peaks[0]
                     summit_p = peak['summit']
-                    print_func("%s\t%d\t%d\t%s%d\t%.6g\n" % (chrom.decode(),summit_p,summit_p+1,peakprefix.decode(),n_peak,peak[score_column]))
+                    print_func("%s\t%d\t%d\t%s%d\t%.6g\n" % (chrom.decode(), summit_p, summit_p+1, peakprefix.decode(), n_peak, peak[score_column]))
 
-    def tobed (self):
+    def tobed(self):
         """Print out peaks in BED5 format.
 
         Five columns are chromosome, peak start, peak end, peak name, and peak height.
@@ -408,7 +464,7 @@ def tobed (self):
         """
         return self._to_bed(name_prefix=b"peak_", score_column="score", name=b"", description=b"")
 
-    def to_summits_bed (self):
+    def to_summits_bed(self):
         """Print out peak summits in BED5 format.
 
         Five columns are chromosome, summit start, summit end, peak name, and peak height.
@@ -417,8 +473,12 @@ def to_summits_bed (self):
         return self._to_summits_bed(name_prefix=b"peak_", score_column="score", name=b"", description=b"")
 
     # these methods are very fast, specifying types is unnecessary
-    def write_to_bed (self, fhd, bytes name_prefix=b"peak_", bytes name=b"MACS",
-                        bytes description = b"%s", str score_column="score", trackline=True):
+    def write_to_bed(self, fhd,
+                     name_prefix: bytes = b"peak_",
+                     name: bytes = b"MACS",
+                     description: bytes = b"%s",
+                     score_column: str = "score",
+                     trackline: bool = True):
         """Write peaks in BED5 format in a file handler. Score (5th
         column) is decided by score_column setting. Check the
         following list. Name column ( 4th column) is made by putting
@@ -439,13 +499,20 @@ def write_to_bed (self, fhd, bytes name_prefix=b"peak_", bytes name=b"MACS",
         fc:fold_change,
         qscore:qvalue
         """
-        #print(description)
-        return self._to_bed(name_prefix=name_prefix, name=name,
-                            description=description, score_column=score_column,
-                            print_func=fhd.write, trackline=trackline)
-
-    def write_to_summit_bed (self, fhd, bytes name_prefix = b"peak_", bytes name = b"MACS",
-                             bytes description = b"%s", str score_column ="score", trackline=True):
+        # print(description)
+        return self._to_bed(name_prefix=name_prefix,
+                            name=name,
+                            description=description,
+                            score_column=score_column,
+                            print_func=fhd.write,
+                            trackline=trackline)
+
+    def write_to_summit_bed(self, fhd,
+                            name_prefix: bytes = b"%s_peak_",
+                            name: bytes = b"MACS",
+                            description: bytes = b"%s",
+                            score_column: str = "score",
+                            trackline: bool = False):
         """Write peak summits in BED5 format in a file handler. Score
         (5th column) is decided by score_column setting. Check the
         following list. Name column ( 4th column) is made by putting
@@ -469,7 +536,11 @@ def write_to_summit_bed (self, fhd, bytes name_prefix = b"peak_", bytes name = b
                                     description=description, score_column=score_column,
                                     print_func=fhd.write, trackline=trackline)
 
-    def write_to_narrowPeak (self, fhd, bytes name_prefix = b"peak_", bytes name = b"peak", str score_column="score", trackline=True):
+    def write_to_narrowPeak(self, fhd,
+                            name_prefix: bytes = b"%s_peak_",
+                            name: bytes = b"peak",
+                            score_column: str = "score",
+                            trackline: bool = False):
         """Print out peaks in narrowPeak format.
 
         This format is designed for ENCODE project, and basically a
@@ -523,33 +594,41 @@ def write_to_narrowPeak (self, fhd, bytes name_prefix = b"peak_", bytes name = b
         +-----------+------+----------------------------------------+
 
         """
-        cdef int n_peak
-        cdef bytes chrom
-        cdef long s
-        cdef str peakname
+        n_peak: cython.int
+        chrom: bytes
+        s: cython.long
+        peakname: str
 
         chrs = list(self.peaks.keys())
         n_peak = 0
         write = fhd.write
-        try: peakprefix = name_prefix % name
-        except: peakprefix = name_prefix
+        try:
+            peakprefix = name_prefix % name
+        except Exception:
+            peakprefix = name_prefix
         if trackline:
             write("track type=narrowPeak name=\"%s\" description=\"%s\" nextItemButton=on\n" % (name.decode(), name.decode()))
         for chrom in sorted(chrs):
             for end, group in groupby(self.peaks[chrom], key=itemgetter("end")):
                 n_peak += 1
                 these_peaks = list(group)
-                if len(these_peaks) > 1: # from call-summits
+                if len(these_peaks) > 1:  # from call-summits
                     for i, peak in enumerate(these_peaks):
                         peakname = "%s%d%s" % (peakprefix.decode(), n_peak, subpeak_letters(i))
                         if peak['summit'] == -1:
                             s = -1
                         else:
                             s = peak['summit'] - peak['start']
-                        fhd.write( "%s\t%d\t%d\t%s\t%d\t.\t%.6g\t%.6g\t%.6g\t%d\n"
-                                   %
-                                   (chrom.decode(),peak['start'],peak['end'],peakname,int(10*peak[score_column]),
-                                    peak['fc'],peak['pscore'],peak['qscore'],s) )
+                        fhd.write("%s\t%d\t%d\t%s\t%d\t.\t%.6g\t%.6g\t%.6g\t%d\n" %
+                                  (chrom.decode(),
+                                   peak['start'],
+                                   peak['end'],
+                                   peakname,
+                                   int(10*peak[score_column]),
+                                   peak['fc'],
+                                   peak['pscore'],
+                                   peak['qscore'],
+                                   s))
                 else:
                     peak = these_peaks[0]
                     peakname = "%s%d" % (peakprefix.decode(), n_peak)
@@ -557,13 +636,22 @@ def write_to_narrowPeak (self, fhd, bytes name_prefix = b"peak_", bytes name = b
                         s = -1
                     else:
                         s = peak['summit'] - peak['start']
-                    fhd.write( "%s\t%d\t%d\t%s\t%d\t.\t%.6g\t%.6g\t%.6g\t%d\n"
-                               %
-                               (chrom.decode(),peak['start'],peak['end'],peakname,int(10*peak[score_column]),
-                                peak['fc'],peak['pscore'],peak['qscore'],s) )
+                    fhd.write("%s\t%d\t%d\t%s\t%d\t.\t%.6g\t%.6g\t%.6g\t%d\n" %
+                              (chrom.decode(),
+                               peak['start'],
+                               peak['end'],
+                               peakname,
+                               int(10*peak[score_column]),
+                               peak['fc'],
+                               peak['pscore'],
+                               peak['qscore'],
+                               s))
         return
 
-    def write_to_xls (self, ofhd, bytes name_prefix = b"%s_peak_", bytes name = b"MACS"):
+    @cython.ccall
+    def write_to_xls(self, ofhd,
+                     name_prefix: bytes = b"%s_peak_",
+                     name: bytes = b"MACS"):
         """Save the peak results in a tab-delimited plain text file
         with suffix .xls.
 
@@ -571,11 +659,19 @@ def write_to_xls (self, ofhd, bytes name_prefix = b"%s_peak_", bytes name = b"MA
         wait... why I have two write_to_xls in this class?
 
         """
+        peakprefix: bytes
+        chrs: list
+        these_peaks: list
+        n_peak: cython.int
+        i: cython.int
+
         write = ofhd.write
-        write("\t".join(("chr","start", "end",  "length",  "abs_summit", "pileup", "-log10(pvalue)", "fold_enrichment", "-log10(qvalue)", "name"))+"\n")
+        write("\t".join(("chr", "start", "end",  "length",  "abs_summit", "pileup", "-log10(pvalue)", "fold_enrichment", "-log10(qvalue)", "name"))+"\n")
 
-        try: peakprefix = name_prefix % name
-        except: peakprefix = name_prefix
+        try:
+            peakprefix = name_prefix % name
+        except Exception:
+            peakprefix = name_prefix
 
         peaks = self.peaks
         chrs = list(peaks.keys())
@@ -587,47 +683,56 @@ def write_to_xls (self, ofhd, bytes name_prefix = b"%s_peak_", bytes name = b"MA
                 if len(these_peaks) > 1:
                     for i, peak in enumerate(these_peaks):
                         peakname = "%s%d%s" % (peakprefix.decode(), n_peak, subpeak_letters(i))
-                        #[start,end,end-start,summit,peak_height,number_tags,pvalue,fold_change,qvalue]
-                        write("%s\t%d\t%d\t%d" % (chrom.decode(),peak['start']+1,peak['end'],peak['length']))
-                        write("\t%d" % (peak['summit']+1)) # summit position
-                        write("\t%.6g" % (round(peak['pileup'],2))) # pileup height at summit
-                        write("\t%.6g" % (peak['pscore'])) # -log10pvalue at summit
-                        write("\t%.6g" % (peak['fc'])) # fold change at summit
-                        write("\t%.6g" % (peak['qscore'])) # -log10qvalue at summit
+                        # [start,end,end-start,summit,peak_height,number_tags,pvalue,fold_change,qvalue]
+                        write("%s\t%d\t%d\t%d" % (chrom.decode(),
+                                                  peak['start']+1,
+                                                  peak['end'],
+                                                  peak['length']))
+                        write("\t%d" % (peak['summit']+1))  # summit position
+                        write("\t%.6g" % (round(peak['pileup'], 2)))  # pileup height at summit
+                        write("\t%.6g" % (peak['pscore']))  # -log10pvalue at summit
+                        write("\t%.6g" % (peak['fc']))  # fold change at summit
+                        write("\t%.6g" % (peak['qscore']))  # -log10qvalue at summit
                         write("\t%s" % peakname)
                         write("\n")
                 else:
                     peak = these_peaks[0]
                     peakname = "%s%d" % (peakprefix.decode(), n_peak)
-                    #[start,end,end-start,summit,peak_height,number_tags,pvalue,fold_change,qvalue]
-                    write("%s\t%d\t%d\t%d" % (chrom.decode(),peak['start']+1,peak['end'],peak['length']))
-                    write("\t%d" % (peak['summit']+1)) # summit position
-                    write("\t%.6g" % (round(peak['pileup'],2))) # pileup height at summit
-                    write("\t%.6g" % (peak['pscore'])) # -log10pvalue at summit
-                    write("\t%.6g" % (peak['fc'])) # fold change at summit
-                    write("\t%.6g" % (peak['qscore'])) # -log10qvalue at summit
+                    # [start,end,end-start,summit,peak_height,number_tags,pvalue,fold_change,qvalue]
+                    write("%s\t%d\t%d\t%d" % (chrom.decode(),
+                                              peak['start']+1,
+                                              peak['end'],
+                                              peak['length']))
+                    write("\t%d" % (peak['summit']+1))  # summit position
+                    write("\t%.6g" % (round(peak['pileup'], 2)))  # pileup height at summit
+                    write("\t%.6g" % (peak['pscore']))  # -log10pvalue at summit
+                    write("\t%.6g" % (peak['fc']))  # fold change at summit
+                    write("\t%.6g" % (peak['qscore']))  # -log10qvalue at summit
                     write("\t%s" % peakname)
                     write("\n")
         return
 
-
-    cpdef void exclude (self, object peaksio2):
+    @cython.ccall
+    def exclude(self, peaksio2: object):
         """ Remove overlapping peaks in peaksio2, another PeakIO object.
 
         """
-        cdef:
-            dict peaks1, peaks2
-            list chrs1, chrs2
-            bytes k
-            dict ret_peaks
-            bool overlap_found
-            object r1, r2       # PeakContent objects
-            long n_rl1, n_rl2
+        peaks1: dict
+        peaks2: dict
+        chrs1: list
+        chrs2: list
+        k: bytes
+        ret_peaks: dict
+        overlap_found: bool
+        r1: PeakContent
+        r2: PeakContent
+        n_rl1: cython.long
+        n_rl2: cython.long
 
         self.sort()
         peaks1 = self.peaks
         self.total = 0
-        assert isinstance(peaksio2,PeakIO)
+        assert isinstance(peaksio2, PeakIO)
         peaksio2.sort()
         peaks2 = peaksio2.peaks
 
@@ -638,44 +743,44 @@ def write_to_xls (self, ofhd, bytes name_prefix = b"%s_peak_", bytes name = b"MA
             #print(f"chromosome {k}")
             if not chrs2.count(k):
                 # no such chromosome in peaks1, then don't touch the peaks in this chromosome
-                ret_peaks[ k ] = peaks1[ k ]
+                ret_peaks[k] = peaks1[k]
                 continue
-            ret_peaks[ k ] = []
-            n_rl1 = len( peaks1[k] )
-            n_rl2 = len( peaks2[k] )
-            rl1_k = iter( peaks1[k] ).__next__
-            rl2_k = iter( peaks2[k] ).__next__
+            ret_peaks[k] = []
+            n_rl1 = len(peaks1[k])
+            n_rl2 = len(peaks2[k])
+            rl1_k = iter(peaks1[k]).__next__
+            rl2_k = iter(peaks2[k]).__next__
             overlap_found = False
             r1 = rl1_k()
             n_rl1 -= 1
             r2 = rl2_k()
             n_rl2 -= 1
-            while ( True ):
+            while (True):
                 # we do this until there is no r1 or r2 left.
                 if r2["start"] < r1["end"] and r1["start"] < r2["end"]:
                     # since we found an overlap, r1 will be skipped/excluded
                     # and move to the next r1
                     overlap_found = True
-                    #print(f"found overlap of {r1['start']} {r1['end']} and {r2['start']} {r2['end']}, move to the next r1")
+                    # print(f"found overlap of {r1['start']} {r1['end']} and {r2['start']} {r2['end']}, move to the next r1")
                     n_rl1 -= 1
                     if n_rl1 >= 0:
                         r1 = rl1_k()
-                        #print(f"move to next r1 {r1['start']} {r1['end']}")
+                        # print(f"move to next r1 {r1['start']} {r1['end']}")
                         overlap_found = False
                         continue
                     else:
                         break
                 if r1["end"] < r2["end"]:
-                    #print(f"now we need to move r1 {r1['start']} {r1['end']}")
+                    # print(f"now we need to move r1 {r1['start']} {r1['end']}")
                     # in this case, we need to move to the next r1,
                     # we will check if overlap_found is true, if not, we put r1 in a new dict
                     if not overlap_found:
-                        #print(f"we add this r1 {r1['start']} {r1['end']} to list")
-                        ret_peaks[ k ].append( r1 )
+                        # print(f"we add this r1 {r1['start']} {r1['end']} to list")
+                        ret_peaks[k].append(r1)
                     n_rl1 -= 1
                     if n_rl1 >= 0:
                         r1 = rl1_k()
-                        #print(f"move to next r1 {r1['start']} {r1['end']}")
+                        # print(f"move to next r1 {r1['start']} {r1['end']}")
                         overlap_found = False
                     else:
                         # no more r1 left
@@ -685,54 +790,61 @@ def write_to_xls (self, ofhd, bytes name_prefix = b"%s_peak_", bytes name = b"MA
                     if n_rl2:
                         r2 = rl2_k()
                         n_rl2 -= 1
-                        #print(f"move to next r2 {r2['start']} {r2['end']}")                      
+                        # print(f"move to next r2 {r2['start']} {r2['end']}")
                     else:
                         # no more r2 left
                         break
             # add the rest of r1
-            #print( f"n_rl1: {n_rl1} n_rl2:{n_rl2} last overlap_found is {overlap_found}" )
-            #if overlap_found:
+            # print( f"n_rl1: {n_rl1} n_rl2:{n_rl2} last overlap_found is {overlap_found}" )
+            # if overlap_found:
             #    n_rl1 -= 1
             if n_rl1 >= 0:
-                ret_peaks[ k ].extend( peaks1[ k ][-n_rl1-1:] )
+                ret_peaks[k].extend(peaks1[k][-n_rl1-1:])
 
         for k in ret_peaks.keys():
-            self.total += len( ret_peaks[ k ] )
+            self.total += len(ret_peaks[k])
 
         self.peaks = ret_peaks
         self.CO_sorted = True
-        self.sort()        
+        self.sort()
         return
 
-    def read_from_xls (self, ofhd):
+    @cython.ccall
+    def read_from_xls(self, ofhd):
         """Save the peak results in a tab-delimited plain text file
         with suffix .xls.
 
         """
-        cdef:
-            bytes line = b''
-            bytes chrom = b''
-            int n_peak = 0
-            int start, end, length, summit
-            float pileup, pscore, fc, qscore
-            list fields
+        line: str = ''  # ofhd yields text lines; they are parsed with str methods below
+        chrom: bytes = b''
+        start: cython.int
+        end: cython.int
+        length: cython.int
+        summit: cython.int
+        pileup: cython.float
+        pscore: cython.float
+        fc: cython.float
+        qscore: cython.float
+        fields: list
+
-        while True:
-            if not (line.startswith('#') or line.strip() == ''): break
-            line = ofhd.readline()
+        while True:
+            line = ofhd.readline()
+            if not line or not (line.startswith('#') or line.strip() == ''):
+                break  # stop at EOF ('') or at the first non-comment, non-blank line
 
         # sanity check
         columns = line.rstrip().split('\t')
-        for a,b in zip(columns, ("chr","start", "end",  "length", "abs_summit",
-                                 "pileup", "-log10(pvalue)", "fold_enrichment",
-                                 "-log10(qvalue)", "name")):
-            if not a==b: raise NotImplementedError('column %s not recognized', a)
+        for a, b in zip(columns, ("chr", "start", "end",  "length", "abs_summit",
+                                  "pileup", "-log10(pvalue)", "fold_enrichment",
+                                  "-log10(qvalue)", "name")):
+            if a != b:  # header must match the column names written by write_to_xls
+                raise NotImplementedError('column %s not recognized' % a)
 
         add = self.add
         split = str.split
         rstrip = str.rstrip
         for i, line in enumerate(ofhd.readlines()):
             fields = split(line, '\t')
-            peak = {}
             chrom = fields[0].encode()
             start = int(fields[1]) - 1
             end = int(fields[2])
@@ -748,68 +860,62 @@ def read_from_xls (self, ofhd):
             add(chrom, start, end, summit, qscore, pileup, pscore, fc, qscore,
                 peakname)
 
-cpdef parse_peakname(peakname):
-    """returns peaknumber, subpeak
-    """
-    cdef:
-        bytes peak_id, peaknumber, subpeak
-    peak_id = peakname.split(b'_')[-1]
-    x = re.split('(\D.*)', peak_id)
-    peaknumber = int(x[0])
-    try:
-        subpeak = x[1]
-    except IndexError:
-        subpeak = b''
-    return (peaknumber, subpeak)
-
-cdef class RegionIO:
+
+@cython.cclass
+class RegionIO:
     """For plain region of chrom, start and end
     """
-    cdef:
-        dict regions
-        bool __flag_sorted
+    regions: dict
+    __flag_sorted: bool
 
-    def __init__ (self):
-        self.regions= {}
+    def __init__(self):
+        self.regions = {}
         self.__flag_sorted = False
 
-    cpdef void add_loc ( self, bytes chrom, int start, int end ):
+    @cython.ccall
+    def add_loc(self, chrom: bytes, start: cython.int, end: cython.int):
         if self.regions.has_key(chrom):
-            self.regions[chrom].append( (start,end) )
+            self.regions[chrom].append((start, end))
         else:
-            self.regions[chrom] = [(start,end), ]
+            self.regions[chrom] = [(start, end), ]
         self.__flag_sorted = False
         return
 
-    cpdef void sort (self):
-        cdef bytes chrom
+    @cython.ccall
+    def sort(self):
+        chrom: bytes
 
         for chrom in sorted(list(self.regions.keys())):
             self.regions[chrom].sort()
         self.__flag_sorted = True
 
-    cpdef set get_chr_names (self):
+    @cython.ccall
+    def get_chr_names(self) -> set:
         return set(sorted(self.regions.keys()))
 
-    cpdef void merge_overlap ( self ):
+    @cython.ccall
+    def merge_overlap(self):
         """
         merge overlapping regions
         """
-        cdef:
-            bytes chrom
-            int s_new_region, e_new_region, i, j
-            dict regions, new_regions
-            list chrs, regions_chr
-            tuple prev_region
+        chrom: bytes
+        s_new_region: cython.int
+        e_new_region: cython.int
+        i: cython.int
+        regions: dict
+        new_regions: dict
+        chrs: list
+        regions_chr: list
+        prev_region: tuple
 
         if not self.__flag_sorted:
             self.sort()
         regions = self.regions
         new_regions = {}
-        chrs = sorted( list( regions.keys() ) )
-        for i in range( len( chrs ) ):
+        chrs = sorted(list(regions.keys()))
+        for i in range(len(chrs)):
             chrom = chrs[i]
-            new_regions[chrom]=[]
+            new_regions[chrom] = []
             n_append = new_regions[chrom].append
             prev_region = None
             regions_chr = regions[chrom]
@@ -821,7 +927,7 @@ def __init__ (self):
                     if regions_chr[i][0] <= prev_region[1]:
                         s_new_region = prev_region[0]
                         e_new_region = regions_chr[i][1]
-                        prev_region = (s_new_region,e_new_region)
+                        prev_region = (s_new_region, e_new_region)
                     else:
                         n_append(prev_region)
                         prev_region = regions_chr[i]
@@ -831,43 +937,53 @@ def __init__ (self):
         self.sort()
         return
 
-    cpdef write_to_bed (self, fhd ):
-        cdef:
-            int i
-            bytes chrom
-            list chrs
-            tuple region
+    @cython.ccall
+    def write_to_bed(self, fhd):
+        i: cython.int
+        chrom: bytes
+        chrs: list
+        region: tuple
 
         chrs = sorted(list(self.regions.keys()))
-        for i in range( len(chrs) ):
+        for i in range(len(chrs)):
             chrom = chrs[i]
             for region in self.regions[chrom]:
-                fhd.write( "%s\t%d\t%d\n" % (chrom.decode(),region[0],region[1] ) )
-
-
-cdef class BroadPeakContent:
-    cdef:
-        long start
-        long end
-        long length
-        float score
-        bytes thickStart
-        bytes thickEnd
-        long blockNum
-        bytes  blockSizes
-        bytes  blockStarts
-        float pileup
-        float pscore
-        float fc
-        float qscore
-        bytes name
-
-    def __init__ ( self, long start, long end, float score,
-                   bytes thickStart, bytes thickEnd,
-                   long blockNum, bytes blockSizes,
-                   bytes blockStarts, float pileup,
-                   float pscore, float fold_change,
-                   float qscore, bytes name = b"NA" ):
+                fhd.write("%s\t%d\t%d\n" % (chrom.decode(),
+                                            region[0],
+                                            region[1]))
+
+
+@cython.cclass
+class BroadPeakContent:
+    start: cython.int
+    end: cython.int
+    length: cython.int
+    score: cython.float
+    thickStart: bytes
+    thickEnd: bytes
+    blockNum: cython.int
+    blockSizes: bytes
+    blockStarts: bytes
+    pileup: cython.float
+    pscore: cython.float
+    fc: cython.float
+    qscore: cython.float
+    name: bytes
+
+    def __init__(self,
+                 start: cython.int,
+                 end: cython.int,
+                 score: cython.float,
+                 thickStart: bytes,
+                 thickEnd: bytes,
+                 blockNum: cython.int,
+                 blockSizes: bytes,
+                 blockStarts: bytes,
+                 pileup: cython.float,
+                 pscore: cython.float,
+                 fold_change: cython.float,
+                 qscore: cython.float,
+                 name: bytes = b"NA"):
         self.start = start
         self.end = end
         self.score = score
@@ -876,7 +992,6 @@ def __init__ ( self, long start, long end, float score,
         self.blockNum = blockNum
         self.blockSizes = blockSizes
         self.blockStarts = blockStarts
-
         self.length = end - start
         self.pileup = pileup
         self.pscore = pscore
@@ -884,7 +999,7 @@ def __init__ ( self, long start, long end, float score,
         self.qscore = qscore
         self.name = name
 
-    def __getitem__ ( self, a ):
+    def __getitem__(self, a):
         if a == "start":
             return self.start
         elif a == "end":
@@ -914,26 +1029,36 @@ def __getitem__ ( self, a ):
         elif a == "name":
             return self.name
 
-    def __str__ (self):
-        return "start:%d;end:%d;score:%f" % ( self.start, self.end, self.score )
+    def __str__(self):
+        return "start:%d;end:%d;score:%f" % (self.start, self.end, self.score)
 
 
-cdef class BroadPeakIO:
+@cython.cclass
+class BroadPeakIO:
     """IO for broad peak information.
 
     """
-    cdef:
-        dict peaks
+    peaks: dict
 
-    def __init__ (self):
+    def __init__(self):
         self.peaks = {}
 
-    def add (self, char * chromosome, long start, long end, long score = 0,
-             bytes thickStart=b".", bytes thickEnd=b".",
-             long blockNum=0, bytes blockSizes=b".",
-             bytes blockStarts=b".", float pileup = 0,
-             float pscore = 0, float fold_change = 0,
-             float qscore = 0, bytes name = b"NA" ):
+    @cython.ccall
+    def add(self,
+            chromosome: bytes,
+            start: cython.int,
+            end: cython.int,
+            score: cython.float = 0.0,
+            thickStart: bytes = b".",
+            thickEnd: bytes = b".",
+            blockNum: cython.int = 0,
+            blockSizes: bytes = b".",
+            blockStarts: bytes = b".",
+            pileup: cython.float = 0,
+            pscore: cython.float = 0,
+            fold_change: cython.float = 0,
+            qscore: cython.float = 0,
+            name: bytes = b"NA"):
         """items
         chromosome : chromosome name,
         start      : broad region start,
@@ -952,81 +1077,97 @@ def add (self, char * chromosome, long start, long end, long score = 0,
         """
         if not self.peaks.has_key(chromosome):
             self.peaks[chromosome] = []
-        self.peaks[chromosome].append( BroadPeakContent( start, end, score, thickStart, thickEnd,
-                                                         blockNum, blockSizes, blockStarts,
-                                                         pileup, pscore, fold_change, qscore, name ) )
-
-    def filter_pscore (self, double pscore_cut ):
-        cdef:
-            bytes chrom
-            dict peaks
-            dict new_peaks
-            list chrs
-            BroadPeakContent p
+        self.peaks[chromosome].append(BroadPeakContent(start,
+                                                       end,
+                                                       score,
+                                                       thickStart,
+                                                       thickEnd,
+                                                       blockNum,
+                                                       blockSizes,
+                                                       blockStarts,
+                                                       pileup,
+                                                       pscore,
+                                                       fold_change,
+                                                       qscore,
+                                                       name))
+
+    @cython.ccall
+    def filter_pscore(self, pscore_cut: cython.float):
+        chrom: bytes
+        peaks: dict
+        new_peaks: dict
+        chrs: list
 
         peaks = self.peaks
         new_peaks = {}
         chrs = list(peaks.keys())
 
         for chrom in sorted(chrs):
-            new_peaks[chrom]=[p for p in peaks[chrom] if p['pscore'] >= pscore_cut]
+            new_peaks[chrom] = [p for p in peaks[chrom] if p['pscore'] >= pscore_cut]
         self.peaks = new_peaks
 
-    def filter_qscore (self, double qscore_cut ):
-        cdef:
-            bytes chrom
-            dict peaks
-            dict new_peaks
-            list chrs
-            BroadPeakContent p
+    @cython.ccall
+    def filter_qscore(self, qscore_cut: cython.float):
+        chrom: bytes
+        peaks: dict
+        new_peaks: dict
+        chrs: list
 
         peaks = self.peaks
         new_peaks = {}
         chrs = list(peaks.keys())
 
         for chrom in sorted(chrs):
-            new_peaks[chrom]=[p for p in peaks[chrom] if p['qscore'] >= qscore_cut]
+            new_peaks[chrom] = [p for p in peaks[chrom] if p['qscore'] >= qscore_cut]
         self.peaks = new_peaks
 
-    def filter_fc (self, fc_low, fc_up=None ):
+    @cython.ccall
+    def filter_fc(self, fc_low: float, fc_up: float = -1):
         """Filter peaks in a given fc range.
 
-        If fc_low and fc_up is assigned, the peaks with fc in [fc_low,fc_up)
+        If fc_low and fc_up is assigned, the peaks with fc in
+        [fc_low,fc_up)
+
+        fc_up has to be a positive number, otherwise it won't be
+        applied.
 
         """
-        cdef:
-            bytes chrom
-            dict peaks
-            dict new_peaks
-            list chrs
-            BroadPeakContent p
+        chrom: bytes
+        peaks: dict
+        new_peaks: dict
+        chrs: list
 
         peaks = self.peaks
         new_peaks = {}
         chrs = list(peaks.keys())
-        if fc_up:
+        if fc_up > 0:  # non-positive fc_up (including the -1 default) means no upper bound
             for chrom in sorted(chrs):
-                new_peaks[chrom]=[p for p in peaks[chrom] if p['fc'] >= fc_low and p['fc']<fc_up]
+                new_peaks[chrom] = [p for p in peaks[chrom] if p['fc'] >= fc_low and p['fc'] < fc_up]
         else:
             for chrom in sorted(chrs):
-                new_peaks[chrom]=[p for p in peaks[chrom] if p['fc'] >= fc_low]
+                new_peaks[chrom] = [p for p in peaks[chrom] if p['fc'] >= fc_low]
         self.peaks = new_peaks
 
-    def total (self):
-        cdef:
-            bytes chrom
-            dict peaks
-            list chrs
-            long x
+    @cython.ccall
+    def total(self):
+        chrom: bytes
+        peaks: dict
+        chrs: list
+        x: cython.long = 0
 
         peaks = self.peaks
         chrs = list(peaks.keys())
-        x = 0
         for chrom in sorted(chrs):
             x += len(peaks[chrom])
         return x
 
-    def write_to_gappedPeak (self, fhd, bytes name_prefix=b"peak_", bytes name=b'peak', bytes description=b"%s", str score_column="score", trackline=True):
+    @cython.ccall
+    def write_to_gappedPeak(self, fhd,
+                            name_prefix: bytes = b"peak_",
+                            name: bytes = b'peak',
+                            description: bytes = b"%s",
+                            score_column: str = "score",
+                            trackline: bool = True):
         """Print out peaks in gappedBed format. Only those with stronger enrichment regions are saved.
 
         This format is basically a BED12+3 format.
@@ -1095,24 +1236,49 @@ def write_to_gappedPeak (self, fhd, bytes name_prefix=b"peak_", bytes name=b'pea
         +--------------+------+----------------------------------------+
 
         """
+        chrs: list
+        n_peak: cython.int = 0
+        peak: BroadPeakContent
+        desc: bytes
+        peakprefix: bytes
+        chrom: bytes
+
         chrs = list(self.peaks.keys())
-        n_peak = 0
-        try: peakprefix = name_prefix % name
-        except: peakprefix = name_prefix
-        try: desc = description % name
-        except: desc = description
+        try:
+            peakprefix = name_prefix % name
+        except Exception:
+            peakprefix = name_prefix
+        try:
+            desc = description % name
+        except Exception:
+            desc = description
         if trackline:
-            fhd.write("track name=\"%s\" description=\"%s\" type=gappedPeak nextItemButton=on\n" % (name.decode(), desc.decode()) )
+            fhd.write("track name=\"%s\" description=\"%s\" type=gappedPeak nextItemButton=on\n" % (name.decode(), desc.decode()))
         for chrom in sorted(chrs):
             for peak in self.peaks[chrom]:
                 n_peak += 1
                 if peak["thickStart"] != b".":
-                    fhd.write( "%s\t%d\t%d\t%s%d\t%d\t.\t0\t0\t0\t%d\t%s\t%s\t%.6g\t%.6g\t%.6g\n"
-                               %
-                               (chrom.decode(),peak["start"],peak["end"],peakprefix.decode(),n_peak,int(10*peak[score_column]),
-                                peak["blockNum"],peak["blockSizes"].decode(),peak["blockStarts"].decode(), peak['fc'], peak['pscore'], peak['qscore'] ) )
-
-    def write_to_Bed12 (self, fhd, bytes name_prefix=b"peak_", bytes name=b'peak', bytes description=b"%s", str score_column="score", trackline=True):
+                    fhd.write("%s\t%d\t%d\t%s%d\t%d\t.\t0\t0\t0\t%d\t%s\t%s\t%.6g\t%.6g\t%.6g\n" %
+                              (chrom.decode(),
+                               peak["start"],
+                               peak["end"],
+                               peakprefix.decode(),
+                               n_peak,
+                               int(10*peak[score_column]),
+                               peak["blockNum"],
+                               peak["blockSizes"].decode(),
+                               peak["blockStarts"].decode(),
+                               peak['fc'],
+                               peak['pscore'],
+                               peak['qscore']))
+
+    @cython.ccall
+    def write_to_Bed12(self, fhd,
+                       name_prefix: bytes = b"peak_",
+                       name: bytes = b'peak',
+                       description: bytes = b"%s",
+                       score_column: str = "score",
+                       trackline: bool = True):
         """Print out peaks in Bed12 format.
 
         +--------------+------+----------------------------------------+
@@ -1167,31 +1333,58 @@ def write_to_Bed12 (self, fhd, bytes name_prefix=b"peak_", bytes name=b'peak', b
         +--------------+------+----------------------------------------+
 
         """
+        chrs: list
+        n_peak: cython.int = 0
+        peakprefix: bytes
+        peak: BroadPeakContent
+        desc: bytes
+        peakprefix: bytes
+        chrom: bytes
+
         chrs = list(self.peaks.keys())
-        n_peak = 0
-        try: peakprefix = name_prefix % name
-        except: peakprefix = name_prefix
-        try: desc = description % name
-        except: desc = description
+        try:
+            peakprefix = name_prefix % name
+        except Exception:
+            peakprefix = name_prefix
+        try:
+            desc = description % name
+        except Exception:
+            desc = description
         if trackline:
-            fhd.write("track name=\"%s\" description=\"%s\" type=bed nextItemButton=on\n" % (name.decode(), desc.decode()) )
+            fhd.write("track name=\"%s\" description=\"%s\" type=bed nextItemButton=on\n" % (name.decode(), desc.decode()))
         for chrom in sorted(chrs):
             for peak in self.peaks[chrom]:
                 n_peak += 1
                 if peak["thickStart"] == b".":
                     # this will violate gappedPeak format, since it's a complement like broadPeak line.
-                    fhd.write( "%s\t%d\t%d\t%s%d\t%d\t.\n"
-                               %
-                               (chrom.decode(),peak["start"],peak["end"],peakprefix.decode(),n_peak,int(10*peak[score_column]) ) )
+                    fhd.write("%s\t%d\t%d\t%s%d\t%d\t.\n" %
+                              (chrom.decode(),
+                               peak["start"],
+                               peak["end"],
+                               peakprefix.decode(),
+                               n_peak,
+                               int(10*peak[score_column])))
                 else:
-                    fhd.write( "%s\t%d\t%d\t%s%d\t%d\t.\t%s\t%s\t0\t%d\t%s\t%s\n"
-                               %
-                               (chrom.decode(), peak["start"], peak["end"], peakprefix.decode(), n_peak, int(10*peak[score_column]),
-                                peak["thickStart"].decode(), peak["thickEnd"].decode(),
-                                peak["blockNum"], peak["blockSizes"].decode(), peak["blockStarts"].decode() ))
-
-
-    def write_to_broadPeak (self, fhd, bytes name_prefix=b"peak_", bytes name=b'peak', bytes description=b"%s", str score_column="score", trackline=True):
+                    fhd.write("%s\t%d\t%d\t%s%d\t%d\t.\t%s\t%s\t0\t%d\t%s\t%s\n" %
+                              (chrom.decode(),
+                               peak["start"],
+                               peak["end"],
+                               peakprefix.decode(),
+                               n_peak,
+                               int(10*peak[score_column]),
+                               peak["thickStart"].decode(),
+                               peak["thickEnd"].decode(),
+                               peak["blockNum"],
+                               peak["blockSizes"].decode(),
+                               peak["blockStarts"].decode()))
+
+    @cython.ccall
+    def write_to_broadPeak(self, fhd,
+                           name_prefix: bytes = b"peak_",
+                           name: bytes = b'peak',
+                           description: bytes = b"%s",
+                           score_column: str = "score",
+                           trackline: bool = True):
         """Print out peaks in broadPeak format.
 
         This format is designed for ENCODE project, and basically a
@@ -1241,16 +1434,20 @@ def write_to_broadPeak (self, fhd, bytes name_prefix=b"peak_", bytes name=b'peak
         +-----------+------+----------------------------------------+
 
         """
-        cdef int n_peak
-        cdef bytes chrom
-        cdef long s
-        cdef str peakname
+        chrs: list
+        n_peak: cython.int = 0
+        peakprefix: bytes
+        peak: BroadPeakContent
+        peakprefix: bytes
+        chrom: bytes
+        peakname: str
 
         chrs = list(self.peaks.keys())
-        n_peak = 0
         write = fhd.write
-        try: peakprefix = name_prefix % name
-        except: peakprefix = name_prefix
+        try:
+            peakprefix = name_prefix % name
+        except Exception:
+            peakprefix = name_prefix
         if trackline:
             write("track type=broadPeak name=\"%s\" description=\"%s\" nextItemButton=on\n" % (name.decode(), name.decode()))
         for chrom in sorted(chrs):
@@ -1259,13 +1456,21 @@ def write_to_broadPeak (self, fhd, bytes name_prefix=b"peak_", bytes name=b'peak
                 these_peaks = list(group)
                 peak = these_peaks[0]
                 peakname = "%s%d" % (peakprefix.decode(), n_peak)
-                fhd.write( "%s\t%d\t%d\t%s\t%d\t.\t%.6g\t%.6g\t%.6g\n" %
-                           (chrom.decode(),peak['start'],peak['end'],peakname,int(10*peak[score_column]),
-                            peak['fc'],peak['pscore'],peak['qscore'] ) )
+                fhd.write("%s\t%d\t%d\t%s\t%d\t.\t%.6g\t%.6g\t%.6g\n" %
+                          (chrom.decode(),
+                           peak['start'],
+                           peak['end'],
+                           peakname,
+                           int(10*peak[score_column]),
+                           peak['fc'],
+                           peak['pscore'],
+                           peak['qscore']))
         return
 
-
-    def write_to_xls (self, ofhd, bytes name_prefix=b"%s_peak_", bytes name=b"MACS"):
+    @cython.ccall
+    def write_to_xls(self, ofhd,
+                     name_prefix: bytes = b"%s_peak_",
+                     name: bytes = b"MACS"):
         """Save the peak results in a tab-delimited plain text file
         with suffix .xls.
 
@@ -1273,11 +1478,21 @@ def write_to_xls (self, ofhd, bytes name_prefix=b"%s_peak_", bytes name=b"MACS")
         wait... why I have two write_to_xls in this class?
 
         """
+        chrom: bytes
+        chrs: list
+        peakprefix: bytes
+        peaks: dict
+        these_peaks: list
+        peak: BroadPeakContent
+        peakname: str
+
         write = ofhd.write
-        write("\t".join(("chr","start", "end",  "length",  "pileup", "-log10(pvalue)", "fold_enrichment", "-log10(qvalue)", "name"))+"\n")
+        write("\t".join(("chr", "start", "end",  "length",  "pileup", "-log10(pvalue)", "fold_enrichment", "-log10(qvalue)", "name"))+"\n")
 
-        try: peakprefix = name_prefix % name
-        except: peakprefix = name_prefix
+        try:
+            peakprefix = name_prefix % name
+        except Exception:
+            peakprefix = name_prefix
 
         peaks = self.peaks
         chrs = list(peaks.keys())
@@ -1288,11 +1503,14 @@ def write_to_xls (self, ofhd, bytes name_prefix=b"%s_peak_", bytes name=b"MACS")
                 these_peaks = list(group)
                 peak = these_peaks[0]
                 peakname = "%s%d" % (peakprefix.decode(), n_peak)
-                write("%s\t%d\t%d\t%d" % (chrom.decode(),peak['start']+1,peak['end'],peak['length']))
-                write("\t%.6g" % (round(peak['pileup'],2))) # pileup height at summit
-                write("\t%.6g" % (peak['pscore'])) # -log10pvalue at summit
-                write("\t%.6g" % (peak['fc'])) # fold change at summit
-                write("\t%.6g" % (peak['qscore'])) # -log10qvalue at summit
+                write("%s\t%d\t%d\t%d" % (chrom.decode(),
+                                          peak['start']+1,
+                                          peak['end'],
+                                          peak['length']))
+                write("\t%.6g" % (round(peak['pileup'], 2)))  # pileup height at summit
+                write("\t%.6g" % (peak['pscore']))  # -log10pvalue at summit
+                write("\t%.6g" % (peak['fc']))  # fold change at summit
+                write("\t%.6g" % (peak['qscore']))  # -log10qvalue at summit
                 write("\t%s" % peakname)
                 write("\n")
         return
diff --git a/MACS3/Signal/CallPeakUnit.pyx b/MACS3/Signal/CallPeakUnit.pyx
index c6ffb7b8..c83aba7e 100644
--- a/MACS3/Signal/CallPeakUnit.pyx
+++ b/MACS3/Signal/CallPeakUnit.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # cython: profile=True
 # cython: linetrace=True
-# Time-stamp: <2022-09-15 17:06:17 Tao Liu>
+# Time-stamp: <2024-10-10 16:45:01 Tao Liu>
 
 """Module for Calculate Scores.
 
@@ -46,7 +46,7 @@ from libc.math cimport exp,log,log10, M_LN10, log1p, erf, sqrt, floor, ceil
 # MACS3 modules
 # ------------------------------------
 from MACS3.Signal.SignalProcessing import maxima, enforce_valleys, enforce_peakyness
-from MACS3.IO.PeakIO import PeakIO, BroadPeakIO, parse_peakname
+from MACS3.IO.PeakIO import PeakIO, BroadPeakIO
 from MACS3.Signal.FixWidthTrack import FWTrack
 from MACS3.Signal.PairedEndTrack import PETrackI
 from MACS3.Signal.Prob import poisson_cdf
diff --git a/MACS3/Signal/PairedEndTrack.py b/MACS3/Signal/PairedEndTrack.py
new file mode 100644
index 00000000..8273495a
--- /dev/null
+++ b/MACS3/Signal/PairedEndTrack.py
@@ -0,0 +1,730 @@
+# cython: language_level=3
+# cython: profile=True
+# Time-stamp: <2024-10-10 17:03:45 Tao Liu>
+
+"""Module for filter duplicate tags from paired-end data
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD License (see the file LICENSE included with
+the distribution).
+"""
+
+# ------------------------------------
+# Python modules
+# ------------------------------------
+import io
+import sys
+from array import array as pyarray
+from collections import Counter
+
+# ------------------------------------
+# MACS3 modules
+# ------------------------------------
+from MACS3.Signal.Pileup import (quick_pileup,
+                                 over_two_pv_array,
+                                 se_all_in_one_pileup)
+from MACS3.Signal.BedGraph import bedGraphTrackI
+from MACS3.Signal.PileupV2 import pileup_from_LR_hmmratac
+# ------------------------------------
+# Other modules
+# ------------------------------------
+import cython
+import numpy as np
+import cython.cimports.numpy as cnp
+from cython.cimports.cpython import bool
+from cython.cimports.libc.stdint import INT32_MAX as INT_MAX
+
+from MACS3.Utilities.Logger import logging
+
+logger = logging.getLogger(__name__)
+debug = logger.debug
+info = logger.info
+
+# Let numpy enforce PE-ness using ndarray, gives bonus speedup when sorting
+# PE data doesn't have strandedness
+
+
+@cython.cclass
+class PETrackI:
+    """Paired End Locations Track class I along the whole genome
+    (commonly with the same annotation type), which are stored in a
+    dict.
+
+    Locations are stored and organized by sequence names (chr names) in a
+    dict. They can be sorted by calling self.sort() function.
+    """
+    __locations = cython.declare(dict, visibility="public")  # chrom -> array of (l, r)
+    __size = cython.declare(dict, visibility="public")  # chrom -> fragments stored
+    __buf_size = cython.declare(dict, visibility="public")  # chrom -> slots allocated
+    __sorted = cython.declare(bool, visibility="public")  # set by sort()/finalize()
+    total = cython.declare(cython.ulong, visibility="public")  # total fragments
+    annotation = cython.declare(str, visibility="public")
+    rlengths = cython.declare(dict, visibility="public")  # chrom -> reference length
+    buffer_size = cython.declare(cython.long, visibility="public")  # allocation step
+    length = cython.declare(cython.long, visibility="public")  # sum of (end - start)
+    average_template_length = cython.declare(cython.float, visibility="public")  # length / total
+    __destroyed: bool  # set to True by destroy()
+
+    def __init__(self, anno: str = "", buffer_size: cython.long = 100000):
+        """Create an empty track; `anno` is a free-text annotation and
+        `buffer_size` the per-chromosome allocation step of add_loc().
+        """
+        # chrname -> structured nparray [('l','int32'),('r','int32')]
+        self.__locations = {}
+        # chrname -> number of fragments actually stored in the nparray
+        self.__size = {}
+        # chrname -> number of slots currently allocated in the nparray
+        self.__buf_size = {}
+        self.__sorted = False
+        self.__destroyed = False  # define the flag before destroy() is called
+        self.total = 0           # total fragments
+        self.annotation = anno   # need to be figured out
+        self.rlengths = {}
+        self.buffer_size = buffer_size
+        self.length = 0
+        self.average_template_length = 0.0
+
+    @cython.ccall
+    def add_loc(self, chromosome: bytes,
+                start: cython.int, end: cython.int):
+        """Append one fragment (start, end) to the array of `chromosome`.
+
+        chromosome -- mostly the chromosome name
+        start, end -- leftmost and rightmost position of the fragment
+        """
+        i: cython.int
+
+        if chromosome not in self.__locations:
+            self.__buf_size[chromosome] = self.buffer_size
+            # note: ['l'] is the leftmost end, ['r'] is the rightmost end of fragment.
+            self.__locations[chromosome] = np.zeros(shape=self.buffer_size,
+                                                    dtype=[('l', 'i4'), ('r', 'i4')])
+            self.__locations[chromosome][0] = (start, end)
+            self.__size[chromosome] = 1
+        else:
+            i = self.__size[chromosome]
+            if self.__buf_size[chromosome] == i:  # buffer is full: grow by one step
+                self.__buf_size[chromosome] += self.buffer_size
+                self.__locations[chromosome].resize((self.__buf_size[chromosome]),
+                                                    refcheck=False)
+            self.__locations[chromosome][i] = (start, end)
+            self.__size[chromosome] = i + 1
+        self.length += end - start  # running sum of fragment lengths
+        return
+
+    @cython.ccall
+    def destroy(self):
+        """Shrink every per-chromosome array to zero length, drop it
+        from the track and flag the object as destroyed.
+        """
+        chromosome: bytes
+        names: list
+
+        names = sorted(self.get_chr_names())
+        for chromosome in names:
+            if chromosome not in self.__locations:
+                continue
+            self.__locations[chromosome].resize(self.buffer_size,
+                                                refcheck=False)
+            self.__locations[chromosome].resize(0, refcheck=False)
+            self.__locations[chromosome] = None
+            self.__locations.pop(chromosome)
+        self.__destroyed = True
+
+    @cython.ccall
+    def set_rlengths(self, rlengths: dict) -> bool:
+        """Set reference chromosome lengths dictionary.
+
+        Only the chromosome existing in this petrack object will be updated.
+        If a chromosome in this petrack is not covered by given
+        rlengths, and it has no associated length, it will be set as
+        maximum integer. Always returns True.
+        """
+        valid_chroms: set
+        missed_chroms: set
+        chrom: bytes
+
+        valid_chroms = set(self.__locations.keys()).intersection(rlengths.keys())
+        for chrom in sorted(valid_chroms):
+            self.rlengths[chrom] = rlengths[chrom]
+        missed_chroms = set(self.__locations.keys()).difference(rlengths.keys())
+        for chrom in sorted(missed_chroms):
+            # keep a length stored by an earlier call; only fill the gaps
+            self.rlengths.setdefault(chrom, INT_MAX)
+        return True
+
+    @cython.ccall
+    def get_rlengths(self) -> dict:
+        """Return the chromosome -> reference length dictionary.
+
+        When nothing has been stored yet, every chromosome of this
+        track is first registered with the maximum integer as length.
+        """
+        if not self.rlengths:
+            self.rlengths = dict.fromkeys(self.__locations, INT_MAX)
+        return self.rlengths
+
+    @cython.ccall
+    def finalize(self):
+        """Trim each array to its used size, sort it by (l, r) in place
+        and refresh self.total and self.average_template_length.
+
+        Note: If this function is called, it's impossible to append more
+        files to this PETrackI object. Call it after all files are read!
+        """
+        c: bytes
+        chrnames: set
+
+        self.total = 0
+        chrnames = self.get_chr_names()
+        for c in chrnames:
+            self.__locations[c].resize((self.__size[c]), refcheck=False)
+            self.__locations[c].sort(order=['l', 'r'])
+            self.total += self.__size[c]
+        self.__sorted = True
+        # an empty track has no fragment: report 0.0 instead of dividing by zero
+        self.average_template_length = (cython.cast(cython.float, self.length) / self.total
+                                        if self.total > 0 else 0.0)
+
+    @cython.ccall
+    def get_locations_by_chr(self, chromosome: bytes):
+        """Return the array of fragment locations kept for `chromosome`.
+
+        An Exception is raised when the name is not in this track.
+        """
+        if chromosome not in self.__locations:
+            raise Exception("No such chromosome name (%s) in TrackI object!\n" % (chromosome))
+        return self.__locations[chromosome]
+
+    @cython.ccall
+    def get_chr_names(self) -> set:
+        """Collect the names of all chromosomes held by this track in a set.
+        """
+        return set(self.__locations)
+
+    @cython.ccall
+    def sort(self):
+        """Sort the fragments of every chromosome in place, ordered by
+        left end first and right end second.
+        """
+        c: bytes
+        chrnames: set
+        sort_keys: list = ['l', 'r']
+
+        chrnames = self.get_chr_names()
+        for c in chrnames:
+            # in-place sort of the structured array on both fields
+            self.__locations[c].sort(order=sort_keys)
+        self.__sorted = True
+
+    @cython.ccall
+    def count_fraglengths(self) -> dict:
+        """Return a dictionary mapping each fragment length (r - l)
+        to the number of fragments having that length, over all
+        chromosomes.
+
+        This function is for HMMRATAC.
+        """
+        sizes: cnp.ndarray(cnp.int32_t, ndim=1)
+        s: cython.int
+        locs: cnp.ndarray
+        chrnames: list
+        i: cython.int
+
+        counter = Counter()
+        chrnames = list(self.get_chr_names())
+        for i in range(len(chrnames)):
+            locs = self.__locations[chrnames[i]]
+            sizes = locs['r'] - locs['l']  # per-fragment length on this chromosome
+            for s in sizes:
+                counter[s] += 1
+        return dict(counter)  # plain dict rather than the Counter itself
+
+    @cython.ccall
+    def fraglengths(self) -> cnp.ndarray:
+        """Return the sizes/fragment lengths (r - l) of each pair.
+
+        For HMMRATAC EM training; an empty track gives an empty array.
+        """
+        locs: cnp.ndarray
+        chrnames: list
+        parts: list = []
+        i: cython.int
+
+        chrnames = list(self.get_chr_names())
+        for i in range(len(chrnames)):
+            locs = self.__locations[chrnames[i]]
+            parts.append(locs['r'] - locs['l'])
+        if not parts:       # no chromosome: nothing to concatenate
+            return np.zeros(0, dtype='i4')
+        return np.concatenate(parts)   # one copy instead of one per chromosome
+
+    @cython.boundscheck(False)  # do not check that np indices are valid
+    @cython.ccall
+    def filter_dup(self, maxnum: cython.int = -1):
+        """Filter the duplicated reads, keeping at most `maxnum` (but never
+        fewer than one) fragments per identical (l, r) pair; a negative
+        `maxnum` disables filtering. Run it right after adding all data.
+        """
+        n: cython.int
+        loc_start: cython.int
+        loc_end: cython.int
+        current_loc_start: cython.int
+        current_loc_end: cython.int
+        i: cython.ulong
+        locs_size: cython.ulong
+        k: bytes
+        locs: cnp.ndarray
+        chrnames: set
+        selected_idx: cnp.ndarray
+
+        if maxnum < 0:
+            return              # condition to return if not filtering
+
+        if not self.__sorted:
+            self.sort()
+
+        self.total = 0
+        # self.length is kept: it is decremented for each dropped fragment
+        self.average_template_length = 0.0
+
+        chrnames = self.get_chr_names()
+
+        for k in chrnames:      # for each chromosome
+            locs = self.__locations[k]
+            locs_size = locs.shape[0]
+            if locs_size <= 1:
+                # nothing to filter, but the fragment still counts in total
+                self.total += locs_size
+                continue
+            # discard duplicate reads and make a new __locations[k]
+            # initialize boolean array as all TRUE, or all being kept
+            selected_idx = np.ones(locs_size, dtype=bool)
+            # get the first loc
+            (current_loc_start, current_loc_end) = locs[0]
+            n = 1  # the number of tags in the current genomic location
+            for i in range(1, locs_size):
+                (loc_start, loc_end) = locs[i]
+                if loc_start != current_loc_start or loc_end != current_loc_end:
+                    # not the same, update current_loc_start/end, reset n
+                    current_loc_start = loc_start
+                    current_loc_end = loc_end
+                    n = 1
+                    continue
+                else:
+                    # both ends are the same, add 1 to duplicate number n
+                    n += 1
+                    if n > maxnum:
+                        # change the flag to False
+                        selected_idx[i] = False
+                        # subtract the dropped fragment from self.length
+                        self.length -= current_loc_end - current_loc_start
+            self.__locations[k] = locs[selected_idx]
+            self.__size[k] = self.__locations[k].shape[0]
+            self.total += self.__size[k]
+            # free memory?
+            # I know I should shrink it to 0 size directly,
+            # however, on Mac OSX, it seems directly assigning 0
+            # doesn't do a thing.
+            selected_idx.resize(self.buffer_size, refcheck=False)
+            selected_idx.resize(0, refcheck=False)
+        if self.total > 0:  # empty track: keep the 0.0 set above
+            self.average_template_length = self.length / self.total
+
+    @cython.ccall
+    def sample_percent(self, percent: cython.float, seed: cython.int = -1):
+        """Sample the tags for a given percentage, in place.
+
+        Warning: the current object is changed! If a new PETrackI is
+        wanted, use sample_percent_copy instead. A non-negative seed
+        is used to seed the random state doing the shuffling.
+        """
+        # num: number of reads allowed on a certain chromosome
+        num: cython.uint
+        k: bytes
+        chrnames: set
+
+        self.total = 0
+        self.length = 0
+        self.average_template_length = 0.0
+
+        chrnames = self.get_chr_names()
+
+        if seed >= 0:
+            info(f"#   A random seed {seed} has been used")
+            rs = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(seed)))
+            rs_shuffle = rs.shuffle
+        else:
+            rs_shuffle = np.random.shuffle
+
+        for k in sorted(chrnames):
+            # for each chromosome.
+            # This loop body is too big, I may need to split code later...
+
+            num = cython.cast(cython.uint,
+                              round(self.__locations[k].shape[0] * percent, 5))
+            rs_shuffle(self.__locations[k])  # shuffle in place
+            self.__locations[k].resize(num, refcheck=False)  # keep the first num entries
+            self.__locations[k].sort(order=['l', 'r'])  # sort by leftmost positions
+            self.__size[k] = self.__locations[k].shape[0]
+            self.length += (self.__locations[k]['r'] - self.__locations[k]['l']).sum()
+            self.total += self.__size[k]
+        self.average_template_length = cython.cast(cython.float, self.length)/self.total
+        return
+
+    @cython.ccall
+    def sample_percent_copy(self, percent: cython.float, seed: cython.int = -1):
+        """Sample the tags for a given percentage. Return a new PETrackI object;
+        the fragment arrays of this object are copied, not modified.
+        """
+        # num: number of reads allowed on a certain chromosome
+        num: cython.uint
+        k: bytes
+        chrnames: set
+        ret_petrackI: PETrackI
+        loc: cnp.ndarray
+
+        ret_petrackI = PETrackI(anno=self.annotation, buffer_size=self.buffer_size)
+        chrnames = self.get_chr_names()
+
+        if seed >= 0:
+            info(f"# A random seed {seed} has been used in the sampling function")
+            rs = np.random.default_rng(seed)
+        else:
+            rs = np.random.default_rng()
+
+        rs_shuffle = rs.shuffle
+
+        # chrnames need to be sorted otherwise we can't assure reproducibility
+        for k in sorted(chrnames):
+            # for each chromosome.
+            # This loop body is too big, I may need to split code later...
+            loc = np.copy(self.__locations[k])  # work on a copy of the array
+            num = cython.cast(cython.uint, round(loc.shape[0] * percent, 5))
+            rs_shuffle(loc)
+            loc.resize(num, refcheck=False)  # keep the first num shuffled entries
+            loc.sort(order=['l', 'r'])  # sort by leftmost positions
+            ret_petrackI.__locations[k] = loc
+            ret_petrackI.__size[k] = loc.shape[0]
+            ret_petrackI.length += (loc['r'] - loc['l']).sum()
+            ret_petrackI.total += ret_petrackI.__size[k]
+        ret_petrackI.average_template_length = cython.cast(cython.float, ret_petrackI.length)/ret_petrackI.total
+        ret_petrackI.set_rlengths(self.get_rlengths())
+        return ret_petrackI
+
+    @cython.ccall
+    def sample_num(self, samplesize: cython.ulong, seed: cython.int = -1):
+        """Down-sample this track in place to about `samplesize` tags.
+
+        The number is converted to a fraction of self.total and handed
+        to sample_percent() together with `seed`.
+        Warning: the current object is changed!
+        """
+        percent: cython.float = cython.cast(cython.float, samplesize)/self.total
+
+        self.sample_percent(percent, seed)
+
+    @cython.ccall
+    def sample_num_copy(self, samplesize: cython.ulong, seed: cython.int = -1):
+        """Sample about `samplesize` tags into a new PETrackI object.
+
+        The number is converted to a fraction of self.total and handed
+        to sample_percent_copy(), whose result is returned.
+        """
+        percent: cython.float = cython.cast(cython.float, samplesize)/self.total
+
+        return self.sample_percent_copy(percent, seed)
+
+    @cython.ccall
+    def print_to_bed(self, fhd=None):
+        """Write one tab-delimited line (chromosome, left, right) per
+        fragment. If fhd is given, write to it, otherwise, output to
+        standard output.
+        """
+        i: cython.int
+        s: cython.int
+        e: cython.int
+        k: bytes
+        chrnames: set
+
+        if not fhd:
+            fhd = sys.stdout
+        assert isinstance(fhd, io.IOBase)
+
+        chrnames = self.get_chr_names()
+
+        for k in chrnames:
+            # for each chromosome.
+            # This loop body is too big, I may need to split code later...
+
+            locs = self.__locations[k]
+
+            for i in range(locs.shape[0]):
+                s, e = locs[i]  # left and right end of the i-th fragment
+                fhd.write("%s\t%d\t%d\n" % (k.decode(), s, e))
+        return
+
+    @cython.ccall
+    def pileup_a_chromosome(self,
+                            chrom: bytes,
+                            scale_factor_s: list,
+                            baseline_value: cython.float = 0.0) -> list:
+        """pileup a certain chromosome, return [p,v] (end position and
+        value) list.
+
+        scale_factor_s : list of factors linearly scaling the pileup
+                         values. One pileup is built per factor and
+                         they are combined by taking the maximum.
+
+        baseline_value : a value to be filled for missing values, and
+                         will be the minimum pileup.
+
+        """
+        tmp_pileup: list
+        prev_pileup: list
+        scale_factor: cython.float
+
+        prev_pileup = None
+
+        for i in range(len(scale_factor_s)):
+            scale_factor = scale_factor_s[i]
+
+            # Can't directly pass partial nparray there since that will mess up with pointer calculation.
+            tmp_pileup = quick_pileup(np.sort(self.__locations[chrom]['l']),
+                                      np.sort(self.__locations[chrom]['r']),
+                                      scale_factor, baseline_value)
+
+            if prev_pileup:
+                prev_pileup = over_two_pv_array(prev_pileup,
+                                                tmp_pileup,
+                                                func="max")
+            else:
+                prev_pileup = tmp_pileup  # first factor: nothing to combine yet
+
+        return prev_pileup
+
+    @cython.ccall
+    def pileup_a_chromosome_c(self,
+                              chrom: bytes,
+                              ds: list,
+                              scale_factor_s: list,
+                              baseline_value: cython.float = 0.0) -> list:
+        """pileup a certain chromosome, return [p,v] (end position and
+        value) list.
+
+        This function is for control track. Basically, here is a
+        simplified function from FixWidthTrack. We pretend the PE is
+        SE data and left read is on plus strand and right read is on
+        minus strand.
+
+        ds : list of extension sizes. For each d, d//2 is passed as
+             both the 5' and the 3' shift to se_all_in_one_pileup().
+             Final pileup will be the maximum over all d.
+        scale_factor_s : linearly scale the pileup value applied to
+                         each d in ds. The list should have the same
+                         length as ds.
+        baseline_value : a value to be filled for missing values, and
+                         will be the minimum pileup.
+        """
+        tmp_pileup: list
+        prev_pileup: list
+        scale_factor: cython.float
+        d: cython.long
+        five_shift: cython.long
+        three_shift: cython.long
+        rlength: cython.long = self.get_rlengths()[chrom]
+
+        if not self.__sorted:
+            self.sort()
+
+        assert len(ds) == len(scale_factor_s), "ds and scale_factor_s must have the same length!"
+
+        prev_pileup = None
+
+        for i in range(len(scale_factor_s)):
+            d = ds[i]
+            scale_factor = scale_factor_s[i]
+            five_shift = d//2
+            three_shift = d//2
+
+            tmp_pileup = se_all_in_one_pileup(self.__locations[chrom]['l'],
+                                              self.__locations[chrom]['r'],
+                                              five_shift,
+                                              three_shift,
+                                              rlength,
+                                              scale_factor,
+                                              baseline_value)
+
+            if prev_pileup:
+                prev_pileup = over_two_pv_array(prev_pileup,
+                                                tmp_pileup,
+                                                func="max")
+            else:
+                prev_pileup = tmp_pileup  # first d: nothing to combine yet
+
+        return prev_pileup
+
+    @cython.ccall
+    def pileup_bdg(self,
+                   scale_factor_s: list,
+                   baseline_value: cython.float = 0.0):
+        """pileup all chromosomes, and return a bedGraphTrackI object.
+
+        scale_factor_s : list of factors linearly scaling the pileup
+                         values. One pileup is built per factor and
+                         they are combined by taking the maximum.
+
+        baseline_value : a value to be filled for missing values, and
+                         will be the minimum pileup.
+
+        """
+        tmp_pileup: list
+        prev_pileup: list
+        scale_factor: cython.float
+        chrom: bytes
+        bdg: bedGraphTrackI
+
+        bdg = bedGraphTrackI(baseline_value=baseline_value)
+
+        for chrom in sorted(self.get_chr_names()):
+            prev_pileup = None
+            for i in range(len(scale_factor_s)):
+                scale_factor = scale_factor_s[i]
+
+                # Can't directly pass partial nparray there since that
+                # will mess up with pointer calculation.
+                tmp_pileup = quick_pileup(np.sort(self.__locations[chrom]['l']),
+                                          np.sort(self.__locations[chrom]['r']),
+                                          scale_factor,
+                                          baseline_value)
+
+                if prev_pileup:
+                    prev_pileup = over_two_pv_array(prev_pileup,
+                                                    tmp_pileup,
+                                                    func="max")
+                else:
+                    prev_pileup = tmp_pileup
+            # save to bedGraph: positions as C ints, values as C floats
+            bdg.add_chrom_data(chrom,
+                               pyarray('i', prev_pileup[0]),
+                               pyarray('f', prev_pileup[1]))
+        return bdg
+
+    @cython.ccall
+    def pileup_bdg_hmmr(self,
+                        mapping: list,
+                        baseline_value: cython.float = 0.0) -> list:
+        """pileup all chromosomes, and return a list with one dict per
+        entry of `mapping`; each dict maps a chromosome name to the
+        pileup computed for that entry (short, mono, di, and tri
+        nucleosomal signals when four weight mappings are given).
+
+        The idea is that for each fragment length, we generate four
+        pileups using four weights from four distributions.
+
+        Way to generate 'mapping', based on HMMR EM means and stddevs:
+        fl_dict = petrack.count_fraglengths()
+        fl_list = list(fl_dict.keys())
+        fl_list.sort()
+        weight_mapping = generate_weight_mapping(fl_list, em_means, em_stddevs)
+
+        """
+        ret_pileup: list = []
+        per_chrom: dict
+        names: list
+        chrom: bytes
+        i: cython.int
+
+        names = sorted(self.get_chr_names())
+        for i in range(len(mapping)):
+            per_chrom = {}
+            for chrom in names:
+                per_chrom[chrom] = pileup_from_LR_hmmratac(self.__locations[chrom], mapping[i])
+            ret_pileup.append(per_chrom)
+        return ret_pileup
+
+
+@cython.cclass
+class PEtrackII(PETrackI):
+    """Paired-end track that additionally keeps, for each fragment,
+    the hash of its barcode and a count.
+    """
+    # chromosome -> int64 array holding hash(barcode) of each fragment
+    __barcodes = cython.declare(dict, visibility="public")
+    __barcode_dict = cython.declare(dict, visibility="public")  # hash -> barcode
+    # add another dict for storing counts for each fragment
+    __counts = cython.declare(dict, visibility="public")
+
+    def __init__(self, args):
+        super().__init__()   # run PETrackI.__init__ (it was skipped before)
+        self.__barcodes = {}
+        self.__barcode_dict = {}
+
+    @cython.ccall
+    def add_frag(self,
+                 chromosome: bytes,
+                 start: cython.int,
+                 end: cython.int,
+                 barcode: bytes,
+                 count: cython.uchar):
+        """Add a location to the list according to the sequence name.
+
+        chromosome: mostly the chromosome name
+        start: left position of the fragment
+        end: right position of the fragment
+        barcode: the barcode of the fragment
+        count: the count of the fragment
+        """
+        i: cython.int
+        h: cython.longlong
+
+        h = hash(barcode)   # only the hash is stored per fragment
+        self.__barcode_dict[h] = barcode
+
+        if chromosome not in self.__locations:
+            self.__buf_size[chromosome] = self.buffer_size
+            # note: ['l'] is the leftmost end, ['r'] is the rightmost end of fragment.
+            self.__locations[chromosome] = np.zeros(shape=self.buffer_size,
+                                                    dtype=[('l', 'i4'), ('r', 'i4'), ('c', 'u1')])
+            self.__barcodes[chromosome] = np.zeros(shape=self.buffer_size, dtype='i8')
+            self.__locations[chromosome][0] = (start, end, count)
+            self.__barcodes[chromosome][0] = h
+            self.__size[chromosome] = 1
+        else:
+            i = self.__size[chromosome]
+            if self.__buf_size[chromosome] == i:
+                self.__buf_size[chromosome] += self.buffer_size
+                self.__locations[chromosome].resize((self.__buf_size[chromosome]),
+                                                    refcheck=False)
+                self.__barcodes[chromosome].resize(self.__buf_size[chromosome],  # grow with locations
+                                                   refcheck=False)
+            self.__locations[chromosome][i] = (start, end, count)
+            self.__barcodes[chromosome][i] = h
+            self.__size[chromosome] = i + 1
+        self.length += end - start
+
+    @cython.ccall
+    def destroy(self):
+        """Destroy this object and release mem.
+        """
+        chrs: set
+        chromosome: bytes
+
+        chrs = self.get_chr_names()
+        for chromosome in sorted(chrs):
+            if chromosome in self.__locations:
+                self.__locations[chromosome].resize(self.buffer_size,
+                                                    refcheck=False)
+                self.__locations[chromosome].resize(0,
+                                                    refcheck=False)
+                self.__locations[chromosome] = None
+                self.__locations.pop(chromosome)
+                # resize the per-chromosome array, not the dict holding it
+                self.__barcodes[chromosome].resize(self.buffer_size,
+                                                   refcheck=False)
+                self.__barcodes[chromosome].resize(0,
+                                                   refcheck=False)
+                self.__barcodes[chromosome] = None
+                self.__barcodes.pop(chromosome)
+        self.__barcode_dict = {}
+        self.__destroyed = True
diff --git a/MACS3/Signal/PairedEndTrack.pyx b/MACS3/Signal/PairedEndTrack.pyx
deleted file mode 100644
index 808f5d1c..00000000
--- a/MACS3/Signal/PairedEndTrack.pyx
+++ /dev/null
@@ -1,584 +0,0 @@
-# cython: language_level=3
-# cython: profile=True
-# Time-stamp: <2022-09-15 17:07:26 Tao Liu>
-
-"""Module for filter duplicate tags from paired-end data
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD License (see the file LICENSE included with
-the distribution).
-"""
-
-# ------------------------------------
-# Python modules
-# ------------------------------------
-import io
-import sys
-from copy import copy
-from array import array as pyarray
-from collections import Counter
-
-import logging
-import MACS3.Utilities.Logger
-
-logger = logging.getLogger(__name__)
-debug   = logger.debug
-info    = logger.info
-# ------------------------------------
-# MACS3 modules
-# ------------------------------------
-from MACS3.Utilities.Constants import *
-from MACS3.Signal.Pileup import quick_pileup, over_two_pv_array, se_all_in_one_pileup
-from MACS3.Signal.BedGraph import bedGraphTrackI
-from MACS3.Signal.PileupV2 import pileup_from_LR_hmmratac
-# ------------------------------------
-# Other modules
-# ------------------------------------
-import numpy as np
-cimport numpy as np
-from numpy cimport uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t, float32_t, float64_t
-from cpython cimport bool
-cimport cython
-
-
-cdef INT_MAX = <int32_t>((<uint32_t>(-1))>>1)
-
-# We don't use the following structs anymore
-# cdef packed struct peLoc:
-#     int32_t l
-#     int32_t r
-
-# cdef class PETrackChromosome:
-#     cdef:
-#         public np.ndarray locations
-#         public uint32_t pointer
-#         public uint32_t buffer_size
-#         public uint64_t coverage
-#         public uint64_t chrlen
-#         uint32_t __buffer_increment
-#         bool __sorted
-#         bool __destroyed
-
-# Let numpy enforce PE-ness using ndarray, gives bonus speedup when sorting
-# PE data doesn't have strandedness
-
-cdef class PETrackI:
-    """Paired End Locations Track class I along the whole genome
-    (commonly with the same annotation type), which are stored in a
-    dict.
-
-    Locations are stored and organized by sequence names (chr names) in a
-    dict. They can be sorted by calling self.sort() function.
-    """
-    cdef:
-        public dict __locations
-        public dict __size
-        public dict __buf_size
-        public bool __sorted
-        public uint64_t total
-        public object annotation
-        public dict rlengths
-        public int64_t buffer_size
-        public int64_t length
-        public float32_t average_template_length
-        bool   __destroyed
-
-    def __init__ (self, char * anno="", int64_t buffer_size = 100000 ):
-        """fw is the fixed-width for all locations.
-
-        """
-        self.__locations = {}    # dictionary with chrname as key, nparray with [('l','int32'),('r','int32')] as value
-        self.__size = {}      # dictionary with chrname as key, size of the above nparray as value
-        self.__buf_size = {}      # dictionary with chrname as key, size of the above nparray as value
-        self.__sorted = False
-        self.total = 0           # total fragments
-        self.annotation = anno   # need to be figured out
-        self.rlengths = {}
-        self.buffer_size = buffer_size
-        self.length = 0
-        self.average_template_length = 0.0
-
-    cpdef void add_loc ( self, bytes chromosome, int32_t start, int32_t end):
-        """Add a location to the list according to the sequence name.
-
-        chromosome -- mostly the chromosome name
-        fiveendpos -- 5' end pos, left for plus strand, right for neg strand
-        """
-        cdef:
-            int32_t i
-
-        if chromosome not in self.__locations:
-            self.__buf_size[chromosome] = self.buffer_size
-            self.__locations[chromosome] = np.zeros(shape=self.buffer_size, dtype=[('l','int32'),('r','int32')]) # note: ['l'] is the leftmost end, ['r'] is the rightmost end of fragment.
-            self.__locations[chromosome][0] = ( start, end )
-            self.__size[chromosome] = 1
-        else:
-            i = self.__size[chromosome]
-            if self.__buf_size[chromosome] == i:
-                self.__buf_size[chromosome] += self.buffer_size
-                self.__locations[chromosome].resize((self.__buf_size[chromosome]), refcheck = False )
-            self.__locations[chromosome][ i ] = ( start, end )
-            self.__size[chromosome] = i + 1
-        self.length += end - start
-        return
-
-    cpdef void destroy ( self ):
-        """Destroy this object and release mem.
-        """
-        cdef:
-            set chrs
-            bytes chromosome
-
-        chrs = self.get_chr_names()
-        for chromosome in sorted(chrs):
-            if chromosome in self.__locations:
-                self.__locations[chromosome].resize( self.buffer_size, refcheck=False )
-                self.__locations[chromosome].resize( 0, refcheck=False )
-                self.__locations[chromosome] = None
-                self.__locations.pop(chromosome)
-        self.__destroyed = True
-        return
-
-    cpdef bint set_rlengths ( self, dict rlengths ):
-        """Set reference chromosome lengths dictionary.
-
-        Only the chromosome existing in this petrack object will be updated.
-
-        If a chromosome in this petrack is not covered by given
-        rlengths, and it has no associated length, it will be set as
-        maximum integer.
-        """
-        cdef:
-            set valid_chroms, missed_chroms
-            bytes chrom
-
-        valid_chroms = set(self.__locations.keys()).intersection(rlengths.keys())
-        for chrom in sorted(valid_chroms):
-            self.rlengths[chrom] = rlengths[chrom]
-        missed_chroms = set(self.__locations.keys()).difference(rlengths.keys())
-        for chrom in sorted(missed_chroms):
-            self.rlengths[chrom] = INT_MAX
-        return True
-
-    cpdef dict get_rlengths ( self ):
-        """Get reference chromosome lengths dictionary.
-
-        If self.rlengths is empty, create a new dict where the length of
-        chromosome will be set as the maximum integer.
-        """
-        if not self.rlengths:
-            self.rlengths = dict([(k, INT_MAX) for k in self.__locations.keys()])
-        return self.rlengths
-
-    cpdef void finalize ( self ):
-        """ Resize np arrays for 5' positions and sort them in place
-
-        Note: If this function is called, it's impossible to append more files to this FWTrack object. So remember to call it after all the files are read!
-        """
-
-        cdef:
-            int32_t i
-            bytes c
-            set chrnames
-
-        self.total = 0
-
-        chrnames = self.get_chr_names()
-
-        for c in chrnames:
-            self.__locations[c].resize((self.__size[c]), refcheck=False)
-            self.__locations[c].sort( order=['l', 'r'] )
-            self.total += self.__size[c]
-
-        self.__sorted = True
-        self.average_template_length = <float32_t>( self.length ) / self.total
-        return
-
-    cpdef get_locations_by_chr ( self, bytes chromosome ):
-        """Return a tuple of two lists of locations for certain chromosome.
-
-        """
-        if chromosome in self.__locations:
-            return self.__locations[chromosome]
-        else:
-            raise Exception("No such chromosome name (%s) in TrackI object!\n" % (chromosome))
-
-    cpdef set get_chr_names ( self ):
-        """Return all the chromosome names in this track object as a python set.
-        """
-        return set(self.__locations.keys())
-
-
-    cpdef void sort ( self ):
-        """Naive sorting for locations.
-
-        """
-        cdef:
-            uint32_t i
-            bytes c
-            set chrnames
-
-        chrnames = self.get_chr_names()
-
-        for c in chrnames:
-            #print "before", self.__locations[c][0:100]
-            self.__locations[c].sort( order=['l', 'r'] ) # sort by the leftmost location
-            #print "before", self.__locations[c][0:100]
-        self.__sorted = True
-        return
-
-    cpdef dict count_fraglengths ( self ):
-        """Return a dictionary of the counts for sizes/fragment lengths of each pair.
-
-        This function is for HMMRATAC.
-        """
-        cdef:
-            np.ndarray[np.int32_t, ndim=1] sizes
-            np.int32_t s
-            np.ndarray locs
-            list chrnames
-            int i
-            #dict ret_dict
-            bytes k
-
-        counter = Counter()
-        chrnames = list( self.get_chr_names() )
-        for i in range( len(chrnames) ):
-            locs = self.__locations[ chrnames[i] ]
-            sizes = locs['r'] - locs['l']
-            for s in sizes:
-                counter[ s ] += 1
-        return dict(counter)
-
-    cpdef np.ndarray fraglengths ( self ):
-        """Return the sizes/fragment lengths of each pair.
-
-        This function is for HMMRATAC EM training.
-        """
-        cdef:
-            np.ndarray[np.int32_t, ndim=1] sizes
-            np.ndarray locs
-            list chrnames
-            int i
-
-        chrnames = list( self.get_chr_names() )
-        locs = self.__locations[ chrnames[ 0 ] ]
-        sizes = locs['r'] - locs['l']
-        for i in range( 1, len(chrnames) ):
-            locs = self.__locations[ chrnames[i] ]
-            sizes = np.concatenate( ( sizes, locs['r'] - locs['l'] ) )
-        return sizes    
-    
-    @cython.boundscheck(False) # do not check that np indices are valid
-    cpdef void filter_dup ( self, int32_t maxnum=-1):
-        """Filter the duplicated reads.
-
-        Run it right after you add all data into this object.
-        """
-        cdef:
-            int32_t i_chrom, n, start, end
-            int32_t loc_start, loc_end, current_loc_start, current_loc_end
-            uint64_t i
-            bytes k
-            np.ndarray locs
-            uint64_t locs_size
-            set chrnames
-            np.ndarray selected_idx
-
-        if maxnum < 0: return # condition to return if not filtering
-
-        if not self.__sorted: self.sort()
-
-        self.total = 0
-        #self.length = 0
-        self.average_template_length = 0.0
-        
-        chrnames = self.get_chr_names()
-
-        for k in chrnames: # for each chromosome
-            locs = self.__locations[k]
-            locs_size = locs.shape[0]
-            if locs_size == 1:
-                # do nothing and continue
-                continue
-            # discard duplicate reads and make a new __locations[k]
-            # initialize boolean array as all TRUE, or all being kept
-            selected_idx = np.ones( locs_size, dtype=bool)
-            # get the first loc
-            ( current_loc_start, current_loc_end ) = locs[0]
-            i = 1 # index of new_locs
-            n = 1 # the number of tags in the current genomic location
-            for i in range(1, locs_size):
-                ( loc_start, loc_end ) = locs[i]
-                if loc_start != current_loc_start or loc_end != current_loc_end:
-                    # not the same, update currnet_loc_start/end/l, reset n
-                    current_loc_start = loc_start
-                    current_loc_end = loc_end
-                    n = 1
-                    continue
-                else:
-                    # both ends are the same, add 1 to duplicate number n
-                    n += 1
-                    if n > maxnum:
-                        # change the flag to False
-                        selected_idx[ i ] = False
-                        # subtract current_loc_l from self.length
-                        self.length -= current_loc_end - current_loc_start
-            self.__locations[k] = locs[ selected_idx ]
-            self.__size[k] = self.__locations[k].shape[0]
-            self.total += self.__size[k]
-            # free memory?
-            # I know I should shrink it to 0 size directly,
-            # however, on Mac OSX, it seems directly assigning 0
-            # doesn't do a thing.
-            selected_idx.resize( self.buffer_size, refcheck=False)
-            selected_idx.resize( 0, refcheck=False)
-        self.average_template_length = self.length / self.total
-        return
-
-    cpdef void sample_percent (self, float32_t percent, int32_t seed = -1):
-        """Sample the tags for a given percentage.
-
-        Warning: the current object is changed! If a new PETrackI is wanted, use sample_percent_copy instead.
-        """
-        cdef:
-            uint32_t num, i_chrom      # num: number of reads allowed on a certain chromosome
-            bytes k
-            set chrnames
-            object rs, rs_shuffle
-
-        self.total = 0
-        self.length = 0
-        self.average_template_length = 0.0
-
-        chrnames = self.get_chr_names()
-
-        if seed >= 0:
-            info(f"#   A random seed {seed} has been used")
-            rs = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(seed)))
-            rs_shuffle = rs.shuffle
-        else:
-            rs_shuffle = np.random.shuffle
-
-        for k in sorted(chrnames):
-            # for each chromosome.
-            # This loop body is too big, I may need to split code later...
-
-            num = <uint32_t>round(self.__locations[k].shape[0] * percent, 5 )
-            rs_shuffle( self.__locations[k] )
-            self.__locations[k].resize( num, refcheck = False )
-            self.__locations[k].sort( order = ['l', 'r'] ) # sort by leftmost positions
-            self.__size[k] = self.__locations[k].shape[0]
-            self.length += ( self.__locations[k]['r'] - self.__locations[k]['l'] ).sum()
-            self.total += self.__size[k]
-        self.average_template_length = <float32_t>( self.length )/ self.total
-        return
-
-    cpdef object sample_percent_copy (self, float32_t percent, int32_t seed = -1):
-        """Sample the tags for a given percentage. Return a new PETrackI object
-
-        """
-        cdef:
-            uint32_t num, i_chrom      # num: number of reads allowed on a certain chromosome
-            bytes k
-            set chrnames
-            object ret_petrackI, rs, rs_shuffle
-            np.ndarray l
-
-        ret_petrackI = PETrackI( anno=self.annotation, buffer_size = self.buffer_size)
-        chrnames = self.get_chr_names()
-
-        if seed >= 0:
-            info(f"# A random seed {seed} has been used in the sampling function")
-            rs = np.random.default_rng(seed)
-        else:
-            rs = np.random.default_rng()
-
-        rs_shuffle = rs.shuffle
-        for k in sorted(chrnames): # chrnames need to be sorted otherwise we can't assure reproducibility
-            # for each chromosome.
-            # This loop body is too big, I may need to split code later...
-            l = np.copy( self.__locations[k] )
-            num = <uint32_t>round(l.shape[0] * percent, 5 )
-            rs_shuffle( l )
-            l.resize( num, refcheck = False )
-            l.sort( order = ['l', 'r'] ) # sort by leftmost positions
-            ret_petrackI.__locations[ k ] = l
-            ret_petrackI.__size[ k ] = l.shape[0]
-            ret_petrackI.length += ( l['r'] - l['l'] ).sum()
-            ret_petrackI.total += ret_petrackI.__size[ k ]
-        ret_petrackI.average_template_length = <float32_t>( ret_petrackI.length )/ ret_petrackI.total
-        ret_petrackI.set_rlengths( self.get_rlengths() )
-        return ret_petrackI
-
-    cpdef void sample_num (self, uint64_t samplesize, int32_t seed = -1):
-        """Sample the tags for a given number.
-
-        Warning: the current object is changed!
-        """
-        cdef:
-            float32_t percent
-        percent = <float32_t>(samplesize)/self.total
-        self.sample_percent ( percent, seed )
-        return
-
-    cpdef object sample_num_copy (self, uint64_t samplesize, int32_t seed = -1):
-        """Sample the tags for a given number.
-
-        Warning: the current object is changed!
-        """
-        cdef:
-            float32_t percent
-        percent = <float32_t>(samplesize)/self.total
-        return self.sample_percent_copy ( percent, seed )
-
-    cpdef void print_to_bed (self, fhd=None):
-        """Output to BEDPE format files. If fhd is given, write to a
-        file, otherwise, output to standard output.
-
-        """
-        cdef:
-            int32_t i, i_chrom, s, e
-            bytes k
-            set chrnames
-
-
-        if not fhd:
-            fhd = sys.stdout
-        assert isinstance(fhd, io.IOBase)
-
-        chrnames = self.get_chr_names()
-
-        for k in chrnames:
-            # for each chromosome.
-            # This loop body is too big, I may need to split code later...
-
-            locs = self.__locations[k]
-
-            for i in range(locs.shape[0]):
-                s, e = locs[ i ]
-                fhd.write("%s\t%d\t%d\n" % (k.decode(), s, e))
-        return
-
-    cpdef list pileup_a_chromosome ( self, bytes chrom, list scale_factor_s, float32_t baseline_value = 0.0 ):
-        """pileup a certain chromosome, return [p,v] (end position and value) list.
-
-        scale_factor_s  : linearly scale the pileup value applied to each d in ds. The list should have the same length as ds.
-        baseline_value : a value to be filled for missing values, and will be the minimum pileup.
-        """
-        cdef:
-            list tmp_pileup, prev_pileup
-            float32_t scale_factor
-
-        prev_pileup = None
-
-        for i in range(len(scale_factor_s)):
-            scale_factor = scale_factor_s[i]
-
-            tmp_pileup = quick_pileup ( np.sort(self.__locations[chrom]['l']), np.sort(self.__locations[chrom]['r']), scale_factor, baseline_value ) # Can't directly pass partial nparray there since that will mess up with pointer calculation.
-
-            if prev_pileup:
-                prev_pileup = over_two_pv_array ( prev_pileup, tmp_pileup, func="max" )
-            else:
-                prev_pileup = tmp_pileup
-
-        return prev_pileup
-
-    cpdef list pileup_a_chromosome_c ( self, bytes chrom, list ds, list scale_factor_s, float32_t baseline_value = 0.0 ):
-        """pileup a certain chromosome, return [p,v] (end position and value) list.
-
-        This function is for control track. Basically, here is a
-        simplified function from FixWidthTrack. We pretend the PE is
-        SE data and left read is on plus strand and right read is on
-        minus strand.
-
-        ds             : tag will be extended to this value to 3' direction,
-                         unless directional is False. Can contain multiple extension
-                         values. Final pileup will the maximum.
-        scale_factor_s  : linearly scale the pileup value applied to each d in ds. The list should have the same length as ds.
-        baseline_value : a value to be filled for missing values, and will be the minimum pileup.
-        """
-        cdef:
-            list tmp_pileup, prev_pileup
-            float32_t scale_factor
-            int64_t d, five_shift, three_shift
-            int64_t rlength = self.get_rlengths()[chrom]
-
-        if not self.__sorted: self.sort()
-
-        assert len(ds) == len(scale_factor_s), "ds and scale_factor_s must have the same length!"
-
-        prev_pileup = None
-
-        for i in range(len(scale_factor_s)):
-            d = ds[i]
-            scale_factor = scale_factor_s[i]
-            five_shift = d//2
-            three_shift= d//2
-
-            tmp_pileup = se_all_in_one_pileup ( self.__locations[chrom]['l'], self.__locations[chrom]['r'], five_shift, three_shift, rlength, scale_factor, baseline_value )
-
-            if prev_pileup:
-                prev_pileup = over_two_pv_array ( prev_pileup, tmp_pileup, func="max" )
-            else:
-                prev_pileup = tmp_pileup
-
-        return prev_pileup
-
-
-    cpdef object pileup_bdg ( self, list scale_factor_s, float32_t baseline_value = 0.0 ):
-        """pileup all chromosomes, and return a bedGraphTrackI object.
-
-        scale_factor_s  : linearly scale the pileup value applied to each d in ds. The list should have the same length as ds.
-        baseline_value : a value to be filled for missing values, and will be the minimum pileup.
-        """
-        cdef:
-            list tmp_pileup, prev_pileup
-            float32_t scale_factor
-            bytes chrom
-            object bdg
-            int32_t prev_s
-
-        #info(f"start to pileup")
-        bdg = bedGraphTrackI( baseline_value = baseline_value )
-
-        for chrom in sorted(self.get_chr_names()):
-            prev_pileup = None
-            for i in range(len(scale_factor_s)):
-                scale_factor = scale_factor_s[i]
-
-                tmp_pileup = quick_pileup ( np.sort(self.__locations[chrom]['l']), np.sort(self.__locations[chrom]['r']), scale_factor, baseline_value ) # Can't directly pass partial nparray there since that will mess up with pointer calculation.
-
-                if prev_pileup:
-                    prev_pileup = over_two_pv_array ( prev_pileup, tmp_pileup, func="max" )
-                else:
-                    prev_pileup = tmp_pileup
-            # save to bedGraph
-            bdg.add_chrom_data( chrom, pyarray('i', prev_pileup[0]), pyarray('f', prev_pileup[1]) )
-        return bdg
-
-    cpdef list pileup_bdg_hmmr ( self, list mapping, float32_t baseline_value = 0.0 ):
-        """pileup all chromosomes, and return a list of four bedGraphTrackI objects: short, mono, di, and tri nucleosomal signals.
-
-        The idea is that for each fragment length, we generate four bdg using four weights from four distributions. Then we add all sets of four bdgs together.
-
-        Way to generate 'mapping', based on HMMR EM means and stddevs:
-        fl_dict = petrack.count_fraglengths()
-        fl_list = list(fl_dict.keys())
-        fl_list.sort()
-        weight_mapping = generate_weight_mapping( fl_list, em_means, em_stddevs )
-        """
-        cdef:
-            list ret_pileup
-            set chroms
-            bytes chrom
-            int i
-
-        ret_pileup = []
-        for i in range( len(mapping) ): ret_pileup.append( {} )
-        chroms = self.get_chr_names()
-        for i in range( len(mapping) ):
-            for chrom in sorted(chroms):
-                ret_pileup[ i ][ chrom ] = pileup_from_LR_hmmratac( self.__locations[ chrom ], mapping[ i ] )
-        return ret_pileup
-
diff --git a/MACS3/Signal/ScoreTrack.pyx b/MACS3/Signal/ScoreTrack.pyx
index 0426b18a..1ef3d31b 100644
--- a/MACS3/Signal/ScoreTrack.pyx
+++ b/MACS3/Signal/ScoreTrack.pyx
@@ -1,6 +1,6 @@
 # cython: language_level=3
 # cython: profile=True
-# Time-stamp: <2024-05-14 12:06:19 Tao Liu>
+# Time-stamp: <2024-10-10 16:45:13 Tao Liu>
 
 """Module for Feature IO classes.
 
@@ -20,7 +20,7 @@ from functools import reduce
 # ------------------------------------
 from MACS3.Signal.SignalProcessing import maxima, enforce_valleys, enforce_peakyness
 from MACS3.Signal.Prob import poisson_cdf
-from MACS3.IO.PeakIO import PeakIO, BroadPeakIO, parse_peakname
+from MACS3.IO.PeakIO import PeakIO, BroadPeakIO
 
 # ------------------------------------
 # Other modules
diff --git a/setup.py b/setup.py
index a36e558b..65d78062 100644
--- a/setup.py
+++ b/setup.py
@@ -120,7 +120,7 @@ def main():
                              include_dirs=numpy_include_dir,
                              extra_compile_args=extra_c_args),
                    Extension("MACS3.Signal.PairedEndTrack",
-                             ["MACS3/Signal/PairedEndTrack.pyx"],
+                             ["MACS3/Signal/PairedEndTrack.py"],
                              include_dirs=numpy_include_dir,
                              extra_compile_args=extra_c_args),
                    Extension("MACS3.Signal.BedGraph",
@@ -188,7 +188,7 @@ def main():
                              ["MACS3/IO/Parser.py"],
                              extra_compile_args=extra_c_args),
                    Extension("MACS3.IO.PeakIO",
-                             ["MACS3/IO/PeakIO.pyx"],
+                             ["MACS3/IO/PeakIO.py"],
                              extra_compile_args=extra_c_args),
                    Extension("MACS3.IO.BedGraphIO",
                              ["MACS3/IO/BedGraphIO.py"],