From 978febcf9cc13eaebdbc9e9366855dff282c19a0 Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Sun, 6 Dec 2020 23:25:39 -0500 Subject: [PATCH 1/6] use buffer to read input file every 10M bytes --- MACS3/IO/Parser.pyx | 205 +++++++++++++++++++++-------------- MACS3/Utilities/Constants.py | 3 +- 2 files changed, 128 insertions(+), 80 deletions(-) diff --git a/MACS3/IO/Parser.pyx b/MACS3/IO/Parser.pyx index 5dfc4f59..5e844659 100644 --- a/MACS3/IO/Parser.pyx +++ b/MACS3/IO/Parser.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # cython: profile=True # cython: linetrace=True -# Time-stamp: <2020-12-03 11:48:43 Tao Liu> +# Time-stamp: <2020-12-06 23:24:53 Tao Liu> """Module for all MACS Parser classes for input. @@ -87,7 +87,7 @@ cpdef guess_parser ( fname, int64_t buffer_size = 100000 ): t_parser.close() raise Exception( "Can't detect format!" ) -cdef tuple __fw_binary_parse_le ( const unsigned char * data ): +cdef tuple __bam_fw_binary_parse_le ( const unsigned char * data ): """Parse a BAM SE entry in little endian system """ cdef: @@ -135,7 +135,7 @@ cdef tuple __fw_binary_parse_le ( const unsigned char * data ): return ( thisref, thisstart, thisstrand ) -cdef tuple __fw_binary_parse_be ( const unsigned char * data ): +cdef tuple __bam_fw_binary_parse_be ( const unsigned char * data ): """Big endian version. We need byte swap. """ cdef: @@ -191,7 +191,7 @@ cdef tuple __fw_binary_parse_be ( const unsigned char * data ): return ( thisref, thisstart, thisstrand ) -cdef tuple __pe_binary_parse_le (const unsigned char * data): +cdef tuple __bampe_pe_binary_parse_le (const unsigned char * data): """Parse a BAMPE record in little-endian system. """ cdef: @@ -234,7 +234,7 @@ cdef tuple __pe_binary_parse_le (const unsigned char * data): return ( thisref, thisstart, thistlen ) -cdef tuple __pe_binary_parse_be (const unsigned char * data): +cdef tuple __bampe_pe_binary_parse_be (const unsigned char * data): """Parse a BAMPE record in big-endian system. And we need byte swap. """ cdef: @@ -281,11 +281,11 @@ cdef tuple __pe_binary_parse_be (const unsigned char * data): # choose a parser according to endian if is_le: - se_entry_parser = __fw_binary_parse_le - pe_entry_parser = __pe_binary_parse_le + bam_se_entry_parser = __bam_fw_binary_parse_le + bampe_pe_entry_parser = __bampe_pe_binary_parse_le else: - se_entry_parser = __fw_binary_parse_be - pe_entry_parser = __pe_binary_parse_be + bam_se_entry_parser = __bam_fw_binary_parse_be + bampe_pe_entry_parser = __bampe_pe_binary_parse_be # ------------------------------------ # Classes @@ -343,7 +343,7 @@ cdef class GenericParser: f.close() if self.gzipped: # open with gzip.open, then wrap it with BufferedReader! - self.fhd = io.BufferedReader( gzip.open( filename, mode='rb' ), buffer_size = 1048576 ) # buffersize set to 1M + self.fhd = io.BufferedReader( gzip.open( filename, mode='rb' ), buffer_size = READ_BUFFER_SIZE ) # buffersize set to 10M else: self.fhd = io.open( filename, mode='rb' ) # binary mode! I don't expect unicode here! self.__skip_first_commentlines() @@ -403,26 +403,36 @@ cdef class GenericParser: * BAMParser for binary BAM format should have a different one. """ cdef: - int64_t i, m, fpos, strand + int64_t i, fpos, strand bytes chromosome + bytes tmp fwtrack = FWTrack( buffer_size = self.buffer_size ) i = 0 - m = 0 - for thisline in self.fhd: - ( chromosome, fpos, strand ) = self.__fw_parse_line( thisline ) - i+=1 - if fpos < 0 or not chromosome: - # normally __fw_parse_line will return -1 if the line - # contains no successful alignment. 
- continue - if i % 1000000 == 0: - info( " %d" % i ) - fwtrack.add_loc( chromosome, fpos, strand ) - - # close fwtrack and sort - # fwtrack.finalize() - # this is the problematic part. If fwtrack is finalized, then it's impossible to increase the length of it in a step of buffer_size for multiple input files. + tmp = b"" + while True: + # for each block of input + tmp += self.fhd.read( READ_BUFFER_SIZE ) + if not tmp: + break + lines = tmp.split(b"\n") + tmp = lines[ -1 ] + for thisline in lines[ :-1 ]: + ( chromosome, fpos, strand ) = self.__fw_parse_line( thisline ) + if fpos < 0 or not chromosome: + # normally __fw_parse_line will return -1 if the line + # contains no successful alignment. + continue + i += 1 + if i % 1000000 == 0: + info( " %d reads parsed" % i ) + fwtrack.add_loc( chromosome, fpos, strand ) + # last one + if tmp: + ( chromosome, fpos, strand ) = self.__fw_parse_line( tmp ) + if fpos >= 0 and chromosome: + i += 1 + fwtrack.add_loc( chromosome, fpos, strand ) # close file stream. self.close() return fwtrack @@ -431,22 +441,37 @@ cdef class GenericParser: """Add more records to an existing FWTrack object. """ + cdef: + int64_t i, fpos, strand + bytes chromosome + bytes tmp i = 0 - m = 0 - for thisline in self.fhd: - ( chromosome, fpos, strand ) = self.__fw_parse_line( thisline ) - i+=1 - if fpos < 0 or not chromosome: - # normally __fw_parse_line will return -1 if the line - # contains no successful alignment. - continue - if i % 1000000 == 0: - info( " %d" % i ) - fwtrack.add_loc( chromosome, fpos, strand ) - - # close fwtrack and sort - #fwtrack.finalize() - # this is the problematic part. If fwtrack is finalized, then it's impossible to increase the length of it in a step of buffer_size for multiple input files. + tmp = "b" + while True: + # for each block of input + tmp += self.fhd.read( READ_BUFFER_SIZE ) + if not tmp: + break + lines = tmp.split(b"\n") + tmp = lines[ -1 ] + for thisline in lines[ :-1 ]: + ( chromosome, fpos, strand ) = self.__fw_parse_line( thisline ) + if fpos < 0 or not chromosome: + # normally __fw_parse_line will return -1 if the line + # contains no successful alignment. + continue + i += 1 + if i % 1000000 == 0: + info( " %d reads parsed" % i ) + fwtrack.add_loc( chromosome, fpos, strand ) + + # last one + if tmp: + ( chromosome, fpos, strand ) = self.__fw_parse_line( tmp ) + if fpos >= 0 and chromosome: + i += 1 + fwtrack.add_loc( chromosome, fpos, strand ) + # close file stream. 
self.close() return fwtrack @@ -612,27 +637,39 @@ cdef class BEDPEParser(GenericParser): int32_t right_pos int64_t i = 0 # number of fragments int64_t m = 0 # sum of fragment lengths + bytes tmp = b"" petrack = PETrackI( buffer_size = self.buffer_size ) add_loc = petrack.add_loc - for thisline in self.fhd: + while True: + # for each block of input + tmp += self.fhd.read( READ_BUFFER_SIZE ) + if not tmp: + break + lines = tmp.split(b"\n") + tmp = lines[ -1 ] + for thisline in lines[ :-1 ]: + ( chromosome, left_pos, right_pos ) = self.__pe_parse_line( thisline ) + if left_pos < 0 or not chromosome: + continue + assert right_pos > left_pos, "Right position must be larger than left position, check your BED file at line: %s" % thisline + m += right_pos - left_pos + i += 1 + if i % 1000000 == 0: + info( " %d fragments parsed" % i ) + add_loc( chromosome, left_pos, right_pos ) + # last one + if tmp: ( chromosome, left_pos, right_pos ) = self.__pe_parse_line( thisline ) - if left_pos < 0 or not chromosome: - continue - - assert right_pos > left_pos, "Right position must be larger than left position, check your BED file at line: %s" % thisline - m += right_pos - left_pos - i += 1 - - if i % 1000000 == 0: - info( " %d" % i ) - - add_loc( chromosome, left_pos, right_pos ) - + if left_pos >= 0 and chromosome: + assert right_pos > left_pos, "Right position must be larger than left position, check your BED file at line: %s" % thisline + i += 1 + m += right_pos - left_pos + add_loc( chromosome, left_pos, right_pos ) + self.d = ( m ) / i self.n = i - assert self.d >= 0, "Something went wrong (mean fragment size was negative)" self.close() @@ -648,29 +685,39 @@ cdef class BEDPEParser(GenericParser): int32_t right_pos int64_t i = 0 # number of fragments int64_t m = 0 # sum of fragment lengths + bytes tmp = b"" add_loc = petrack.add_loc - - for thisline in self.fhd: + while True: + # for each block of input + tmp += self.fhd.read( READ_BUFFER_SIZE ) + if not tmp: + break + lines = tmp.split(b"\n") + tmp = lines[ -1 ] + for thisline in lines[ :-1 ]: + ( chromosome, left_pos, right_pos ) = self.__pe_parse_line( thisline ) + if left_pos < 0 or not chromosome: + continue + assert right_pos > left_pos, "Right position must be larger than left position, check your BED file at line: %s" % thisline + m += right_pos - left_pos + i += 1 + if i % 1000000 == 0: + info( " %d fragments parsed" % i ) + add_loc( chromosome, left_pos, right_pos ) + # last one + if tmp: ( chromosome, left_pos, right_pos ) = self.__pe_parse_line( thisline ) - - if left_pos < 0 or not chromosome: - continue - - assert right_pos > left_pos, "Right position must be larger than left position, check your BED file at line: %s" % thisline - m += right_pos - left_pos - i += 1 - - if i % 1000000 == 0: - info( " %d" % i ) - - add_loc( chromosome, left_pos, right_pos ) + if left_pos >= 0 and chromosome: + assert right_pos > left_pos, "Right position must be larger than left position, check your BED file at line: %s" % thisline + i += 1 + m += right_pos - left_pos + add_loc( chromosome, left_pos, right_pos ) self.d = ( self.d * self.n + m ) / ( self.n + i ) self.n += i assert self.d >= 0, "Something went wrong (mean fragment size was negative)" - self.close() petrack.set_rlengths( {"DUMMYCHROM":0} ) return petrack @@ -1055,7 +1102,7 @@ cdef class BAMParser( GenericParser ): f.close() if self.gzipped: # open with gzip.open, then wrap it with BufferedReader! 
- self.fhd = io.BufferedReader( gzip.open( filename, mode='rb' ), buffer_size = 1048576) # buffersize set to 1M + self.fhd = io.BufferedReader( gzip.open( filename, mode='rb' ), buffer_size = READ_BUFFER_SIZE) # buffersize set to 1M else: self.fhd = io.open( filename, mode='rb' ) # binary mode! I don't expect unicode here! @@ -1174,12 +1221,12 @@ cdef class BAMParser( GenericParser ): entrylength = unpack( " Date: Sun, 6 Dec 2020 23:34:32 -0500 Subject: [PATCH 2/6] fix a typo --- MACS3/IO/Parser.pyx | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/MACS3/IO/Parser.pyx b/MACS3/IO/Parser.pyx index 5e844659..62c3f846 100644 --- a/MACS3/IO/Parser.pyx +++ b/MACS3/IO/Parser.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # cython: profile=True # cython: linetrace=True -# Time-stamp: <2020-12-06 23:24:53 Tao Liu> +# Time-stamp: <2020-12-06 23:32:28 Tao Liu> """Module for all MACS Parser classes for input. @@ -405,11 +405,10 @@ cdef class GenericParser: cdef: int64_t i, fpos, strand bytes chromosome - bytes tmp + bytes tmp = b"" fwtrack = FWTrack( buffer_size = self.buffer_size ) i = 0 - tmp = b"" while True: # for each block of input tmp += self.fhd.read( READ_BUFFER_SIZE ) @@ -444,9 +443,8 @@ cdef class GenericParser: cdef: int64_t i, fpos, strand bytes chromosome - bytes tmp + bytes tmp = b"" i = 0 - tmp = "b" while True: # for each block of input tmp += self.fhd.read( READ_BUFFER_SIZE ) From c85c3fd614acff3599e6ae3b82cb4c37d5faadf7 Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Sun, 6 Dec 2020 23:55:41 -0500 Subject: [PATCH 3/6] rename pvalue_stat to pscore_stat --- MACS3/Signal/CallPeakUnit.pyx | 42 +++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/MACS3/Signal/CallPeakUnit.pyx b/MACS3/Signal/CallPeakUnit.pyx index 8a976a56..4c9ebcde 100644 --- a/MACS3/Signal/CallPeakUnit.pyx +++ b/MACS3/Signal/CallPeakUnit.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # cython: profile=True # cython: linetrace=True -# Time-stamp: <2020-12-03 16:07:01 Tao Liu> +# Time-stamp: <2020-12-06 23:54:17 Tao Liu> """Module for Calculate Scores. @@ -617,7 +617,7 @@ cdef class CallerFromAlignments: cdef: bytes chrom np.ndarray pos_array, treat_array, ctrl_array, score_array - dict pvalue_stat + dict pscore_stat int64_t n, pre_p, length, pre_l, l, i, j float32_t this_v, pre_v, v, q, pre_q int64_t N, k, this_l @@ -629,7 +629,7 @@ cdef class CallerFromAlignments: logging.debug ( "Start to calculate pvalue stat..." 
) - pvalue_stat = {} #dict() + pscore_stat = {} #dict() for i in range( len( self.chromosomes ) ): chrom = self.chromosomes[ i ] pre_p = 0 @@ -644,16 +644,16 @@ cdef class CallerFromAlignments: for j in range(pos_array.shape[0]): this_v = get_pscore( ((treat_value_ptr[0]), ctrl_value_ptr[0] ) ) this_l = pos_ptr[0] - pre_p - if this_v in pvalue_stat: - pvalue_stat[ this_v ] += this_l + if this_v in pscore_stat: + pscore_stat[ this_v ] += this_l else: - pvalue_stat[ this_v ] = this_l + pscore_stat[ this_v ] = this_l pre_p = pos_ptr[0] pos_ptr += 1 treat_value_ptr += 1 ctrl_value_ptr += 1 - N = sum(pvalue_stat.values()) # total length + N = sum(pscore_stat.values()) # total length k = 1 # rank f = -log10(N) pre_v = -2147483647 @@ -661,10 +661,10 @@ cdef class CallerFromAlignments: pre_q = 2147483647 # save the previous q-value self.pqtable = Float32to32Map( for_int = False ) - unique_values = sorted(list(pvalue_stat.keys()), reverse=True) + unique_values = sorted(list(pscore_stat.keys()), reverse=True) for i in range(len(unique_values)): v = unique_values[i] - l = pvalue_stat[v] + l = pscore_stat[v] q = v + (log10(k) + f) if q > pre_q: q = pre_q @@ -689,7 +689,7 @@ cdef class CallerFromAlignments: cdef: bytes chrom np.ndarray pos_array, treat_array, ctrl_array, score_array - dict pvalue_stat + dict pscore_stat int64_t n, pre_p, this_p, length, j, pre_l, l, i float32_t q, pre_q, this_t, this_c float32_t this_v, pre_v, v, cutoff @@ -714,8 +714,8 @@ cdef class CallerFromAlignments: # tmplist contains a list of log pvalue cutoffs from 0.3 to 10 tmplist = [round(x,5) for x in sorted( list(np.arange(0.3, 10.0, 0.3)), reverse = True )] - pvalue_stat = {} #dict() - #print (list(pvalue_stat.keys())) + pscore_stat = {} #dict() + #print (list(pscore_stat.keys())) #print (list(self.pvalue_length.keys())) #print (list(self.pvalue_npeaks.keys())) for i in range( len( self.chromosomes ) ): @@ -777,25 +777,25 @@ cdef class CallerFromAlignments: this_p = pos_array_ptr[ 0 ] this_l = this_p - pre_p this_v = score_array_ptr[ 0 ] - if this_v in pvalue_stat: - pvalue_stat[ this_v ] += this_l + if this_v in pscore_stat: + pscore_stat[ this_v ] += this_l else: - pvalue_stat[ this_v ] = this_l + pscore_stat[ this_v ] = this_l pre_p = this_p #pos_array[ i ] pos_array_ptr += 1 score_array_ptr += 1 - #logging.debug ( "make pvalue_stat cost %.5f seconds" % t ) + #logging.debug ( "make pscore_stat cost %.5f seconds" % t ) # add all pvalue cutoffs from cutoff-analysis part. So that we # can get the corresponding qvalues for them. 
for cutoff in tmplist: - if cutoff not in pvalue_stat: - pvalue_stat[ cutoff ] = 0 + if cutoff not in pscore_stat: + pscore_stat[ cutoff ] = 0 nhval = 0 - N = sum(pvalue_stat.values()) # total length + N = sum(pscore_stat.values()) # total length k = 1 # rank f = -log10(N) pre_v = -2147483647 @@ -803,10 +803,10 @@ cdef class CallerFromAlignments: pre_q = 2147483647 # save the previous q-value self.pqtable = Float32to32Map( for_int = False ) #{} - unique_values = sorted(list(pvalue_stat.keys()), reverse=True) #sorted(unique_values,reverse=True) + unique_values = sorted(list(pscore_stat.keys()), reverse=True) #sorted(unique_values,reverse=True) for i in range(len(unique_values)): v = unique_values[i] - l = pvalue_stat[v] + l = pscore_stat[v] q = v + (log10(k) + f) if q > pre_q: q = pre_q From cdc9b7ec04f75b9e3e801097aadd822dcee97e87 Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Sun, 6 Dec 2020 23:55:51 -0500 Subject: [PATCH 4/6] update README --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 84f721a2..0c008b60 100644 --- a/README.md +++ b/README.md @@ -39,16 +39,18 @@ add more new features in the future.** ### 3.0.0a2 * Features - 1) Speed/memory optimization, including using the cykhash to - replace python dictionary + 1) Speed/memory optimization. Use the cykhash to replace python + dictionary. Use buffer (10MB) to read and parse input file (not + available for BAM file parser). And many optimization tweaks. - 2) Code cleanup + 2) Code cleanup. Reorganize source codes. - 3) Unit testing + 3) Unit testing. 4) R wrappers for MACS -- MACSr - 5) Switching to Github Action for CI, support multi-arch testing + 5) Switch to Github Action for CI, support multi-arch testing + including x64, armv7, aarch64, s390x and ppc64le. 6) MACS tag-shifting model has been refined. Now it will use a naive peak calling approach to find ALL possible paired peaks at + From 5f21c5055c8209888ae37d2dc2a6885565452394 Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Mon, 7 Dec 2020 00:00:55 -0500 Subject: [PATCH 5/6] update documents --- ChangeLog | 27 +++++++++++++++++++++++++++ README.md | 8 +++++--- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0f87e62e..d9c8659b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,30 @@ +2020-12-06 Tao Liu + MACS 3.0.0a2 + + * New features: + + 1) Speed/memory optimization. Use the cykhash to replace python + dictionary. Use buffer (10MB) to read and parse input file (not + available for BAM file parser). And many optimization tweaks. + + 2) Code cleanup. Reorganize source codes. + + 3) Unit testing. + + 4) R wrappers for MACS -- MACSr + + 5) Switch to Github Action for CI, support multi-arch testing + including x64, armv7, aarch64, s390x and ppc64le. + + 6) MACS tag-shifting model has been refined. Now it will use a + naive peak calling approach to find ALL possible paired peaks at + + and - strand, then use all of them to calculate the + cross-correlation. + + 7) Call variants in peak regions directly from BAM files. The + function was originally developed under code name SAPPER. Now + SAPPER has been merged into MACS. + 2020-04-11 Tao Liu MACS version 2.2.7.1 diff --git a/README.md b/README.md index 0c008b60..1a27a8ac 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ add more new features in the future.** ## Recent Changes for MACS (3.0.0a2) ### 3.0.0a2 - * Features + * New features 1) Speed/memory optimization. Use the cykhash to replace python dictionary. 
Use buffer (10MB) to read and parse input file (not @@ -58,8 +58,10 @@ add more new features in the future.** cross-correlation. 7) Call variants in peak regions directly from BAM files. The - function was originally developed under code name SAPPER. Now - SAPPER has been merged into MACS. + function was originally developed under code name SAPPER. Now + SAPPER has been merged into MACS. Also, `simde` has been added as + a submodule in order to support fermi-lite library under non-x64 + architectures. ## Install From 575c9a6952c87a0b81b3b21ed3bec29864037cc9 Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Mon, 7 Dec 2020 00:11:20 -0500 Subject: [PATCH 6/6] update manifest.in and license --- LICENSE | 4 +++- MANIFEST.in | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/LICENSE b/LICENSE index 66d7a751..02123625 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,8 @@ BSD 3-Clause License -Copyright (c) 2019, Tao Liu lab at Roswell Park Comprehensive Cancer Center and Xiaole Shirley Liu lab at Dana-Farber Cancer Institute, All rights reserved. +Copyright (c) 2020, Tao Liu lab at Roswell Park Comprehensive Cancer +Center and Xiaole Shirley Liu lab at Dana-Farber Cancer Institute, All +rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/MANIFEST.in b/MANIFEST.in index ca60b9d2..b5b15b64 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,5 @@ include README.md LICENSE ChangeLog MANIFEST.in setup.py bin/macs3 recursive-include MACS3 *.py *.pyx *.pxd *.c *.h recursive-include docs *.md -exclude .gitignore .travis.yml prune test -prune .github -prune DOCKER +prune MACS3/fermi-lite/lib/test/
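
For reference, the buffered parsing pattern that PATCH 1/6 introduces into `GenericParser.build_fwtrack`, `append_fwtrack`, and the `BEDPEParser` methods boils down to: read the input in fixed-size blocks, split each block on newlines, keep the trailing partial line in `tmp`, and prepend it to the next block. Below is a minimal pure-Python sketch of that idea, not a transcription of the MACS3 code: `parse_line` and `track` are hypothetical stand-ins for `__fw_parse_line` and `FWTrack`/`PETrackI`, and the 10M constant mirrors the new `READ_BUFFER_SIZE` that the diffstat shows being added to `MACS3/Utilities/Constants.py`.

```python
# Minimal sketch of the block-buffered line-parsing loop (see assumptions above).
READ_BUFFER_SIZE = 10 * 1024 * 1024   # 10M block size, mirroring the new constant

def parse_in_blocks(fhd, parse_line, track):
    """Read `fhd` in READ_BUFFER_SIZE blocks, feed every complete line to
    `parse_line`, and carry the trailing partial line over to the next block."""
    i = 0
    tmp = b""
    while True:
        block = fhd.read(READ_BUFFER_SIZE)
        if not block and not tmp:
            break                        # stream exhausted, nothing left over
        tmp += block
        if block:
            lines = tmp.split(b"\n")
            tmp = lines.pop()            # last element may be an incomplete line
        else:
            lines, tmp = [tmp], b""      # final, possibly unterminated line
        for thisline in lines:
            chromosome, fpos, strand = parse_line(thisline)
            if fpos < 0 or not chromosome:
                continue                 # no usable alignment on this line
            i += 1
            if i % 1000000 == 0:
                print(" %d reads parsed" % i)
            track.add_loc(chromosome, fpos, strand)
    return i
```

The intent, as far as the patch shows, is to cut down on per-line Python-level reads, which is also why the gzip stream is wrapped in an `io.BufferedReader` sized to the same `READ_BUFFER_SIZE`.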
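
Similarly, the `pscore_stat` dictionary renamed in PATCH 3/6 maps each -log10(p) score to the total genomic length covered at that score, and the loops in `CallerFromAlignments` convert it into a p-score-to-q-score table using a Benjamini-Hochberg style correction done in -log10 space (`q = v + log10(k) + f` with `f = -log10(N)`). The following stand-alone sketch illustrates that conversion; the clamping against the previous q value and the rank update `k += l` fall outside the quoted hunks, so they are assumptions here rather than the exact MACS3 code.

```python
from math import log10

def make_pq_table(pscore_stat):
    """Sketch: convert {-log10(p): covered_length} into {-log10(p): -log10(q)}.
    With p = 10**-v, BH gives q = p * N / k, i.e. -log10(q) = v + log10(k) - log10(N),
    where N is the total covered length and k the cumulative rank."""
    pqtable = {}
    if not pscore_stat:
        return pqtable
    N = sum(pscore_stat.values())        # total covered length
    f = -log10(N)
    k = 1                                # rank of the best-scoring position
    pre_q = 2147483647                   # q must be non-increasing as v decreases
    for v in sorted(pscore_stat, reverse=True):
        l = pscore_stat[v]               # length covered at exactly this score
        q = v + (log10(k) + f)           # BH correction in -log10 space
        q = max(0.0, min(pre_q, q))      # assumed monotonicity/clamping step
        pqtable[v] = q
        pre_q = q
        k += l                           # assumed: advance rank by covered length
    return pqtable
```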