Skip to content

Commit

Permalink
move IO functions for bedGraph to BedGraphIO class
Browse files Browse the repository at this point in the history
  • Loading branch information
taoliu committed May 15, 2024
1 parent 3d32000 commit 0ae471c
Show file tree
Hide file tree
Showing 11 changed files with 145 additions and 120 deletions.
4 changes: 2 additions & 2 deletions MACS3/Commands/bdgbroadcall_cmd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Time-stamp: <2020-12-03 14:26:29 Tao Liu>
# Time-stamp: <2024-05-15 10:41:43 Tao Liu>

"""Description: Fine-tuning script to call broad peaks from a single
bedGraph track for scores.
Expand Down Expand Up @@ -41,7 +41,7 @@
def run( options ):
info("Read and build bedGraph...")
bio = BedGraphIO.bedGraphIO(options.ifile)
btrack = bio.build_bdgtrack(baseline_value=0)
btrack = bio.read_bedGraph(baseline_value=0)

info("Call peaks from bedGraph...")

Expand Down
9 changes: 5 additions & 4 deletions MACS3/Commands/bdgcmp_cmd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Time-stamp: <2020-11-30 13:49:34 Tao Liu>
# Time-stamp: <2024-05-15 10:13:23 Tao Liu>

"""Description: compare bdg files
Expand Down Expand Up @@ -39,11 +39,11 @@ def run( options ):

info("Read and build treatment bedGraph...")
tbio = BedGraphIO.bedGraphIO(options.tfile)
tbtrack = tbio.build_bdgtrack()
tbtrack = tbio.read_bedGraph()

info("Read and build control bedGraph...")
cbio = BedGraphIO.bedGraphIO(options.cfile)
cbtrack = cbio.build_bdgtrack()
cbtrack = cbio.read_bedGraph()

info("Build ScoreTrackII...")
sbtrack = tbtrack.make_ScoreTrackII_for_macs( cbtrack, depth1 = pseudo_depth, depth2 = pseudo_depth )
Expand Down Expand Up @@ -86,6 +86,7 @@ def run( options ):
raise Exception("Can't reach here!")

info("Write bedGraph of scores...")
ofhd = open(ofile,"w")
ofhd = open( ofile, "w" )
# write_bedGraph function for ScoreTrack
sbtrack.write_bedGraph(ofhd,name="%s_Scores" % (method.upper()),description="Scores calculated by %s" % (method.upper()), column = 3)
info("Finished '%s'! Please check '%s'!" % (method, ofile))
10 changes: 5 additions & 5 deletions MACS3/Commands/bdgdiff_cmd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Time-stamp: <2020-11-26 17:04:26 Tao Liu>
# Time-stamp: <2024-05-15 10:42:27 Tao Liu>

"""Description: Naive call differential peaks from 4 bedGraph tracks for scores.
Expand Down Expand Up @@ -47,19 +47,19 @@ def run( options ):

info("Read and build treatment 1 bedGraph...")
t1bio = BedGraphIO.bedGraphIO(options.t1bdg)
t1btrack = t1bio.build_bdgtrack()
t1btrack = t1bio.read_bedGraph()

info("Read and build control 1 bedGraph...")
c1bio = BedGraphIO.bedGraphIO(options.c1bdg)
c1btrack = c1bio.build_bdgtrack()
c1btrack = c1bio.read_bedGraph()

info("Read and build treatment 2 bedGraph...")
t2bio = BedGraphIO.bedGraphIO(options.t2bdg)
t2btrack = t2bio.build_bdgtrack()
t2btrack = t2bio.read_bedGraph()

info("Read and build control 2 bedGraph...")
c2bio = BedGraphIO.bedGraphIO(options.c2bdg)
c2btrack = c2bio.build_bdgtrack()
c2btrack = c2bio.read_bedGraph()

depth1 = options.depth1
depth2 = options.depth2
Expand Down
11 changes: 5 additions & 6 deletions MACS3/Commands/bdgopt_cmd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Time-stamp: <2020-11-24 16:46:33 Tao Liu>
# Time-stamp: <2024-05-15 11:15:48 Tao Liu>

"""Description: Modify bedGraph file
Expand Down Expand Up @@ -40,7 +40,7 @@ def run( options ):

info("Read and build bedGraph...")
bio = BedGraphIO.bedGraphIO(options.ifile)
btrack = bio.build_bdgtrack(baseline_value=0)
btrack = bio.read_bedGraph(baseline_value=0)

info("Modify bedGraph...")
if options.method.lower() == "p2q":
Expand All @@ -57,11 +57,10 @@ def run( options ):
elif options.method.lower() == "min":
btrack.apply_func( lambda x: x if x< extraparam else extraparam )

ofile = os.path.join( options.outdir, options.ofile )
ofile = BedGraphIO.bedGraphIO( os.path.join( options.outdir, options.ofile ), data = btrack )
info("Write bedGraph of modified scores...")
ofhd = open(ofile,"w")
btrack.write_bedGraph(ofhd,name="%s_modified_scores" % (options.method.upper()),description="Scores calculated by %s" % (options.method.upper()))
info("Finished '%s'! Please check '%s'!" % (options.method, ofile))
ofile.write_bedGraph(name="%s_modified_scores" % (options.method.upper()),description="Scores calculated by %s" % (options.method.upper()))
info("Finished '%s'! Please check '%s'!" % (options.method, ofile.bedGraph_filename))



4 changes: 2 additions & 2 deletions MACS3/Commands/bdgpeakcall_cmd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Time-stamp: <2024-05-14 13:39:13 Tao Liu>
# Time-stamp: <2024-05-15 10:43:03 Tao Liu>

"""Description: Naive call peaks from a single bedGraph track for
scores.
Expand Down Expand Up @@ -39,7 +39,7 @@
def run( options ):
info("Read and build bedGraph...")
bio = BedGraphIO.bedGraphIO(options.ifile)
btrack = bio.build_bdgtrack(baseline_value=0)
btrack = bio.read_bedGraph(baseline_value=0)

if options.cutoff_analysis:
info("Analyze cutoff vs number of peaks/total length of peaks/average length of peak")
Expand Down
14 changes: 7 additions & 7 deletions MACS3/Commands/cmbreps_cmd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Time-stamp: <2020-11-24 16:47:11 Tao Liu>
# Time-stamp: <2024-05-15 11:16:04 Tao Liu>

"""Description: combine replicates
Expand Down Expand Up @@ -38,16 +38,16 @@ def run( options ):
i = 1
for ifile in options.ifile:
info("Read file #%d" % i)
reps.append( BedGraphIO.bedGraphIO( ifile ).build_bdgtrack( ) )
reps.append( BedGraphIO.bedGraphIO( ifile ).read_bedGraph() )
i += 1

# first two reps

info("combining tracks 1-%i with method '%s'" % (i - 1, options.method))
cmbtrack = reps[ 0 ].overlie( [reps[ j ] for j in range(1, i - 1)], func=options.method )
ofile = os.path.join( options.outdir, options.ofile )

# now output
ofile = BedGraphIO.bedGraphIO( os.path.join( options.outdir, options.ofile ), data = cmbtrack )
info("Write bedGraph of combined scores...")
ofhd = open(ofile,"w")
cmbtrack.write_bedGraph(ofhd,name="%s_combined_scores" % (options.method.upper()),description="Scores calculated by %s" % (options.method.upper()))
info("Finished '%s'! Please check '%s'!" % (options.method, ofile))
ofile.write_bedGraph(name="%s_combined_scores" % (options.method.upper()),description="Scores calculated by %s" % (options.method.upper()))
info("Finished '%s'! Please check '%s'!" % (options.method, ofile.bedGraph_filename))

61 changes: 34 additions & 27 deletions MACS3/Commands/hmmratac_cmd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Time-stamp: <2024-04-26 15:46:03 Tao Liu>
# Time-stamp: <2024-05-15 10:18:27 Tao Liu>

"""Description: Main HMMR command
Expand Down Expand Up @@ -27,13 +27,13 @@
from MACS3.Utilities.Constants import *
from MACS3.Utilities.OptValidator import opt_validate_hmmratac
from MACS3.IO.PeakIO import PeakIO
from MACS3.IO.PeakIO import BroadPeakIO
from MACS3.IO.Parser import BAMPEParser, BEDPEParser #BAMaccessor
from MACS3.Signal.HMMR_EM import HMMR_EM
from MACS3.Signal.HMMR_Signal_Processing import generate_weight_mapping, generate_digested_signals, extract_signals_from_regions
from MACS3.Signal.HMMR_HMM import hmm_training, hmm_predict, hmm_model_init, hmm_model_save
from MACS3.Signal.Region import Regions
from MACS3.Signal.BedGraph import bedGraphTrackI
from MACS3.IO.BedGraphIO import bedGraphIO

#from MACS3.IO.BED import BEDreader # this hasn't been implemented yet.

Expand Down Expand Up @@ -61,15 +61,21 @@ def run( args ):
mono_bdgfile = os.path.join( options.outdir, options.name+"_digested_mono.bdg" )
di_bdgfile = os.path.join( options.outdir, options.name+"_digested_di.bdg" )
tri_bdgfile = os.path.join( options.outdir, options.name+"_digested_tri.bdg" )

training_region_bedfile = os.path.join( options.outdir, options.name+"_training_regions.bed" )
training_datafile = os.path.join( options.outdir, options.name+"_training_data.txt" )
training_datalengthfile = os.path.join( options.outdir, options.name+"_training_lengths.txt" )

hmm_modelfile = os.path.join( options.outdir, options.name+"_model.json" )

open_state_bdgfile = os.path.join( options.outdir, options.name+"_open.bdg" )
nuc_state_bdgfile = os.path.join( options.outdir, options.name+"_nuc.bdg" )
bg_state_bdgfile = os.path.join( options.outdir, options.name+"_bg.bdg" )

states_file = os.path.join( options.outdir, options.name+"_states.bed" )

accessible_file = os.path.join( options.outdir, options.name+"_accessible_regions.narrowPeak" )

cutoffanalysis_file = os.path.join( options.outdir, options.name+"_cutoff_analysis.tsv" )

#############################################
Expand Down Expand Up @@ -158,18 +164,17 @@ def run( args ):

# save three types of signals if needed
if options.save_digested:
fhd = open(short_bdgfile,"w")
digested_atac_signals[ 0 ].write_bedGraph(fhd, "short","short")
fhd.close()
fhd = open(mono_bdgfile,"w")
digested_atac_signals[ 1 ].write_bedGraph(fhd, "mono","mono")
fhd.close()
fhd = open(di_bdgfile,"w")
digested_atac_signals[ 2 ].write_bedGraph(fhd, "di","di")
fhd.close()
fhd = open(tri_bdgfile,"w")
digested_atac_signals[ 3 ].write_bedGraph(fhd, "tri","tri")
fhd.close()
bdgshort = BedGraphIO( short_bdgfile, data = digested_atac_signals[ 0 ] )
bdgshort.write_bedGraph("short","short")

bdgmono = BedGraphIO( mono_bdgfile, data = digested_atac_signals[ 1 ] )
bdgmono.write_bedGraph("mono", "mono")

bdgdi = BedGraphIO( di_bdgfile, data = digested_atac_signals[ 2 ] )
bdgdi.write_bedGraph("di", "di")

bdgtri = BedGraphIO( tri_bdgfile, data = digested_atac_signals[ 3 ] )
bdgtri.write_bedGraph("tri", "tri")

minlen = int(petrack.average_template_length)
# if options.pileup_short is on, we pile up only the short fragments to identify training
Expand Down Expand Up @@ -389,10 +394,9 @@ def run( args ):
# Note: we implement in a way that we will decode the candidate regions 10000 regions at a time so 1. we can make it running in parallel in the future; 2. we can reduce the memory usage.
options.info( f"# Use HMM to predict states")
n = 0
#predicted_proba_file_name = "predicted_proba.csv"
#predicted_proba_file = open(predicted_proba_file_name, "wb")

# we create a temporary file to save the proba predicted from hmm
predicted_proba_file = tempfile.TemporaryFile(mode="w+b")
# predicted_proba_file = open("predicted_proba.csv", "w+b")

while candidate_regions.total != 0:
n += 1
Expand All @@ -414,11 +418,8 @@ def run( args ):
cr_data_lengths = []
cr_bins = []
prob_data = []

#options.debug( "# clean up complete")
gc.collect()

#predicted_proba_file.close()
predicted_proba_file.seek(0) # reset
options.info( f"# predicted_proba files written...")

Expand Down Expand Up @@ -449,7 +450,7 @@ def run( args ):
# # Generate states path:
states_path = generate_states_path( predicted_proba_file, options.hmm_binsize, i_open_region, i_nucleosomal_region, i_background_region )
options.info( f"# finished generating states path")
predicted_proba_file.close() #kill the tempfile
predicted_proba_file.close() #kill the temp file
# Save states path if needed
# PS: we need to implement extra feature to include those regions NOT in candidate_bins and assign them as 'background state'.
if options.save_states:
Expand All @@ -464,10 +465,15 @@ def run( args ):
options.info( f"# Finished")

def save_proba_to_bedGraph( predicted_proba_file, binsize, open_state_bdg_file, nuc_state_bdg_file, bg_state_bdg_file, i_open, i_nuc, i_bg ):
open_state_bdg = bedGraphTrackI( baseline_value = 0 )
nuc_state_bdg = bedGraphTrackI( baseline_value = 0 )
bg_state_bdg = bedGraphTrackI( baseline_value = 0 )

open_state_bdg_file = bedGraphIO( open_state_bdg_file )
nuc_state_bdg_file = bedGraphIO( nuc_state_bdg_file )
bg_state_bdg_file = bedGraphIO( bg_state_bdg_file )

open_state_bdg = open_state_bdg_file.data
nuc_state_bdg = nuc_state_bdg_file.data
bg_state_bdg = bg_state_bdg_file.data

prev_chrom_name = None
prev_bin_end = None

Expand Down Expand Up @@ -501,9 +507,9 @@ def save_proba_to_bedGraph( predicted_proba_file, binsize, open_state_bdg_file,
bg_state_bdg.add_loc( chrname, start_pos, end_pos, pp_bg )
prev_bin_end = end_pos

open_state_bdg.write_bedGraph( open_state_bdg_file, "Open States", "Likelihoods of being Open States" )
nuc_state_bdg.write_bedGraph( nuc_state_bdg_file, "Nucleosomal States", "Likelihoods of being Nucleosomal States" )
bg_state_bdg.write_bedGraph( bg_state_bdg_file, "Background States", "Likelihoods of being Background States" )
open_state_bdg_file.write_bedGraph( "Open States", "Likelihoods of being Open States", trackline = False )
nuc_state_bdg_file.write_bedGraph( "Nucleosomal States", "Likelihoods of being Nucleosomal States", trackline = False )
bg_state_bdg_file.write_bedGraph( "Background States", "Likelihoods of being Background States", trackline = False )
return

def save_states_bed( states_path, states_bedfile ):
Expand All @@ -515,6 +521,7 @@ def save_states_bed( states_path, states_bedfile ):
return

def generate_states_path(predicted_proba_file, binsize, i_open, i_nuc, i_bg):
# predicted_proba_file is a temporary file
ret_states_path = []
labels_list = ["open", "nuc", "bg"]

Expand Down
Loading

0 comments on commit 0ae471c

Please sign in to comment.