Skip to content

Commit

Permalink
Minor fixes for some python2 bugs. Addressed #42. Fixes #41.
Browse files Browse the repository at this point in the history
  • Loading branch information
marcus1487 committed Mar 20, 2018
1 parent 5710cac commit 9a2cbad
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 39 deletions.
2 changes: 1 addition & 1 deletion tombo/_version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from __future__ import unicode_literals

TOMBO_VERSION = '1.2.1'
TOMBO_VERSION = '1.2.1b'
50 changes: 16 additions & 34 deletions tombo/plot_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@ def plot_per_read_roc(
'stat':r.FloatVector(unzip_stats[0]),
'motif_match':r.BoolVector(unzip_stats[1])})

# python2 rpy2 ListVector can't take unicode keys
if sys.version_info[0] < 3:
conv_all_motif_stats_for_r = {}
for k, v in all_motif_stats_for_r.items():
conv_all_motif_stats_for_r[k.encode()] = v
all_motif_stats_for_r = conv_all_motif_stats_for_r
all_motif_stats_for_r = r.ListVector(all_motif_stats_for_r)

if VERBOSE: sys.stderr.write('Computing accuracy statistics.\n')
Expand Down Expand Up @@ -889,7 +895,7 @@ def plot_corrections(
def plot_multi_corrections(
f5_dirs1, corrected_group, basecall_subgroups, pdf_fn,
num_reads_per_plot, num_regions, num_obs, include_orig_bcs,
genome_locations):
genome_locs):
th._warning_message('The plot_multi_correction command may be deprecated ' +
'in future versions of Tombo.')
num_regions = num_regions if num_regions % 2 == 0 else \
Expand All @@ -898,7 +904,7 @@ def plot_multi_corrections(
f5_dirs1, corrected_group, basecall_subgroups)
read_coverage = th.get_coverage(raw_read_coverage)

if genome_locations is None:
if genome_locs is None:
coverage_regions = []
for (chrm, strand), cs_coverage in read_coverage.items():
reg_covs, reg_lens = zip(*[
Expand All @@ -923,16 +929,7 @@ def plot_multi_corrections(
'number of reads than requested.')
else:
if VERBOSE: sys.stderr.write('Parsing genome locations.\n')
parsed_locations = []
for chrm_pos_strand in genome_locations:
split_vals = chrm_pos_strand.replace('"', '').replace(
"'", "").split(':')[:3]
# default to plus strand if not specified
if len(split_vals) == 2:
parsed_locations.append((
split_vals[0], split_vals[1], '+'))
else:
parsed_locations.append(split_vals)
parsed_locs = th.parse_genome_locations(genome_locs, default_strand='+')
plot_locs = [
('{:03d}'.format(i), (chrm, int(pos) - 1, strand))
for i, (chrm, pos, strand) in enumerate(parsed_locations)]
Expand Down Expand Up @@ -1480,22 +1477,14 @@ def plot_max_coverage(
def plot_genome_locations(
f5_dirs1, corrected_group, basecall_subgroups, pdf_fn,
f5_dirs2, num_bases, overplot_thresh, overplot_type,
genome_locations, tb_model_fn, alt_model_fn, plot_default_stnd,
genome_locs, tb_model_fn, alt_model_fn, plot_default_stnd,
plot_default_alt):
if VERBOSE: sys.stderr.write('Parsing genome locations.\n')
# ignore strand for genome location plotting
genome_locations = [
chrm_pos.replace('"', '').replace("'", "").split(':')[:3]
for chrm_pos in genome_locations]
# minus one here as all python internal coords are 0-based, but
# genome is generally 1-based
plot_intervals = []
for i, chrm_pos_strand in enumerate(genome_locations):
if len(chrm_pos_strand) == 2:
chrm, pos = chrm_pos_strand
strand = None
else:
chrm, pos, strand = chrm_pos_strand
for i, (chrm, pos, strand) in enumerate(
th.parse_genome_locations(genome_locs)):
int_start = max(
0, int(int(pos) - np.floor(num_bases / 2.0) - 1))
plot_intervals.append(th.intervalData(
Expand Down Expand Up @@ -1527,19 +1516,12 @@ def plot_genome_locations(

def plot_per_read_mods_genome_location(
f5_dirs, corrected_group, basecall_subgroups, pdf_fn,
per_read_stats_fn, genome_locations, num_bases, num_reads, box_center,
per_read_stats_fn, genome_locs, num_bases, num_reads, box_center,
fasta_fn):
if VERBOSE: sys.stderr.write('Parsing genome locations.\n')
genome_locations = [
chrm_pos.replace('"', '').replace("'", "").split(':')[:3]
for chrm_pos in genome_locations]
plot_intervals = []
for i, chrm_pos_strand in enumerate(genome_locations):
if len(chrm_pos_strand) == 2:
chrm, pos = chrm_pos_strand
strand = '+'
else:
chrm, pos, strand = chrm_pos_strand
for i, (chrm, pos, strand) in enumerate(th.parse_genome_locations(
genome_locs, default_strand='+')):
int_start = max(
0, int(int(pos) - np.floor(num_bases / 2.0) - 1) + 1)
plot_intervals.append(th.intervalData(
Expand Down Expand Up @@ -2146,7 +2128,7 @@ def plot_main(args):
if 'num_obs' in args else None),]
nread_opt = [('num_reads', args.num_reads
if 'num_reads' in args else None),]
glocs_opt = [('genome_locations', args.genome_locations
glocs_opt = [('genome_locs', args.genome_locations
if 'genome_locations' in args else None),]
f5dirs2_opt = [('f5_dirs2', args.control_fast5_basedirs
if 'control_fast5_basedirs' in args else None),]
Expand Down
3 changes: 1 addition & 2 deletions tombo/resquiggle.py
Original file line number Diff line number Diff line change
Expand Up @@ -1128,8 +1128,7 @@ def resquiggle_all_reads(
rsqgl_args = (
proc_rsqgl_conns, std_ref, outlier_thresh, corr_grp, bio_samp_type,
seg_params, sig_aln_params, obs_filter, index_q is None, const_scale)
rsqgl_process = Process(target=_resquiggle_worker, args=rsqgl_args,
daemon=True)
rsqgl_process = Process(target=_resquiggle_worker, args=rsqgl_args)
rsqgl_process.start()

# now open mapping thread for each map connection created above
Expand Down
21 changes: 21 additions & 0 deletions tombo/tombo_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,27 @@ def get_chrm_sizes(raw_read_coverage, raw_read_coverage2=None):
for chrm, strnd_sizes in
strand_chrm_sizes.items())

def parse_genome_locations(genome_locs, default_strand=None):
    """Parse "chrm:pos[:strand]" strings into (chrm, pos, strand) tuples.

    Single and double quote characters are removed from each location
    before it is split on ':'. Only the first three colon-separated
    fields are kept; any further fields are ignored.

    Args:
        genome_locs: iterable of location strings, e.g. "chr1:1000" or
            "chr1:1000:-".
        default_strand: strand value used for locations that do not
            specify one (default: None).

    Returns:
        List of (chrm, pos, strand) tuples. chrm, pos and any parsed
        strand are returned as strings (pos is not converted to an
        integer here).

    A location with no ':' separator is reported through
    _error_message_and_exit (presumably terminating the program --
    confirm against that helper's definition).
    """
    parsed_locs = []
    for chrm_pos_strand in genome_locs:
        # strip off any quotes and keep at most the first 3 values
        split_vals = chrm_pos_strand.replace('"', '').replace(
            "'", "").split(':')[:3]
        if len(split_vals) == 1:
            # no ':' found, so there is no position to parse
            _error_message_and_exit(
                'Invalid genome location provided: ' + chrm_pos_strand +
                '\n\t\tTry adding quotation marks around specified genome ' +
                'locations (especially for sequence identifiers with ' +
                'special characters).')
        elif len(split_vals) == 2:
            # strand not specified: fall back to the caller-supplied default
            chrm, pos = split_vals
            parsed_locs.append((chrm, pos, default_strand))
        else:
            # convert to a tuple so every entry has the same type
            parsed_locs.append(tuple(split_vals))

    return parsed_locs

class TomboMotif(object):
def _parse_motif(self, rev_comp_motif=False):
"""
Expand Down
6 changes: 4 additions & 2 deletions tombo/tombo_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -1276,7 +1276,8 @@ def calc_damp_fraction(self, cov_damp_counts):
# on the fraction estimation as a binomial variable)
damp_frac = (non_mod_counts + cov_damp_counts[0]) / (
self.stats['valid_cov'] + sum(cov_damp_counts))
self.stats = append_fields(self.stats, 'damp_frac', damp_frac)
damp_name = 'damp_frac' if sys.version_info[0] > 2 else b'damp_frac'
self.stats = append_fields(self.stats, damp_name, damp_frac)

return

Expand Down Expand Up @@ -2018,7 +2019,8 @@ def compute_read_stats(
ctrl_cov = [ctrl_cov[pos] if pos in ctrl_cov else 0
for pos in reg_poss]
else:
ctrl_cov = repeat(0, reg_poss.shape[0])
# convert to list since python2 repeat objects can't be pickled
ctrl_cov = list(repeat(0, reg_poss.shape[0]))

return reg_base_stats, us_reg_poss, reg_cov, ctrl_cov, valid_cov

Expand Down

0 comments on commit 9a2cbad

Please sign in to comment.