-
Notifications
You must be signed in to change notification settings - Fork 16
/
tsv2fmt
executable file
·69 lines (55 loc) · 1.75 KB
/
tsv2fmt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python2.7
"""
Outputs an ascii-art table with aligned columns.
Reminiscent of Postgres and MySQL commandlines.
"""
#from __future__ import print_function
import sys
from collections import defaultdict
#import tsvutil
#tsvutil.fix_stdio()
# Border switches, read straight from the command line:
#   -o  drop the outer "| ... |" frame but keep the inner column separators
#   -b  drop every border (inner separators and outer frame)
INNER_BORDERS = '-b' not in sys.argv
OUTER_BORDERS = INNER_BORDERS and '-o' not in sys.argv
#width_hists = [defaultdict(lambda:0) for j in range(J)]
# Per-column histogram of cell widths: width_hists[j][n] is the number of
# cells in column j that are n characters wide. Only the per-column maximum
# is consumed below.
width_hists = defaultdict(lambda: defaultdict(int))
rows = []
for line in sys.stdin:
    # Remove only the line terminator. Slicing off the last character
    # unconditionally would eat a data character whenever the final input
    # line has no trailing newline.
    vals = [v.strip() for v in line.rstrip("\n").split("\t")]
    for j, val in enumerate(vals):
        # Measure width in characters rather than bytes. Under Python 2 the
        # cells are byte strings and need decoding first; text strings are
        # measured as they are. The stored cell itself is left untouched.
        text = val.decode('utf-8', 'replace') if isinstance(val, bytes) else val
        width_hists[j][len(text)] += 1
    rows.append(vals)

# Nothing to format: exit quietly without printing anything.
if not rows:
    sys.exit()

# J is the number of columns in the widest row.
J = max(width_hists) + 1
# Sanity check: every column index from 0 to J-1 was seen at least once.
assert sorted(width_hists) == list(range(J))
# Each column is rendered as wide as its widest cell.
widths = [max(width_hists[j]) for j in range(J)]
# TODO: when there are more than ~5 rows, ignore unusually wide outlier cells
# when choosing a column's width, so that only the rows containing an outlier
# are misaligned and the rest of the table stays compact.
# Possible rule: treat widths at or above the 95th percentile as outliers if
# the smallest of them is more than 20 characters wider than the 94th percentile.
def print_row(row):
    """Write one table row to stdout, each cell left-aligned to its column.

    row is a list of cell strings. A row with fewer than J cells is treated
    as if it had trailing empty cells; only the first J cells are printed.
    The caller's list is never modified. OUTER_BORDERS adds the surrounding
    "| ... |" frame and INNER_BORDERS selects " | " over " " as the column
    separator.
    """
    # Work on a copy so padding short rows does not extend the caller's list.
    cells = list(row[:J])
    cells += [""] * (J - len(cells))

    padded = []
    for cell, width in zip(cells, widths):
        # Pad by character count, not byte count: a "%-Ns" format on a UTF-8
        # byte string counts bytes and under-pads multi-byte text, which
        # breaks column alignment.
        text = cell.decode('utf-8', 'replace') if isinstance(cell, bytes) else cell
        padded.append(cell + " " * (width - len(text)))

    joiner = " | " if INNER_BORDERS else " "
    line = joiner.join(padded)
    if OUTER_BORDERS:
        line = "| " + line + " |"
    sys.stdout.write(line + "\n")
def print_sep():
    """Write a horizontal rule such as ``+-----+----+`` to stdout.

    The rule mirrors the layout print_row produces for the current
    OUTER_BORDERS / INNER_BORDERS settings, so it lines up with the rows in
    every border mode. With both borders enabled the output is a "+" at each
    column boundary with dashes spanning each cell and its one-space padding.
    """
    # Each piece is the row layout with "|" turned into "+" and every other
    # character turned into "-".
    left, right = ("+-", "-+") if OUTER_BORDERS else ("", "")
    joint = "-+-" if INNER_BORDERS else "-"
    rule = joint.join("-" * widths[j] for j in range(J))
    sys.stdout.write(left + rule + right + "\n")
#print_sep()
#print_row(rows[0])
#print_sep()
#print("-"* (1+sum([w+3 for w in widths])))
# Emit the table body: one formatted line per input row, in input order.
for record in rows:
    print_row(record)
#print_sep()