-
Notifications
You must be signed in to change notification settings - Fork 16
/
tsvawk
executable file
·34 lines (29 loc) · 1012 Bytes
/
tsvawk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/usr/bin/env python2.7
"""
Wrapper around tabawk, letting you use column names instead of positions. It
makes column names from the header into awk global variables that are integers
for positions, so you can do things like
tsvawk '{print $id,$name}'
tsvawk '$count >= 5'
Your awk script will *not* see the header line from the file; this script absorbs it.
-r: repeat the header on output too. (Only good for "print $0" sorts of scripts.)
"""
import sys,os
def column_flags(header):
    """Turn a tab-separated header line into awk ``-v NAME=POSITION`` flags.

    header: the first line of the input, with or without its trailing newline.

    Returns a flat list such as ``["-v", "id=1", "-v", "name=2"]``.  Positions
    are 1-based to match awk field numbering.  Column names are stripped of
    surrounding whitespace; columns whose name is empty after stripping are
    skipped but still occupy their position.
    """
    # rstrip instead of header[:-1]: a header that is the last line of the
    # input and has no trailing newline must not lose its final character.
    names = header.rstrip("\n").split("\t")
    flags = []
    for position, name in enumerate(names, 1):
        name = name.strip()
        if not name:
            continue
        # NOTE(review): names are passed through unchecked; a header that is
        # not a valid awk identifier will presumably be rejected by awk.
        flags += ["-v", "%s=%s" % (name, position)]
    return flags


def main():
    """Read the header from stdin, then replace this process with tabawk.

    Prints the usage text (the module docstring) to stderr and exits with
    status 1 when stdin is a terminal.  If ``-r`` appears among the
    command-line arguments, the header line is echoed to stdout and the flag
    is removed before the remaining arguments are handed to tabawk.
    """
    if sys.stdin.isatty():
        sys.stderr.write(__doc__.strip() + "\n")
        sys.exit(1)

    # Both streams are reopened unbuffered.  Presumably this is so readline()
    # consumes only the header (leaving the rest of the input for tabawk) and
    # so the echoed header is written out before the exec replaces us.
    sys.stdin = open('/dev/stdin', 'U', buffering=0)
    sys.stdout = open('/dev/stdout', 'w', buffering=0)

    header = sys.stdin.readline().rstrip("\n")

    user_args = sys.argv[1:]
    if '-r' in user_args:
        sys.stdout.write(header + "\n")
        user_args.remove('-r')

    os.execvp("tabawk", ["tabawk"] + column_flags(header) + user_args)


if __name__ == "__main__":
    main()