forked from roryk/junkdrawer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtranscriptLength.py
67 lines (54 loc) · 2.28 KB
/
transcriptLength.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python
import logging
from argparse import ArgumentParser
import os
from gtfUtils import calculateLengths, outputLengths, GTFtoDict
from gtfUtils import filterByMinLength, filterByMaxLength, outputGTF
def main():
    """Command-line entry point: report transcript lengths or filter a GTF.

    Parses the command line, loads the GTF file given by ``--gtf`` with
    ``GTFtoDict`` and then does one of two things:

    * with ``--length``: computes the transcript lengths with
      ``calculateLengths`` and writes them with ``outputLengths``;
    * otherwise: applies ``filterByMinLength`` / ``filterByMaxLength`` when
      ``--min_size`` / ``--max_size`` are given and writes the result with
      ``outputGTF``.

    Raises:
        SystemExit: with code -1 when the GTF file does not exist, when an
            output filename is needed but was not supplied, or when the
            output file already exists; with argparse's own code when the
            command line cannot be parsed.
    """
    # Function-scope import keeps this block self-contained. ``sys.exit`` is
    # used instead of the ``exit`` helper, which is injected by the ``site``
    # module for interactive use and is missing under ``python -S``.
    import sys

    logging.basicConfig(format='%(levelname)s: %(asctime)s %(message)s',
                        level=logging.INFO)

    description = "Counts up size of transcripts in a GTF file. Can also " \
                  "filter on minimum transcript size."
    parser = ArgumentParser(description=description)
    # No ``default`` here: the option is required, so a default is never used.
    parser.add_argument("-g", "--gtf", dest="gtf",
                        type=str, required=True,
                        help="gtf file to analyze")
    parser.add_argument("-l", "--length", dest="length", default=False,
                        action="store_true",
                        help="output length of each transcript")
    parser.add_argument("-m", "--min_size", dest="min_size", default=False,
                        type=int,
                        help="remove transcripts below this size.")
    parser.add_argument("-M", "--max_size", dest="max_size", default=False,
                        type=int,
                        help="remove transcripts greater than this size.")
    parser.add_argument("-o", "--outfile", dest="outfn", default=False,
                        type=str)
    args = parser.parse_args()

    def checkOutFile(args):
        """Exit with -1 unless ``args.outfn`` is set and not an existing file."""
        if not args.outfn:
            logging.error("need to provide an output filename.")
            parser.print_help()
            sys.exit(-1)
        # Refuse to overwrite an existing file.
        if os.path.isfile(args.outfn):
            logging.error("%s already exists, aborting.", args.outfn)
            sys.exit(-1)

    if not os.path.isfile(args.gtf):
        logging.error("%s cannot be found.", args.gtf)
        parser.print_help()
        sys.exit(-1)

    gtflines = GTFtoDict(args.gtf)

    if args.length:
        checkOutFile(args)
        lengths = calculateLengths(gtflines)
        outputLengths(lengths, args.outfn)
        # The lengths were written successfully, so finish with a zero exit
        # status (the previous ``exit(1)`` reported failure to the shell).
        return

    # A size of 0 is falsy and therefore treated the same as "not given".
    if args.min_size:
        checkOutFile(args)
        gtflines = filterByMinLength(gtflines, args.min_size)
    if args.max_size:
        checkOutFile(args)
        gtflines = filterByMaxLength(gtflines, args.max_size)

    # NOTE(review): when neither size filter is given, args.outfn reaches
    # outputGTF unchecked (it may be False, or name an existing file).
    # Confirm how outputGTF handles that before adding a check here.
    outputGTF(gtflines, args.outfn)
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()