This repository was archived by the owner on May 11, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsequence_aligner.py
102 lines (89 loc) · 3.6 KB
/
sequence_aligner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# This file is part of sequence-aligner.
# Copyright (C) 2014 Christopher Kyle Horton <[email protected]>
# sequence-aligner is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# sequence-aligner is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with sequence-aligner. If not, see <http://www.gnu.org/licenses/>.
# MCS 5603 Intro to Bioinformatics, Fall 2014
# Christopher Kyle Horton (000516274), [email protected]
# Last modified: 11/6/2014
import argparse
from copy import deepcopy
import os.path
import webbrowser
from scoring_matrix import ScoringMatrix
from scoring_algorithm import get_alignments
from terminal_output import print_matrix, print_alignments
from html_output import write_html
# Program identification; `desc` is the banner shown at the top of --help.
version = "v1.0.0"
desc = "".join((
    "sequence-aligner ",
    version,
    "\nFinds semi-global alignments between FASTA sequences.",
))

# Help text shared by both positional sequence arguments.
infile_help = (
    "\n"
    "Reads in the sequence from the given file path if the file exists.\n"
    "Otherwise, treats this as a sequence string to align.\n"
)
#============================================================================
# Main program code
#============================================================================
# Build the command-line interface: two positional sequence arguments
# followed by two optional boolean switches.
parser = argparse.ArgumentParser(
    description=desc,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)

# Both positionals accept either a file path or a literal sequence string,
# so they share the same help text.
for positional_name in ("sequence1", "sequence2"):
    parser.add_argument(positional_name, help=infile_help)

parser.add_argument(
    "-g", "--global-align",
    action="store_true",
    help="Perform a global alignment instead.",
)
parser.add_argument(
    "-v", "--view-html",
    action="store_true",
    help="Automatically view HTML5 output in browser.",
)
args = parser.parse_args()

# True when -g/--global-align was given, False otherwise.
alignment_is_global = bool(args.global_align)
def _read_sequence(source, ordinal):
    """Return the uppercase sequence designated by a command-line argument.

    If *source* is an existing path it is read as a FASTA file: the first
    line is dropped (it is header info, not part of the sequence) and every
    remaining line is uppercased, stripped of surrounding whitespace and
    concatenated.  Otherwise *source* itself is treated as the sequence and
    returned uppercased.

    *ordinal* ("first" or "second") is only used in the error message.

    Raises SystemExit(1), after printing an error, if the path exists but
    cannot be opened or read.
    """
    if not os.path.exists(source):
        return source.upper()
    try:
        with open(source, 'r') as fasta_file:
            # Skip the header line; everything after it is sequence data.
            body_lines = fasta_file.readlines()[1:]
    except IOError:
        # Single-argument print() behaves the same under Python 2 and 3.
        print("Error: could not open %s file: %s" % (ordinal, source))
        # SystemExit is raised directly because the bare exit() helper is
        # injected by the site module and is not meant for use in programs.
        raise SystemExit(1)
    return "".join(line.upper().strip() for line in body_lines)


# Read in sequences from FASTA files, if they exist; otherwise each argument
# is taken as a literal sequence string.
path1, path2 = args.sequence1, args.sequence2
sequence1 = _read_sequence(path1, "first")
sequence2 = _read_sequence(path2, "second")
# Order the pair so that sequence1 is never longer than sequence2: the two
# are swapped whenever sequence1 is the longer one.
# NOTE(review): the previous comment claimed sequence1 should be the *longer*
# sequence, which is the opposite of what this swap does.  The code is left
# as-is; confirm the intended order against ScoringMatrix.
if len(sequence1) > len(sequence2):
    sequence1, sequence2 = sequence2, sequence1

# Fill the scoring matrix and extract the alignments from it.
sm = ScoringMatrix(sequence1, sequence2)
alignments = get_alignments(sm, alignment_is_global)

# Terminal report.  print_alignments is handed a deep copy, presumably
# because it modifies its argument and the original list is still needed
# for the HTML report below -- TODO confirm.
print_matrix(sm)
print_alignments(deepcopy(alignments))

# HTML report.  Single-argument print() behaves the same under Python 2 and 3.
html_file = write_html(sm, alignments, alignment_is_global)
print("Output written to %s" % (html_file,))

# Open the report automatically with -v/--view-html; otherwise ask first.
open_in_browser = args.view_html
if not open_in_browser:
    try:
        # raw_input: this script targets Python 2.
        answer = raw_input("Open HTML output in your web browser (y/n)? ")
    except EOFError:
        # No interactive stdin (e.g. input piped or closed): treat as "no"
        # instead of crashing after the output has already been written.
        answer = ""
    open_in_browser = answer.lower() == 'y'
if open_in_browser:
    webbrowser.get().open(html_file)

# SystemExit is raised directly because the bare exit() helper is injected
# by the site module and is not meant for use in programs.
raise SystemExit(0)