-
Notifications
You must be signed in to change notification settings - Fork 0
/
Sequence.py
141 lines (68 loc) · 2.56 KB
/
Sequence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
###### DNA PROJECT #####
class Sequence:
    """A nucleotide sequence stored as a lower-case string.

    The raw text is kept in ``dna_seq``; every method reads that attribute.
    """

    # Bases accepted by is_dna().
    _DNA_BASES = frozenset("atgc")
    # Translation table mapping each base to its complement (a<->t, g<->c).
    _COMPLEMENT = str.maketrans("atgc", "tacg")
    # Separator between genes in a genome string (stored lower-case, like
    # every sequence held by this class).
    _STOP_CODE = 'AAAAAAAAAATTTTTTTTTT'.lower()

    # Defining __eq__ already makes instances unhashable; spell it out so the
    # intent is visible (dna_seq is a mutable attribute).
    __hash__ = None

    def __init__(self, dna_seq):
        """Store ``dna_seq`` lower-cased so all later operations are case insensitive."""
        self.dna_seq = dna_seq.lower()

    def __repr__(self):
        """Return a debugging representation showing the stored sequence."""
        return f"{type(self).__name__}({self.dna_seq!r})"

    def base_count(self):
        """Return the number of characters in the stored sequence."""
        return len(self.dna_seq)

    def is_dna(self):
        """Return True when every character of the sequence is one of a, t, g, c.

        An empty sequence contains no invalid character and therefore
        also yields True.
        """
        return set(self.dna_seq) <= self._DNA_BASES

    def __eq__(self, other):
        """Two Sequence objects are equal when their stored strings are equal.

        Comparison with any non-Sequence object returns NotImplemented, which
        makes ``==`` evaluate to False for such operands.
        """
        if not isinstance(other, Sequence):
            return NotImplemented
        return self.dna_seq == other.dna_seq

    def comp_seq(self):
        """Return a new Sequence holding the complementary strand.

        Characters other than a, t, g, c are copied through unchanged.
        """
        return Sequence(self.dna_seq.translate(self._COMPLEMENT))

    def mis_match(self, other):
        """Return the index of the first position where the two sequences differ.

        Returns -1 when the sequences are identical.

        Raises:
            ValueError: if the two sequences have different lengths.
        """
        seq1 = self.dna_seq
        seq2 = other.dna_seq
        if len(seq1) != len(seq2):
            raise ValueError('Cannot compare sequences of different lengths')
        for index, (base1, base2) in enumerate(zip(seq1, seq2)):
            if base1 != base2:
                return index
        # Every position matched.
        return -1

    def gene_finder(self):
        """Split the sequence on the stop code and return the pieces as Sequence objects.

        The stop code is ten 'a' followed by ten 't'. Pieces are returned in
        order; empty pieces (e.g. when the sequence ends with the stop code)
        are kept as empty Sequence objects.
        """
        return [Sequence(gene) for gene in self.dna_seq.split(self._STOP_CODE)]

    def swap_mut(self, other):
        """Return the number of positions at which the two sequences differ.

        Raises:
            ValueError: if the two sequences have different lengths.
        """
        seq1 = self.dna_seq
        seq2 = other.dna_seq
        if len(seq1) != len(seq2):
            raise ValueError('Cannot compare sequences of different lengths')
        # True counts as 1, so summing the comparisons counts the mismatches.
        return sum(base1 != base2 for base1, base2 in zip(seq1, seq2))
###########################################################################################################################################################
def read_genome(file):
    """Read a genome file and return its DNA sequence as a Sequence object.

    Only the first two lines of the file are read; the sequence is taken from
    the second line, or from the first line when the file has only one.
    (Presumably the first line is a header -- confirm against the data files.)
    Surrounding whitespace, including the line terminator, is removed so it is
    not counted as a base. The number of bases is printed before returning.

    Args:
        file: path of an ASCII-encoded genome file.

    Raises:
        ValueError: if the file contains no lines.
    """
    from itertools import islice

    with open(file, mode="r", encoding="ascii") as genome:
        head = list(islice(genome, 2))
    if not head:
        raise ValueError(f"Genome file {file!r} is empty")
    # The last of the (at most two) lines read carries the sequence; strip the
    # trailing newline so base_count() and is_dna() see only the bases.
    seq_dat = Sequence(head[-1].strip())
    seq_len = seq_dat.base_count()
    print("Number of bases in this file",seq_len)
    return seq_dat