-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimples.py
203 lines (161 loc) · 5.55 KB
/
simples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
"""
Simple Utilities | RPINerd, 03/20/24
Collection of very basic utilities for use in other scripts.
"""
def contains_n_consecutive(n: int, lst: list, sort: bool = False) -> bool:
    """
    Check if an integer list contains a run of n consecutive numbers

    e.g. n = 3, lst = [1, 2, 3, 6, 10]
        returns True because list contains 3 consecutive numbers (1,2,3)
    n = 4, lst = [1, 4, 5, 6, 10]
        returns False because the longest sequence of consecutive numbers is only 3 - (4,5,6)

    Only neighbouring elements are compared, so an unsorted list must be
    passed with sort=True for runs to be detected. A repeated value
    (difference of 0) breaks the current run. An empty list contains no run
    and yields False.

    :param int n: Number of consecutive numbers to check for
    :param list lst: List of integers to check
    :param bool sort: Sort a copy of the list before checking
    :return: True once a run of length n is found, otherwise False
    :rtype: bool
    """
    # Guard against empty input; indexing lst[0] below would raise IndexError
    if len(lst) == 0:
        return False

    if sort:
        lst = sorted(lst)

    prev = lst[0]
    count = 1
    for value in lst:
        # The first pass compares lst[0] with itself, which leaves count at 1
        count = count + 1 if value - prev == 1 else 1
        if count == n:
            return True
        prev = value

    return False
def ret_idt_repr(seq: str) -> str:
    """
    Convert an oligodesign2 sequence into its IDT representation.

    Every base is emitted in upper case; bases that were already upper case
    in the input (LNA bases in OD2 notation) are prefixed with a "+".
    A sequence that already contains a "+" is treated as IDT formatted and
    returned unchanged after printing a notice.

    :param str seq: Oligodesign2 sequence with/without LNAs
    :return: The IDT formatted sequence
    :rtype: str
    """
    # A "+" only appears in IDT notation, so there is nothing to convert
    if "+" in seq:
        print("Sequence already in IDT format")
        return seq

    pieces = []
    for base in seq:
        assert base.lower() in ["a", "c", "g", "t"], seq
        # Upper case in OD2 marks an LNA base; lower case is a plain base
        is_lna = base == base.upper()
        pieces.append("+" + base if is_lna else base.upper())

    return "".join(pieces)
def ret_od2_repr(seq: str) -> str:
    """
    Convert an IDT sequence into its Oligodesign2 representation.

    A base preceded by "+" is kept in upper case (LNA); every other base is
    lower cased. The "+" markers themselves are dropped from the output.

    :param str seq: IDT sequence
    :return: The Oligodesign2 formatted sequence
    :rtype: str
    """
    lna_pending = False
    bases = []
    for symbol in seq:
        assert symbol.upper() == symbol, "IDT bases should be upper case"

        # A "+" only flags the following base as LNA; it is not emitted
        if symbol == "+":
            lna_pending = True
            continue

        bases.append(symbol if lna_pending else symbol.lower())
        # The LNA flag applies to a single base only
        lna_pending = False

    return "".join(bases)
def revcomp(seq: str) -> str:
    """
    Build the reverse complement of a DNA sequence.

    A/T and C/G are swapped (case is preserved) and the result is reversed.
    Characters outside ATCGatcg are passed through unchanged.

    :param str seq: Sequence to reverse complement
    :return: The reverse complemented sequence
    :rtype: str
    """
    pairing = str.maketrans("ATCGatcg", "TAGCtagc")
    complemented = seq.translate(pairing)
    return complemented[::-1]
def translateRNA(seq: str) -> str:
    """
    Map each RNA base to its complementary DNA base.

    A->T, U->A, G->C and C->G (case is preserved); the order of the sequence
    is not reversed. Characters outside AUGCaugc are passed through unchanged.

    :param str seq: RNA sequence to translate
    :return: The complementary DNA sequence
    :rtype: str
    """
    rna_to_dna = str.maketrans("AUGCaugc", "TACGtacg")
    return seq.translate(rna_to_dna)
def convertRNA(seq: str) -> str:
    """
    Convert an RNA sequence to DNA by swapping uracil for thymine.

    U->T and u->t; every other character is passed through unchanged.

    :param str seq: RNA sequence to convert
    :return: The DNA form of the sequence
    :rtype: str
    """
    uracil_to_thymine = str.maketrans("Uu", "Tt")
    return seq.translate(uracil_to_thymine)
def complement(seq: str) -> str:
    """
    Build the complement of a DNA sequence without reversing it.

    A/T and C/G are swapped (case is preserved). Characters outside ATCGatcg
    are passed through unchanged.

    :param str seq: Sequence to complement
    :return: The complemented sequence
    :rtype: str
    """
    pairing = str.maketrans("ATCGatcg", "TAGCtagc")
    return seq.translate(pairing)
def look_forward_match(iterable: list | tuple, start: int, char: str) -> int:
"""
Look ahead in an iterable for the next point where a character is the same
:param iterable: iterable: A list/tuple to look forward through
:param int: start: The initial index to being from
:param str: char: The character to look for the end of in the sequence
"""
idx = start
end_idx = None
while not end_idx and idx < len(iterable):
idx += 1
if iterable[idx] == char:
end_idx = idx - 1
return end_idx
raise ValueError(
f"No match found looking forwards from index {start} along interable:\n{iterable[start:len(iterable)]}"
)
def look_forward_miss(iterable: list | tuple, start: int, char: str) -> int:
"""
Look ahead in an iterable for the next point where a character is different
:param iterable: iterable: A list/tuple to look forward through
:param int: start: The initial index to being from
:param str: char: The character to look for the end of in the sequence
"""
idx = start
end_idx = None
while not end_idx and idx < len(iterable):
idx += 1
if iterable[idx] != char:
end_idx = idx
return end_idx
def look_backward_match(iterable: list | tuple, start: int, char: str) -> int:
"""
Look behind in an iterable for the next point where a character is the same
:param iterable: iterable: A list/tuple to look backward through
:param int: start: The initial index to being from
:param str: char: The character to look for the end of in the sequence
"""
idx = start
end_idx = None
while not end_idx and idx > 0:
# print(idx)
# print(iterable[idx])
idx -= 1
if iterable[idx] == char:
end_idx = idx + 1
return end_idx
raise ValueError(f"No match found looking backwards from index {start} along interable:\n{iterable}")
def look_backward_miss(iterable: list | tuple, start: int, char: str) -> int:
"""
Look behind in an iterable for the next point where a character is different
:param iterable: iterable: A list/tuple to look backward through
:param int: start: The initial index to being from
:param str: char: The character to look for the end of in the sequence
"""
idx = start
end_idx = None
while not end_idx and idx > 0:
idx -= 1
if iterable[idx] != char:
end_idx = idx
return end_idx