-
Notifications
You must be signed in to change notification settings - Fork 0
/
GCcontent.py
41 lines (33 loc) · 1.49 KB
/
GCcontent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
### Calculate the GC content for a DNA sequence
### Yanni Chen
### 4/27/2020
##################################################
# 1. Create a random DNA sequence that is 5,000 bp long
# 2. Break the sequence into smaller sequences 100 bp long
# 3. Create a GC function to calculate GC content
# 4. Use the GC function to calculate GC content of each fragment
##################################################
import random

# --- Step 1: build the random sequence -------------------------------------
print("##################################################")
print("# 1. Create a random DNA sequence that is 5,000 bp long")
bases = ['A', 'T', 'G', 'C']
# random.choices samples with replacement, so each of the 5,000 positions is
# drawn independently from the four bases.
random_bases = random.choices(bases, k=5000)
# Join with an empty separator directly instead of inserting commas and then
# stripping them out again.
seq = ''.join(random_bases)
print(seq)

# --- Step 2: split the sequence into fixed-size fragments -------------------
print("##################################################")
print("# 2. Break the sequence into smaller sequences 100 bp long")
x = 100  # fragment length in bp
# Use x for BOTH the step and the slice width so the fragments stay contiguous
# and non-overlapping if the fragment length is ever changed.
seq_100 = [seq[i:i + x] for i in range(0, len(seq), x)]
print(seq_100)

# --- Step 3 banner (the GC function itself is defined right after this) -----
print("##################################################")
print("# 3. Creat a GC function to calculate GC content")
def GC_content(input_seq):
    """Return the GC fraction of ``input_seq``.

    The result is ``(G + C) / (A + T + G + C)``. Only the uppercase symbols
    'A', 'T', 'G' and 'C' are counted (``count`` is case-sensitive); any other
    symbol, such as 'N' or a lowercase letter, is ignored in both the
    numerator and the denominator.

    Args:
        input_seq: Sequence to analyse; any object exposing a ``count``
            method, e.g. a ``str``.

    Returns:
        The GC fraction as a float, or 0.0 when ``input_seq`` contains none
        of the four counted bases (this includes the empty sequence).
    """
    gc_count = input_seq.count('G') + input_seq.count('C')
    at_count = input_seq.count('A') + input_seq.count('T')
    dna_count = gc_count + at_count
    # An empty or fully ambiguous fragment has no countable bases; return 0.0
    # rather than letting the division raise ZeroDivisionError.
    if dna_count == 0:
        return 0.0
    return gc_count / dna_count
# --- Step 4: GC content of every fragment -----------------------------------
print("##################################################")
print("4. Use the GC function to calculate GC content of each fragment")
# Apply GC_content to each fragment in turn; the results keep fragment order.
gc_each = list(map(GC_content, seq_100))
print(gc_each)