|
| 1 | +import re |
# Full target motif; any number of '-' gap characters may sit between bases.
_TARGET_PATTERN = re.compile(
    r'C-*C-*A-*C-*T-*G-*C-*A-*T-*C-*C-*T-*G-*G-*G-*G-*A'
)

# Six overlapping four-base windows inside the motif. The start of each
# window (located within the matched motif text) is one "core position".
_CORE_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'T-*C-*C-*T',
        r'C-*C-*T-*G',
        r'C-*T-*G-*G',
        r'T-*G-*G-*G',
        r'G-*G-*G-*G',
        r'G-*G-*G-*A',
    )
)


def _core_positions(search_line):
    """Return the six core column indices in *search_line*, or None.

    None is returned when the target motif does not occur in the line.
    """
    match = _TARGET_PATTERN.search(search_line)
    if match is None:
        return None
    target = match.group()
    # Every window is a sub-sequence of the motif, so each search succeeds.
    return [
        match.start() + pattern.search(target).start()
        for pattern in _CORE_PATTERNS
    ]


def _gap_run(sequence, position):
    """Return (start, length) of the run of '-' covering *position*.

    The caller guarantees that sequence[position] is '-'.
    """
    start = position
    # Walk left all the way to index 0; the previous implementation stopped
    # at index 1 and so under-counted runs that begin at the line start.
    while start > 0 and sequence[start - 1] == '-':
        start -= 1
    end = position + 1
    while end < len(sequence) and sequence[end] == '-':
        end += 1
    return start, end - start


def _deletions_at(gap_line, positions):
    """Map gap-run start -> gap-run length for runs covering any position.

    Keying by the run start means a single run that spans several core
    positions is counted once. A notice is printed for every core position
    that falls on a '-'.
    """
    deletions = {}
    for position in positions:
        # Positions past the end of the line cannot be gaps.
        if position < len(gap_line) and gap_line[position] == '-':
            print('There is an delection========================')
            start, length = _gap_run(gap_line, position)
            deletions[start] = length
    return deletions


def main():
    """Count gap characters around the core motif for each input record.

    Reads 'Wt_remaining_with_hyphen.fasta' from the current directory and
    splits it on '>' into records; the text before the first '>' is ignored.
    Within a record, line 0 is the record name, line 2 is searched for the
    target motif and line 4 is inspected for '-' runs at the core positions
    (presumably line 2 is the reference row and line 4 the aligned read of a
    pairwise alignment -- confirm against the input format).

    For every record whose line 2 contains the motif, the per-run lengths
    and their total are printed, and the record name, total and line 4 are
    written to 'Wt_del_count_file.fasta'. Records without the motif, and
    records with fewer than five lines, are skipped.
    """
    with open('Wt_remaining_with_hyphen.fasta') as sourcefile:  # source file
        data = sourcefile.read()

    # The context manager closes the output even if a record raises.
    with open('Wt_del_count_file.fasta', 'w') as del_count_file:
        for paragraph in data.split('>')[1:]:
            lines = paragraph.split('\n')
            if len(lines) < 5:
                # Truncated record: nothing to search or inspect.
                continue

            positions = _core_positions(lines[2])
            if positions is None:
                continue

            gap_line = lines[4]
            start_length_dictionary = _deletions_at(gap_line, positions)
            print(start_length_dictionary)
            total_del_length = str(sum(start_length_dictionary.values()))
            print('The total indel length is : ' + total_del_length)

            del_count_file.write('>' + lines[0] + '\n')
            del_count_file.write(
                'The total delection length is : \n' + total_del_length + '\n'
            )
            del_count_file.write('The sequence is : \n' + gap_line + '\n\n')
| 67 | + |
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments