Commit 8e43700 1 parent 7cf71a6 commit 8e43700 Copy full SHA for 8e43700
File tree 1 file changed +5
-6
lines changed
1 file changed +5
-6
lines changed Original file line number Diff line number Diff line change 3
3
from Bio import SeqIO
4
4
5
5
"""
6
- Replaces gene sequences set in config:fusion_genes
7
- in fasta file with Ns.
8
-
9
- TO DO: instead of whole gene sequence, mask only exonic regions
6
+ Replaces gene sequences set in config:fusion_genes:genes
7
+ in fasta file with Ns. Either whole gene sequence or
8
+ just exons can be masked (config:fusion_genes:feature_to_mask).
10
9
11
10
Reason: plasmid expressing Dam fusion
12
11
genes can be methylated at very high levels
@@ -57,14 +56,14 @@ def write_dict2fasta(d, out):
57
56
except ValueError :
58
57
continue # Skip empty line (last one)
59
58
60
- # Load chromosome sequence where gene is located
59
+ # Load chromosome sequence where gene feature is located
61
60
seq = chr_seq [chr ]
62
61
63
62
# Correct start and end positions for 0-based indexing
64
63
start = int (start ) - 1
65
64
end = int (end ) - 1
66
65
67
- # Mask gene sequence with Ns
66
+ # Mask gene feature sequence with Ns
68
67
seq_masked = seq [:start ] + "N" * (end - start ) + seq [end :]
69
68
70
69
# Replace sequence in dict
You can’t perform that action at this time.
0 commit comments