Skip to content

Commit 8e43700

Browse files
committed
updated comments
1 parent 7cf71a6 commit 8e43700

File tree

1 file changed

+5
-6
lines changed

1 file changed

+5
-6
lines changed

workflow/scripts/mask_fasta.py

+5-6
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,9 @@
33
from Bio import SeqIO
44

55
"""
6-
Replaces gene sequences set in config:fusion_genes
7-
in fasta file with Ns.
8-
9-
TO DO: instead of whole gene sequence, mask only exonic regions
6+
Replaces gene sequences set in config:fusion_genes:genes
7+
in fasta file with Ns. Either whole gene sequence or
8+
just exons can be masked (config:fusion_genes:feature_to_mask).
109
1110
Reason: plasmid expressing Dam fusion
1211
genes can be methylated at very high levels
@@ -57,14 +56,14 @@ def write_dict2fasta(d, out):
5756
except ValueError:
5857
continue # Skip empty line (last one)
5958

60-
# Load chromosome sequence where gene is located
59+
# Load chromosome sequence where gene feature is located
6160
seq = chr_seq[chr]
6261

6362
# Correct start and end positions for 0-based indexing
6463
start = int(start) - 1
6564
end = int(end) - 1
6665

67-
# Mask gene sequence with Ns
66+
# Mask gene feature sequence with Ns
6867
seq_masked = seq[:start] + "N" * (end - start) + seq[end:]
6968

7069
# Replace sequence in dict

0 commit comments

Comments
 (0)