Skip to content

Commit

Permalink
Add support for carriage returns. Fixes #141
Browse files: browse the repository at this point in the history.
  • Loading branch information
mdshw5 committed Aug 4, 2018
1 parent a36e6e1 commit f314b59
Show file tree
Hide file tree
Showing 3 changed files with 1,060 additions and 5 deletions.
17 changes: 12 additions & 5 deletions pyfaidx/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -25,7 +25,7 @@

dna_bases = re.compile(r'([ACTGNactgnYRWSKMDVHBXyrwskmdvhbx]+)')

__version__ = '0.5.4.1'
__version__ = '0.5.4.2'


class KeyFunctionError(ValueError):
Expand Down Expand Up @@ -499,7 +499,7 @@ def read_fai(self):

def build_index(self):
try:
with self._fasta_opener(self.filename, 'r') as fastafile:
with self._fasta_opener(self.filename, 'rb') as fastafile:
with open(self.indexname, 'w') as indexfile:
rname = None # reference sequence name
offset = 0 # binary offset of end of current line
Expand All @@ -512,6 +512,7 @@ def build_index(self):
lastline = None
for i, line in enumerate(fastafile):
line_blen = len(line)
line = line.decode()
line_clen = len(line.rstrip('\n\r'))
lastline = i
# write an index line
Expand Down Expand Up @@ -667,7 +668,7 @@ def from_file(self, rname, start, end, internals=False):
seq = ''

if not internals:
return seq.replace('\n', '')
return seq.replace('\n', '').replace('\r', '')
else:
return (seq, locals())

Expand Down Expand Up @@ -709,14 +710,20 @@ def to_file(self, rname, start, end, seq):
)
elif len(seq) == len(file_seq) - internals['newlines_inside']:
line_len = internals['i'].lenc
if '\r\n' in file_seq:
newline_char = '\r\n'
elif '\r' in file_seq:
newline_char = '\r'
else:
newline_char = '\n'
self.file.seek(internals['bstart'])
if internals['newlines_inside'] == 0:
self.file.write(seq.encode())
elif internals['newlines_inside'] > 0:
n = 0
m = file_seq.index('\n')
m = file_seq.index(newline_char)
while m < len(seq):
self.file.write(''.join([seq[n:m], '\n']).encode())
self.file.write(''.join([seq[n:m], newline_char]).encode())
n = m
m += line_len
self.file.write(seq[n:].encode())
Expand Down
Loading

0 comments on commit f314b59

Please sign in to comment.