@@ -145,7 +145,16 @@ def _apply_iid_function(self,samples):
145
145
if self ._iid_function is not default_iid_function :
146
146
return np .array ([self ._iid_function (sample ) for sample in samples ],dtype = 'str' )
147
147
else :
148
- samples_np = np .stack (np .core .defchararray .split (samples ,',' ,maxsplit = 2 ))
148
+ try :
149
+ samples_np = np .stack (np .core .defchararray .split (samples ,',' ,maxsplit = 2 ))
150
+ except :
151
+ def split_and_fill (s ):
152
+ fields = s .split (',' ,2 )
153
+ if len (fields )== 1 :
154
+ return ['0' ,fields [0 ]]
155
+ else :
156
+ return fields
157
+ samples_np = np .array ([split_and_fill (sample ) for sample in samples ])
149
158
if samples_np .shape [1 ]== 1 :
150
159
samples_np = np .stack ([np .full (samples .shape ,'0' ),samples_np .reshape (- 1 )],axis = 1 )
151
160
row = samples_np
@@ -173,14 +182,14 @@ def _run_once(self):
173
182
174
183
assert os .path .exists (self .filename ), "Expect file to exist ('{0}')" .format (self .filename )
175
184
#!!!cmkassert os.path.getsize(self.filename)<2**31, "For now, Python cannot access files larger than about 2G bytes (see https://github.com/limix/bgen-reader-py/issues/29)"
176
- verbose = logging .getLogger ().level > = logging .INFO
185
+ verbose = logging .getLogger ().level < = logging .INFO
177
186
178
- self ._bgen = open_bgen (self .filename ,self ._sample ,verbose )
179
- self ._row = self ._apply_iid_function (self ._bgen .samples )
180
- self ._col = self ._apply_sid_function (self ._bgen .ids ,self ._bgen .rsids )
187
+ self ._open_bgen = open_bgen (self .filename ,self ._sample ,verbose )
188
+ self ._row = self ._apply_iid_function (self ._open_bgen .samples )
189
+ self ._col = self ._apply_sid_function (self ._open_bgen .ids ,self ._open_bgen .rsids )
181
190
self ._col_property = np .zeros ((len (self ._col ),3 ),dtype = 'float' )
182
- self ._col_property [:,0 ] = self ._bgen .chromosomes
183
- self ._col_property [:,2 ] = self ._bgen .positions
191
+ self ._col_property [:,0 ] = self ._open_bgen .chromosomes
192
+ self ._col_property [:,2 ] = self ._open_bgen .positions
184
193
185
194
self ._assert_iid_sid_pos (check_val = False )
186
195
@@ -191,8 +200,8 @@ def _read(self, iid_index_or_none, sid_index_or_none, order, dtype, force_python
191
200
if order == 'A' :
192
201
order = 'F'
193
202
194
- #cmk assert self._bgen .nalleles unique is 2, phased is all false, ploidy is 2
195
- val = self ._bgen .read ((iid_index_or_none ,sid_index_or_none ),dtype = dtype ,order = order )
203
+ #cmk assert self._open_bgen .nalleles unique is 2, phased is all false, ploidy is 2
204
+ val = self ._open_bgen .read ((iid_index_or_none ,sid_index_or_none ),dtype = dtype ,order = order )
196
205
return val
197
206
198
207
def __repr__ (self ):
@@ -214,8 +223,8 @@ def flush(self):
214
223
'''
215
224
if hasattr (self ,'_ran_once' ) and self ._ran_once :
216
225
self ._ran_once = False
217
- if hasattr (self ,'_bgen' ) and self ._bgen is not None : # we need to test this because Python doesn't guarantee that __init__ was fully run
218
- del self ._bgen
226
+ if hasattr (self ,'_bgen' ) and self ._open_bgen is not None : # we need to test this because Python doesn't guarantee that __init__ was fully run
227
+ del self ._open_bgen
219
228
220
229
@staticmethod
221
230
def write (filename , distreader , bits = 16 , compression = None , sample_function = default_sample_function , id_rsid_function = default_id_rsid_function , iid_function = default_iid_function , sid_function = default_sid_function , block_size = None , qctool_path = None , cleanup_temp_files = True ):
@@ -272,7 +281,7 @@ def write(filename, distreader, bits=16, compression=None, sample_function=defau
272
281
dir , file = os .path .split (filename )
273
282
if dir == '' :
274
283
dir = '.'
275
- metadatanpz = file + '.metadata.npz'
284
+ metadatanpz = open_bgen . _metadatapath_from_filename ( file )
276
285
samplefile = os .path .splitext (file )[0 ]+ '.sample'
277
286
genfile = os .path .splitext (file )[0 ]+ '.gen'
278
287
olddir = os .getcwd ()
@@ -374,7 +383,7 @@ def copyinputs(self, copier):
374
383
copier .input (self .filename )
375
384
if self ._sample is not None :
376
385
copier .input (self ._sample )
377
- metadata2 = self .filename + ".metadata.npz"
386
+ metadata2 = open_bgen . _metadatapath_from_filename ( self .filename )
378
387
if os .path .exists (metadata2 ):
379
388
copier .input (metadata2 )
380
389
@@ -520,8 +529,9 @@ def test_read1(self):
520
529
pstutil .create_directory_if_necessary (file_to )
521
530
if os .path .exists (file_to + ".metadata" ):
522
531
os .remove (file_to + ".metadata" )
523
- if os .path .exists (file_to + ".metadata.npz" ):
524
- os .remove (file_to + ".metadata.npz" )
532
+ meta = open_bgen ._metadatapath_from_filename (file_to )
533
+ if os .path .exists (meta ):
534
+ os .remove (meta )
525
535
shutil .copy (file_from ,file_to )
526
536
527
537
for loop_index in range (2 ):
@@ -542,12 +552,12 @@ def test_read1(self):
542
552
bgen = Bgen (file_to ,iid_function ,sid_function = sid_function )
543
553
assert bgen .sid [0 ]== 'SNPID_2,RSID_2'
544
554
545
- os .remove (file_to + ".metadata.npz" )
555
+ os .remove (bgen . _open_bgen . _metadatapath_from_filename ( file_to ) )
546
556
sid_function = lambda id ,rsid : '{0},{1}' .format (id ,rsid )
547
557
bgen = Bgen (file_to ,iid_function ,sid_function = sid_function )
548
558
assert bgen .sid [0 ]== 'SNPID_2,RSID_2'
549
559
550
- os .remove (file_to + ".metadata.npz" )
560
+ os .remove (bgen . _open_bgen . _metadatapath_from_filename ( file_to ) )
551
561
bgen = Bgen (file_to ,iid_function ,sid_function = 'rsid' )
552
562
assert np .array_equal (bgen .iid [0 ],['sample_001' , 'sample_001' ])
553
563
assert bgen .sid [0 ]== 'RSID_2'
0 commit comments