Skip to content

Commit

Permalink
Fix data types when declaring / passing data to Cython.
Browse files (browse the repository at this point in the history)
  • Loading branch information
shz9 committed Apr 4, 2024
1 parent 54755ff commit 1a964f9
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 15 deletions.
4 changes: 2 additions & 2 deletions magenpy/GenotypeMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,8 +633,8 @@ def from_file(cls, file_path, temp_dir='temp', **kwargs):
snp_table = snp_table.astype({
'CHR': int,
'SNP': str,
'cM': float,
'POS': int,
'cM': np.float32,
'POS': np.int32,
'A1': str,
'A2': str
})
Expand Down
2 changes: 1 addition & 1 deletion magenpy/parsers/misc_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def parse_ld_block_data(ldb_file_path):
df = pd.read_csv(ldb_file_path, sep=r'\s+')

df = df.loc[(df.start != 'None') & (df.stop != 'None')]
df = df.astype({'chr': str, 'start': np.int64, 'stop': np.int64})
df = df.astype({'chr': str, 'start': np.int32, 'stop': np.int32})
df = df.sort_values('start')

if df.isnull().values.any():
Expand Down
9 changes: 5 additions & 4 deletions magenpy/parsers/plink_parsers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pandas as pd
import numpy as np


def parse_bim_file(plink_bfile):
Expand Down Expand Up @@ -31,8 +32,8 @@ def parse_bim_file(plink_bfile):
dtype={
'CHR': int,
'SNP': str,
'cM': float,
'POS': int,
'cM': np.float32,
'POS': np.int32,
'A1': str,
'A2': str
})
Expand Down Expand Up @@ -72,8 +73,8 @@ def parse_fam_file(plink_bfile):
'IID': str,
'fatherID': str,
'motherID': str,
'sex': float,
'phenotype': float
'sex': np.float32,
'phenotype': np.float32
},
na_values={
'phenotype': [-9.],
Expand Down
5 changes: 5 additions & 0 deletions magenpy/parsers/sumstats_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ def parse(self, file_name, drop_na=True):
if self.col_name_converter is not None:
df.rename(columns=self.col_name_converter, inplace=True)

try:
df['POS'] = df['POS'].astype(np.int32)
except KeyError:
pass

return df


Expand Down
17 changes: 9 additions & 8 deletions magenpy/stats/ld/c_utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
# cython: infer_types=True

from libc.math cimport exp
from cython cimport integral
from libc.stdint cimport int64_t
from cython cimport integral, floating
cimport cython
import numpy as np

Expand All @@ -35,8 +36,8 @@ cpdef filter_ut_csr_matrix_low_memory(integral[::1] indptr, char[::1] bool_mask)


cdef:
long i, curr_row, row_bound, new_indptr_idx = 1, curr_shape=indptr.shape[0] - 1
long[::1] new_indptr = np.zeros(np.count_nonzero(bool_mask) + 1, dtype=np.int64)
int64_t i, curr_row, row_bound, new_indptr_idx = 1, curr_shape=indptr.shape[0] - 1
int64_t[::1] new_indptr = np.zeros(np.count_nonzero(bool_mask) + 1, dtype=np.int64)
char[::1] data_mask = np.zeros(indptr[curr_shape], dtype=np.int8)

with nogil:
Expand Down Expand Up @@ -70,7 +71,7 @@ cpdef filter_ut_csr_matrix_low_memory(integral[::1] indptr, char[::1] bool_mask)
@cython.wraparound(False)
@cython.nonecheck(False)
@cython.exceptval(check=False)
cpdef expand_ranges(integral[::1] start, integral[::1] end, long output_size):
cpdef expand_ranges(integral[::1] start, integral[::1] end, int64_t output_size):
"""
Given a set of start and end indices, expand them into one long vector that contains
the indices between all start and end positions.
Expand All @@ -83,7 +84,7 @@ cpdef expand_ranges(integral[::1] start, integral[::1] end, long output_size):

cdef:
integral i, j, size=start.shape[0]
long out_idx = 0
int64_t out_idx = 0
integral[::1] output

if integral is int:
Expand All @@ -104,7 +105,7 @@ cpdef expand_ranges(integral[::1] start, integral[::1] end, long output_size):
@cython.nonecheck(False)
@cython.cdivision(True)
@cython.exceptval(check=False)
cpdef find_ld_block_boundaries(integral[:] pos, integral[:, :] block_boundaries):
cpdef find_ld_block_boundaries(integral[:] pos, int[:, :] block_boundaries):
"""
Find the LD boundaries for the blockwise estimator of LD, i.e., the
indices of the leftmost and rightmost neighbors for each SNP.
Expand Down Expand Up @@ -146,7 +147,7 @@ cpdef find_ld_block_boundaries(integral[:] pos, integral[:, :] block_boundaries)
@cython.nonecheck(False)
@cython.cdivision(True)
@cython.exceptval(check=False)
cpdef find_windowed_ld_boundaries(double[:] pos, double max_dist):
cpdef find_windowed_ld_boundaries(floating[:] pos, double max_dist):
"""
Find the LD boundaries for the windowed estimator of LD, i.e., the
indices of the leftmost and rightmost neighbors for each SNP.
Expand Down Expand Up @@ -180,7 +181,7 @@ cpdef find_windowed_ld_boundaries(double[:] pos, double max_dist):
@cython.nonecheck(False)
@cython.cdivision(True)
@cython.exceptval(check=False)
cpdef find_shrinkage_ld_boundaries(double[:] cm_pos,
cpdef find_shrinkage_ld_boundaries(floating[:] cm_pos,
double genmap_ne,
int genmap_sample_size,
double cutoff):
Expand Down

0 comments on commit 1a964f9

Please sign in to comment.