Skip to content

Commit

Permalink
Merge pull request #187 from teepean/add_selfdecode
Browse files Browse the repository at this point in the history
Add support for SelfDecode
  • Loading branch information
apriha authored Jan 31, 2025
2 parents 489de97 + 16b598e commit ff9369f
Showing 1 changed file with 44 additions and 0 deletions.
44 changes: 44 additions & 0 deletions src/snps/io/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ def read(self):
d = self.read_genes_for_good(file, compression)
elif "DNA.Land" in comments:
d = self.read_dnaland(file, compression)
elif "SelfDecode" in comments:
d = self.read_selfdecode(file, compression)
elif first_line.startswith("[Header]"):
# Global Screening Array, includes SANO and CODIGO46
d = self.read_gsa(file, compression, comments)
Expand Down Expand Up @@ -1144,6 +1146,48 @@ def parser():

return self.read_helper("Sano", parser)

def read_selfdecode(self, file, compression):
    """Read and parse SelfDecode file.

    https://selfdecode.com/

    Parameters
    ----------
    file : str
        path to file
    compression : str or None
        forwarded unchanged to ``pandas.read_csv`` as its ``compression``
        argument

    Returns
    -------
    dict
        result of `read_helper`
    """

    def parser():
        # Start from the normalized dtypes, but read 'pos' with pandas'
        # nullable unsigned integer type so that rows lacking a position
        # can be loaded first and filtered out afterwards.
        read_dtypes = NORMALIZED_DTYPES.copy()
        read_dtypes["pos"] = pd.UInt32Dtype()

        frame = pd.read_csv(
            file,
            sep="\t",
            comment="#",
            names=["rsid", "chrom", "pos", "genotype"],
            na_values="--",
            dtype=read_dtypes,
            compression=compression,
        )

        # Rows without a position cannot be represented as np.uint32, so
        # they are discarded before 'pos' leaves the nullable dtype.
        frame = frame.dropna(subset=["pos"])
        frame = frame.astype({"pos": np.uint32})

        # Discard rows still missing any identifying field, then apply the
        # normalized dtypes and index the result by rsid.
        frame = frame.dropna(subset=["rsid", "chrom", "pos"])
        frame = frame.astype(dtype=NORMALIZED_DTYPES).set_index("rsid")
        return (frame,)

    return self.read_helper("SelfDecode", parser)

def read_plink(self, file, compression):
"""Read and parse plink file.
Expand Down

0 comments on commit ff9369f

Please sign in to comment.