Skip to content

Commit

Permalink
drafted simulation module + working on Genomes+Phenomes methods and t…
Browse files Browse the repository at this point in the history
…esting model building
  • Loading branch information
jeffersonfparil committed Nov 11, 2024
1 parent 8a7cacf commit ee81654
Show file tree
Hide file tree
Showing 7 changed files with 515 additions and 201 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: 🌱💚🧬
name: 🌱🧬

on:
push:
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Genomic breeding workflow

|**Build Status**|**License**|
|:--------------:|:---------:|
| <a href="https://github.com/jeffersonfparil/gb/actions"><img src="https://github.com/jeffersonfparil/gb/actions/workflows/test_pytest.yml/badge.svg"></a> | [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) |
| <a href="https://github.com/jeffersonfparil/gb/actions"><img src="https://github.com/jeffersonfparil/gb/actions/workflows/python_workflow.yml/badge.svg"></a> | [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) |

TODO: Consolidate db, gp, and simquantgen into this single workflow plus various selection strategies incorporating selection simulations

Expand All @@ -14,9 +14,9 @@ cd gb/
pip install uv
pip install ruff
uv init .
uv add pytest mypy numpy progressbar2 matplotlib polars scipy statsmodels click
uv add pytest mypy numpy progressbar2 scikit-learn matplotlib polars click
uv run ruff format
uv run ruff check
uv run mypy */*.py
uv run mypy data/*.py
uv run pytest -s
```
111 changes: 88 additions & 23 deletions data/genotype.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,104 @@
import numpy as np
# from data.error import Error
from typing import Self
from data.error import IncompatibleParameters


class Genomes:
"""
Genotype data
n: number of entries (an entry may be an individual diploid genotype, a tetraploid genotype, or a pool of 50 diploid genotypes)
p: total number of loci
m: number of chromosomes
k: number of alleles per locus
entries: vector (nx0; str) of entry names
chromosomes: vector (px0; str) of chromosome names
positions: vector (px0; uint64) of positions per chromosome (i.e. starts at 1 per chromosome)
alleles: matrix (pxk; str) of allele names across p loci and k alleles
genotypes: matrix (nx(p*(k-1)); np.float64) of allele frequencies across n genotypes and p*(k-1) alleles,
where the k-1 alleles per locus are adjacent to each other along the columns
mask: matrix (nx(p*(k-1)); np.bool) of boolean mask
"""

n: int
p: int
k: int
m: int
a: int
b: np.ndarray
entries: np.ndarray
chromosomes: np.ndarray
positions: np.ndarray
alleles: np.ndarray
X: np.ndarray
genotypes: np.ndarray
mask: np.ndarray

def __init__(self) -> None:
self.n = 1
self.p = 1
self.m = 1
self.a = 1
self.b = np.array([1.00])
self.entries = np.array(['dummy_entry_1'])
self.chromosomes = np.array(['chromosome_1'])
self.positions = np.array([123_456_789])
self.alleles = np.array(['allele 1|allele_2|allele_3\tallele_2'])
self.X = np.array([[1.00]])
self.mask = np.array([[False]])
def __init__(
self: Self,
entries: np.ndarray,
chromosomes: np.ndarray,
positions: np.ndarray,
alleles: np.ndarray,
genotypes: np.ndarray,
) -> None:
n: int = entries.shape[0]
if n != genotypes.shape[0]:
raise IncompatibleParameters
pk1: int = genotypes.shape[1]
k: int = alleles.shape[1]
if pk1 % (k - 1) > 0:
raise IncompatibleParameters
p: int = int(pk1 / (k - 1))
if (p != chromosomes.shape[0]) or (p != positions.shape[0]) or (p != alleles.shape[0]):
raise IncompatibleParameters
m: int = np.unique(ar=chromosomes).shape[0]
self.n = n
self.p = p
self.k = k
self.m = m
self.entries = entries
self.chromosomes = chromosomes
self.positions = positions
self.alleles = alleles
self.genotypes = genotypes
self.mask = np.ones((n, pk1)).astype(np.bool)

def __str__(self: Self) -> str:
info: str = (
'{\n\tn: '
+ str(self.n)
+ '\n\t'
+ 'p: '
+ str(self.p)
+ '\n\t'
+ 'k: '
+ str(self.k)
+ '\n\t'
+ 'm: '
+ str(self.m)
+ '\n\t'
+ 'entries: '
+ str(self.entries)
+ '\n\t'
+ 'chromosomes: '
+ str(self.chromosomes)
+ '\n\t'
+ 'positions: '
+ str(self.positions)
+ '\n\t'
+ 'alleles: '
+ str(self.alleles)
+ '\n\t'
+ 'genotypes: '
+ str(self.genotypes)
+ '\n\t'
+ 'mask: '
+ str(self.mask)
+ '\n}'
)
return info


def test_genotype():
genotypes = Genomes()
print(genotypes.X)
assert genotypes.n == 1
assert genotypes.n == genotypes.p
assert genotypes.p == genotypes.m
assert genotypes.m == genotypes.a
assert len(genotypes.b) == 1
from data.simulation import simulate

genomes, _ = simulate()
print(genomes)
assert isinstance(genomes, Genomes)
39 changes: 26 additions & 13 deletions data/phenotype.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,38 @@
import numpy as np
# from data.error import Error
from data.error import IncompatibleParameters


class Phenomes:
"""
Phenotype data
n: number of entries (an entry may be an individual diploid genotype, a tetraploid genotype, or a pool of 50 diploid genotypes)
ntraits: number of traits
entries: vector (nx0; str) of entry names
traits: vector (ntraitsx0; str) of trait names
phenotypes: matrix (n x ntraits; np.float64) of phenotype data
mask: matrix (n x ntraits; np.bool) of boolean mask
"""

n: int
q: int
entries: np.ndarray
traits: np.ndarray
Y: np.ndarray
phenotypes: np.ndarray
mask: np.ndarray

def __init__(self) -> None:
self.n = 1
self.q = 1
self.entries = np.array(['dummy_entry_1'])
self.traits = np.array(['dummy_trait_1'])
self.Y = np.array([[0.00]])
self.mask = np.array([[False]])
def __init__(self, entries: np.ndarray, traits: np.ndarray, phenotypes: np.ndarray) -> None:
    """
    Store phenotype data after checking that the inputs agree in size.

    entries: vector of n entry names
    traits: vector of ntraits trait names
    phenotypes: matrix (n x ntraits) of phenotype values

    Raises IncompatibleParameters when phenotypes is not exactly an
    (n x ntraits) matrix, i.e. its rows do not match the entries, its
    columns do not match the traits, or it is not 2-dimensional.
    """
    n: int = entries.shape[0]
    ntraits: int = traits.shape[0]
    # One shape comparison covers the row check, the column check and the
    # dimensionality (a 1-D array would otherwise fail with IndexError).
    if phenotypes.shape != (n, ntraits):
        raise IncompatibleParameters
    self.n = n
    self.ntraits = ntraits
    self.entries = entries
    self.traits = traits
    self.phenotypes = phenotypes
    # All-True mask built directly as booleans: avoids the float64 intermediate
    # and the `np.bool` alias, which does not exist in NumPy 1.24-1.26.
    self.mask = np.ones((n, ntraits), dtype=bool)


def test_phenomes():
Y = Phenomes()
assert Y.n == 1
assert Y.n == Y.q
assert 1 == 1
Loading

0 comments on commit ee81654

Please sign in to comment.