Skip to content

Commit

Permalink
working on genomes and phenotypes towards merging them together
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffersonfparil committed Nov 14, 2024
1 parent ec2a238 commit 2bea260
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 18 deletions.
30 changes: 20 additions & 10 deletions data/genotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,6 @@ def update(
"""
Update the contents of the genotype data with automatic sorting of the rows (entries) and columns (loci)
"""
if not self.check_dimensions():
raise IncompatibleParameters
n: int = len(entries)
if n != genotypes.shape[0]:
raise IncompatibleParameters
Expand Down Expand Up @@ -234,6 +232,8 @@ def merge_genotype(
)
_, idx_rows, _ = np.intersect1d(entries, entries_intersection, return_indices=True)
_, idx_cols, _ = np.intersect1d(loci, loci_intersection, return_indices=True)
_, idx_rows_self, _ = np.intersect1d(self.entries, entries_intersection, return_indices=True)
_, idx_cols_self, _ = np.intersect1d(self.loci, loci_intersection, return_indices=True)
genotypes[np.ix_(idx_rows, idx_cols)] *= conflict_resolution[1]
genotypes[np.ix_(idx_rows, idx_cols)] += (
conflict_resolution[0] * self.genotypes[np.ix_(idx_rows_self, idx_cols_self)]
Expand All @@ -258,19 +258,29 @@ def test_genotype():
import copy

self, _ = simulate()
assert isinstance(self, Genomes)
print(self)

corrupted_genomes = copy.deepcopy(self)
corrupted_genomes.entries = np.array([''])
restored_genomes = copy.deepcopy(corrupted_genomes)
assert not corrupted_genomes.check_dimensions()

restored_genomes.update(
entries=self.entries, loci=self.loci, genotypes=self.genotypes, mask=self.mask
)
assert restored_genomes.check_dimensions()
assert restored_genomes == self

self.mask[:, :] = False
self.mask[1:6, 10:20] = True
for i in np.random.choice(a=self.mask.shape[0], size=3):
for j in np.random.choice(a=self.mask.shape[1], size=3):
self.mask[i, j] = True
other = copy.deepcopy(self)
other.slice_inplace()
conflict_resolution = 0.5, 0.5

assert isinstance(self, Genomes)
sliced_copy = self.slice()
assert other == sliced_copy
print(other)
print(sliced_copy)

conflict_resolution = 0.5, 0.5
merged = self.merge_genotype(other=other, conflict_resolution=conflict_resolution)
print(self)
print(merged)
assert merged == self
108 changes: 100 additions & 8 deletions data/phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ def check_dimensions(self: Self) -> bool:
n: int = len(self.entries)
if n != self.phenotypes.shape[0]:
return False
pk1: int = len(self.traits)
if pk1 != self.phenotypes.shape[1]:
t: int = len(self.traits)
if t != self.phenotypes.shape[1]:
return False
if self.phenotypes.shape != self.mask.shape:
return False
Expand All @@ -114,13 +114,11 @@ def update(
"""
Update the contents of the phenotype data with automatic sorting of the rows (entries) and columns (traits)
"""
if not self.check_dimensions():
raise IncompatibleParameters
n: int = len(entries)
if n != phenotypes.shape[0]:
raise IncompatibleParameters
pk1: int = len(traits)
if pk1 != phenotypes.shape[1]:
t: int = len(traits)
if t != phenotypes.shape[1]:
raise IncompatibleParameters
if phenotypes.shape != mask.shape:
raise IncompatibleParameters
Expand Down Expand Up @@ -171,6 +169,100 @@ def slice(self: Self) -> 'Phenomes':
)
return out

def merge_phenotype(
    self: Self, other: Self, conflict_resolution: tuple[float, float]
) -> 'Phenomes':
    """
    Merge two phenotype datasets

    The merged dataset spans the union of the entries and of the traits of
    `self` and `other` (as returned by `np.unique`). Cells present in only one
    dataset are copied from that dataset. Cells present in both datasets (the
    entries shared by both crossed with the traits shared by both) are
    resolved as a weighted mean:
        conflict_resolution[0] * self + conflict_resolution[1] * other
    where the weights are first rescaled so that they sum to one. Cells of the
    union grid covered by neither dataset keep NaN as phenotype and True as
    mask.

    For the shared cells the mask is the logical OR of the two input masks,
    where an input whose weight is zero does not contribute its mask.

    Raises `IncompatibleParameters` if either dataset fails
    `check_dimensions()` or if the weights sum to zero or to a non-finite
    value (they cannot be rescaled to sum to one).
    """
    if (not self.check_dimensions()) or (not other.check_dimensions()):
        raise IncompatibleParameters
    # Rescale the weights so that they sum to one. Dividing by a total that is
    # already 1.0 is a no-op, so no floating-point equality test is needed.
    total: float = float(np.sum(conflict_resolution))
    if (not np.isfinite(total)) or (total == 0.0):
        raise IncompatibleParameters
    weight_self: float = conflict_resolution[0] / total
    weight_other: float = conflict_resolution[1] / total
    # Initialise the merged phenotype dataset
    entries: np.ndarray = np.unique(np.concatenate((self.entries, other.entries)))
    traits: np.ndarray = np.unique(np.concatenate((self.traits, other.traits)))
    n: int = len(entries)
    t: int = len(traits)
    phenotypes: np.ndarray = np.full(
        shape=(n, t), fill_value=np.nan, dtype=np.float64, order='C'
    )
    mask: np.ndarray = np.full(shape=(n, t), fill_value=True, dtype=bool, order='C')
    merged: Phenomes = Phenomes()
    # Insert the data from self
    _, idx_rows, idx_rows_self = np.intersect1d(entries, self.entries, return_indices=True)
    _, idx_cols, idx_cols_self = np.intersect1d(traits, self.traits, return_indices=True)
    phenotypes[np.ix_(idx_rows, idx_cols)] = self.phenotypes[
        np.ix_(idx_rows_self, idx_cols_self)
    ]
    mask[np.ix_(idx_rows, idx_cols)] = self.mask[np.ix_(idx_rows_self, idx_cols_self)]
    # Insert the data from other (overwrites self on the shared cells, which
    # are recombined with self just below)
    _, idx_rows, idx_rows_other = np.intersect1d(entries, other.entries, return_indices=True)
    _, idx_cols, idx_cols_other = np.intersect1d(traits, other.traits, return_indices=True)
    phenotypes[np.ix_(idx_rows, idx_cols)] = other.phenotypes[
        np.ix_(idx_rows_other, idx_cols_other)
    ]
    mask[np.ix_(idx_rows, idx_cols)] = other.mask[np.ix_(idx_rows_other, idx_cols_other)]
    # Resolve intersection using a weighted mean
    entries_intersection: np.ndarray = np.intersect1d(self.entries, other.entries)
    traits_intersection: np.ndarray = np.intersect1d(self.traits, other.traits)
    # Both index sets below follow the sorted order of the shared labels, so
    # the merged-grid cells and the cells of self line up one-to-one.
    _, idx_rows, _ = np.intersect1d(entries, entries_intersection, return_indices=True)
    _, idx_cols, _ = np.intersect1d(traits, traits_intersection, return_indices=True)
    _, idx_rows_self, _ = np.intersect1d(self.entries, entries_intersection, return_indices=True)
    _, idx_cols_self, _ = np.intersect1d(self.traits, traits_intersection, return_indices=True)
    shared = np.ix_(idx_rows, idx_cols)
    shared_self = np.ix_(idx_rows_self, idx_cols_self)
    # The shared cells currently hold the values of other
    phenotypes[shared] = (phenotypes[shared] * weight_other) + (
        weight_self * self.phenotypes[shared_self]
    )
    mask[shared] = (mask[shared] & bool(weight_other)) | (
        self.mask[shared_self] & bool(weight_self)
    )
    # Update the merged phenotype dataset
    merged.update(
        entries=entries,
        traits=traits,
        phenotypes=phenotypes,
        mask=mask,
    )
    return merged


def test_phenotype():
# from data.phenotype import *
from data.simulation import simulate
import copy

_, self = simulate()
assert isinstance(self, Phenomes)
print(self)

corrupted_phenomes = copy.deepcopy(self)
corrupted_phenomes.entries = np.array([''])
restored_phenomes = copy.deepcopy(corrupted_phenomes)
assert not corrupted_phenomes.check_dimensions()

restored_phenomes.update(
entries=self.entries, traits=self.traits, phenotypes=self.phenotypes, mask=self.mask
)
assert restored_phenomes.check_dimensions()
assert restored_phenomes == self

self.mask[:, :] = False
self.mask[:, :] = False
for i in np.random.choice(a=self.mask.shape[0], size=3):
for j in np.random.choice(a=self.mask.shape[1], size=3):
self.mask[i, j] = True
other = copy.deepcopy(self)
other.slice_inplace()
sliced_copy = self.slice()
assert other == sliced_copy

def test_phenomes():
assert 1 == 1
conflict_resolution = 0.5, 0.5
merged = self.merge_phenotype(other=other, conflict_resolution=conflict_resolution)
assert merged == self

0 comments on commit 2bea260

Please sign in to comment.