Commit 87e4db2

phenotype tests and updates

ptonner committed Mar 12, 2021
1 parent 136d6af commit 87e4db2

Showing 11 changed files with 179 additions and 43 deletions.
4 changes: 2 additions & 2 deletions Makefile
@@ -1,6 +1,6 @@

test:
pytest tests/*
pytest -W ignore::DeprecationWarning tests/*

cov:
pytest --cov-report html --cov-report term-missing --cov=lantern tests/
pytest -W ignore::DeprecationWarning --cov-report html --cov-report term-missing --cov=lantern tests/
3 changes: 1 addition & 2 deletions manuscript/train.smk
@@ -8,7 +8,6 @@ rule cv:
input:
data(expand("data/processed/{name}.csv", name=config["name"]))
output:
os.path.join("experiments", config["name"], "{model}", "cv{cv}", "model.pt")
# expand("experiments/{ds}", ds=config["name"]) + "/{model}/cv{cv}/model.pt"
expand("experiments/{ds}/{model}/cv{cv}/model.pt", ds=config["name"], allow_missing=True)
shell:
"echo test"
1 change: 1 addition & 0 deletions src/lantern/__init__.py
@@ -0,0 +1 @@
from lantern.module import Module
1 change: 1 addition & 0 deletions src/lantern/loss/__init__.py
@@ -1 +1,2 @@
from lantern.loss.loss import Loss, Term
from lantern.loss.elbo_gp import ELBO_GP
5 changes: 1 addition & 4 deletions src/lantern/loss/elbo_gp.py
@@ -28,10 +28,7 @@ class ELBO_GP(Term):
mll = attr.ib(repr=False)
raw_sigma_hoc = attr.ib(repr=False)

def __attrs_post_init__(self):
self.raw_sigma_hoc = nn.Parameter(torch.randn(self.D) + self.sigma_hoc_offset)

def loss(self, yhat, y, noise, *args, **kwargs) -> dict:
def loss(self, yhat, y, noise=None, *args, **kwargs) -> dict:

if noise is not None:
# fix 1d observation, probably needs to be fixed longer term
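
With `noise` now defaulting to `None`, the term can be evaluated without an observation-noise tensor, matching how `test_loss` below calls it; a minimal sketch:

import torch

from lantern.model.surface import Phenotype

phen = Phenotype(1, torch.rand(100, 10))
term = phen.loss(N=1000)           # builds an ELBO_GP term from the GP
mvn = phen(torch.randn(50, 10))
out = term(mvn, torch.randn(50))   # noise omitted, defaults to None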
12 changes: 9 additions & 3 deletions src/lantern/loss/loss.py
@@ -3,12 +3,18 @@
import attr
from torch import nn

from lantern import Module

class Term(nn.Module):

@attr.s
class Term(Module):
"""A loss term used in optimizing a model.
"""

def loss(self, yhat, y, noise, *args, **kwargs) -> dict:
def forward(self, *args, **kwargs):
return self.loss(*args, **kwargs)

def loss(self, yhat, y, noise=None, *args, **kwargs) -> dict:
raise NotImplementedError()

def __add__(self, other):
@@ -19,7 +25,7 @@ def __add__(self, other):


@attr.s
class Loss:
class Loss(Module):

"""The loss used to optimize a model, composed of individual Term's
"""
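
Because `Term.forward` now delegates to `loss`, any term is callable like a regular `nn.Module`; a minimal sketch with a made-up `MSETerm` (not part of lantern), assuming the new attrs-based `Term`:

import attr
import torch

from lantern.loss import Term


@attr.s
class MSETerm(Term):
    # Hypothetical term: returns a single mean-squared-error component.
    def loss(self, yhat, y, noise=None, *args, **kwargs) -> dict:
        return {"mse": ((yhat - y) ** 2).mean()}


term = MSETerm()
out = term(torch.randn(10), torch.randn(10))  # forward() routes to loss()
assert "mse" in out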
2 changes: 2 additions & 0 deletions src/lantern/model/surface/__init__.py
@@ -0,0 +1,2 @@
from lantern.model.surface.surface import Surface
from lantern.model.surface.phenotype import Phenotype
77 changes: 47 additions & 30 deletions src/lantern/model/surface/phenotype.py
@@ -1,57 +1,51 @@
from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import VariationalStrategy
from gpytorch.variational import VariationalDistribuiton
from gpytorch.distributions import MultivariateNormal
from gpytorch.variational import IndependentMultitaskVariationalStrategy
from gpytorch.means import ConstantMean, Mean
from gpytorch.kernel import Kernel, ScaleKernel, RQKernel
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RQKernel
import torch
import attr

from lantern.model.surface import Surface


@attr.s
class Phenotype(ApproximateGP, Surface):
class Phenotype(ApproximateGP):
"""A phenotype surface, learned with an approximate GP.
"""

inducing: torch.Tensor = attr.ib()
kernel: Kernel = attr.ib(default=ScaleKernel(RQKernel()))
mean: Mean = attr.ib(default=ConstantMean)
distribution: VariationalDistribuiton = attr.ib(
default=CholeskyVariationalDistribution
)
learn_inducing_locations: bool = attr.ib(default=True)

def __attrs_post_init__(self):

def __init__(
self,
D,
inducing_points,
strategy=CholeskyVariationalDistribution,
mean=ConstantMean,
kernel=lambda: ScaleKernel(RQKernel()),
):
size = torch.Size([])
if self.D > 1:
size = torch.Size([self.D])
if D > 1:
size = torch.Size([D])

variational_distribution = self.distribution(
self.inducing.size(-2), batch_shape=size
)
variational_distribution = strategy(inducing_points.size(-2), batch_shape=size)
variational_strategy = VariationalStrategy(
self,
self.inducing,
inducing_points,
variational_distribution,
learn_inducing_locations=self.learn_inducing_locations,
learn_inducing_locations=True,
)

if self.D > 1:
if D > 1:
variational_strategy = IndependentMultitaskVariationalStrategy(
variational_strategy, num_tasks=self.D
variational_strategy, num_tasks=D
)

ApproximateGP.__init__(self, variational_strategy)
super(Phenotype, self).__init__(variational_strategy)

self.mean_module = self.mean(batch_shape=size)
self.mean = mean(batch_shape=size)
self.kernel = kernel()
self.D = D
self.K = inducing_points.size(-1)

def forward(self, z):
mean_x = self.mean_module(z)
mean_x = self.mean(z)
covar_x = self.kernel(z)
return MultivariateNormal(mean_x, covar_x)

@@ -74,3 +68,26 @@ def loss(self, *args, **kwargs):
from lantern.loss import ELBO_GP

return ELBO_GP.fromGP(self, *args, **kwargs)

@classmethod
def fromDataset(cls, ds, K, Ni=800, inducScale=10, *args, **kwargs):
"""Build a phenotype surface from a dataset.
:param ds: Dataset to build the phenotype from.
:type ds: lantern.dataset.Dataset
:param K: Number of latent dimensions
:type K: int
:param Ni: Number of inducing points
:type Ni: int
:param inducScale: Range to initialize inducing points over (uniform from [-inducScale, inducScale])
:type inducScale: float
"""
D = ds.D
if D > 1:
shape = (D, Ni, K)
else:
shape = (Ni, K)

return cls(
D, -inducScale + 2 * inducScale * torch.rand(*shape), *args, **kwargs
)
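
As a sanity check on the initialization in `fromDataset`, `-s + 2 * s * rand()` maps Uniform[0, 1) onto Uniform[-s, s); a sketch:

import torch

s = 10.0                              # inducScale
z = -s + 2 * s * torch.rand(800, 5)   # (Ni, K) for a one-dimensional surface
assert z.min() >= -s and z.max() < s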
5 changes: 3 additions & 2 deletions src/lantern/model/surface/surface.py
@@ -1,9 +1,10 @@
from torch import nn
import attr

from lantern.module import Module


@attr.s
class Surface(nn.Module):
class Surface(Module):

D: int = attr.ib()

16 changes: 16 additions & 0 deletions src/lantern/module.py
@@ -0,0 +1,16 @@
from torch import nn
import attr


@attr.s()
class Module(nn.Module):
"""A base module for lantern components
This module is necessary to play nicely b/w attrs and
pytorch. Some discussion is available here:
https://github.com/python-attrs/attrs/issues/393#issuecomment-510148031
"""

def __attrs_pre_init__(self):
# torch module is initialized before assigning attributes
nn.Module.__init__(self)
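
The `__attrs_pre_init__` hook runs `nn.Module.__init__` before attrs assigns any fields, so parameter and submodule registration hooks are already in place by the time attributes are set; a minimal sketch of a subclass (the `Affine` class is illustrative, not part of lantern):

import attr
import torch
from torch import nn

from lantern import Module


@attr.s
class Affine(Module):
    # Hypothetical attrs-style module; by the time attrs assigns `dim`,
    # nn.Module.__init__ has already run via __attrs_pre_init__.
    dim: int = attr.ib()

    def __attrs_post_init__(self):
        # Parameter registration works because self._parameters exists.
        self.weight = nn.Parameter(torch.ones(self.dim))

    def forward(self, x):
        return x * self.weight


m = Affine(3)
assert list(m.parameters())[0].shape == (3,)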
96 changes: 96 additions & 0 deletions tests/test_phenotype.py
@@ -0,0 +1,96 @@
from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import VariationalStrategy
from gpytorch.distributions import MultivariateNormal, MultitaskMultivariateNormal
from gpytorch.variational import IndependentMultitaskVariationalStrategy
from gpytorch.means import ConstantMean, Mean
from gpytorch.kernels import Kernel, ScaleKernel, RQKernel
import pandas as pd
import numpy as np
import torch

from lantern.model.surface import Phenotype
from lantern.loss import ELBO_GP
from lantern.dataset import Dataset


def test_1d():

induc = torch.rand(100, 10)
phen = Phenotype(1, induc)

assert type(phen.variational_strategy) == VariationalStrategy

mvn = phen(torch.rand(50, 10))
assert type(mvn) == MultivariateNormal
assert mvn.mean.shape == (50,)

assert np.allclose(induc.numpy(), phen._get_induc())

induc = torch.rand(100, 10)
phen._set_induc(induc.numpy())
assert np.allclose(induc.numpy(), phen._get_induc())


def test_multid():

induc = torch.rand(100, 10)
phen = Phenotype(4, induc)

assert type(phen.variational_strategy) == IndependentMultitaskVariationalStrategy

mvn = phen(torch.rand(50, 10))
assert type(mvn) == MultitaskMultivariateNormal
assert mvn.mean.shape == (50, 4)

assert np.allclose(induc.numpy(), phen._get_induc())

induc = torch.rand(100, 10)
phen._set_induc(induc.numpy())
assert np.allclose(induc.numpy(), phen._get_induc())


def test_loss():

induc = torch.rand(100, 10)
phen = Phenotype(1, induc)
loss = phen.loss(N=1000)
assert type(loss) == ELBO_GP

mvn = phen(torch.randn(50, 10))

lss = loss(mvn, torch.randn(50))
assert "neg-loglikelihood" in lss
assert "neg-log-gp-prior" in lss
assert "gp-kl" in lss


def test_ds_construct_1d():

df = pd.DataFrame(
{"substitutions": ["a1b", "c2d"], "phenotype": [0.0, 1.0], "error": [0.1, 0.2],}
)
ds = Dataset(df)
phen = Phenotype.fromDataset(ds, 10)

assert phen.K == 10
assert phen.D == 1


def test_ds_construct_multid():

df = pd.DataFrame(
{
"substitutions": ["a1b", "c2d"],
"p1": [0.0, 1.0],
"p2": [1.0, 0.0],
"e1": [0.1, 0.2],
"e2": [0.2, 0.1],
}
)

ds = Dataset(df, phenotypes=["p1", "p2"], errors=["e1", "e2"])
phen = Phenotype.fromDataset(ds, 10)

assert phen.K == 10
assert phen.D == 2
