Commit 87e4db2

phenotype tests and updates

ptonner committed Mar 12, 2021
1 parent 136d6af commit 87e4db2

Showing 11 changed files with 179 additions and 43 deletions.
4 changes: 2 additions & 2 deletions Makefile
@@ -1,6 +1,6 @@

test:
pytest tests/*
pytest -W ignore::DeprecationWarning tests/*

cov:
pytest --cov-report html --cov-report term-missing --cov=lantern tests/
pytest -W ignore::DeprecationWarning --cov-report html --cov-report term-missing --cov=lantern tests/
3 changes: 1 addition & 2 deletions manuscript/train.smk
@@ -8,7 +8,6 @@ rule cv:
input:
data(expand("data/processed/{name}.csv", name=config["name"]))
output:
os.path.join("experiments", config["name"], "{model}", "cv{cv}", "model.pt")
# expand("experiments/{ds}", ds=config["name"]) + "/{model}/cv{cv}/model.pt"
expand("experiments/{ds}/{model}/cv{cv}/model.pt", ds=config["name"], allow_missing=True)
shell:
"echo test"
1 change: 1 addition & 0 deletions src/lantern/__init__.py
@@ -0,0 +1 @@
from lantern.module import Module
1 change: 1 addition & 0 deletions src/lantern/loss/__init__.py
@@ -1 +1,2 @@
from lantern.loss.loss import Loss, Term
from lantern.loss.elbo_gp import ELBO_GP
5 changes: 1 addition & 4 deletions src/lantern/loss/elbo_gp.py
@@ -28,10 +28,7 @@ class ELBO_GP(Term):
mll = attr.ib(repr=False)
raw_sigma_hoc = attr.ib(repr=False)

def __attrs_post_init__(self):
self.raw_sigma_hoc = nn.Parameter(torch.randn(self.D) + self.sigma_hoc_offset)

def loss(self, yhat, y, noise, *args, **kwargs) -> dict:
def loss(self, yhat, y, noise=None, *args, **kwargs) -> dict:

if noise is not None:
# fix 1d observation, probably needs to be fixed longer term
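
With `noise` now defaulting to `None`, the term can be evaluated without an observation-noise tensor, matching how `test_loss` below calls it; a minimal sketch:

import torch

from lantern.model.surface import Phenotype

phen = Phenotype(1, torch.rand(100, 10))
term = phen.loss(N=1000)           # builds an ELBO_GP term from the GP
mvn = phen(torch.randn(50, 10))
out = term(mvn, torch.randn(50))   # noise omitted, defaults to None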
12 changes: 9 additions & 3 deletions src/lantern/loss/loss.py
@@ -3,12 +3,18 @@
import attr
from torch import nn

from lantern import Module

class Term(nn.Module):

@attr.s
class Term(Module):
"""A loss term used in optimizing a model.
"""

def loss(self, yhat, y, noise, *args, **kwargs) -> dict:
def forward(self, *args, **kwargs):
return self.loss(*args, **kwargs)

def loss(self, yhat, y, noise=None, *args, **kwargs) -> dict:
raise NotImplementedError()

def __add__(self, other):
@@ -19,7 +25,7 @@ def __add__(self, other):


@attr.s
class Loss:
class Loss(Module):

"""The loss used to optimize a model, composed of individual Term's
"""
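
Because `Term.forward` now delegates to `loss`, any term is callable like a regular `nn.Module`; a minimal sketch with a made-up `MSETerm` (not part of lantern), assuming the new attrs-based `Term`:

import attr
import torch

from lantern.loss import Term


@attr.s
class MSETerm(Term):
    # Hypothetical term: returns a single mean-squared-error component.
    def loss(self, yhat, y, noise=None, *args, **kwargs) -> dict:
        return {"mse": ((yhat - y) ** 2).mean()}


term = MSETerm()
out = term(torch.randn(10), torch.randn(10))  # forward() routes to loss()
assert "mse" in out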
2 changes: 2 additions & 0 deletions src/lantern/model/surface/__init__.py
@@ -0,0 +1,2 @@
from lantern.model.surface.surface import Surface
from lantern.model.surface.phenotype import Phenotype
77 changes: 47 additions & 30 deletions src/lantern/model/surface/phenotype.py
@@ -1,57 +1,51 @@
from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import VariationalStrategy
from gpytorch.variational import VariationalDistribuiton
from gpytorch.distributions import MultivariateNormal
from gpytorch.variational import IndependentMultitaskVariationalStrategy
from gpytorch.means import ConstantMean, Mean
from gpytorch.kernel import Kernel, ScaleKernel, RQKernel
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RQKernel
import torch
import attr

from lantern.model.surface import Surface


@attr.s
class Phenotype(ApproximateGP, Surface):
class Phenotype(ApproximateGP):
"""A phenotype surface, learned with an approximate GP.
"""

inducing: torch.Tensor = attr.ib()
kernel: Kernel = attr.ib(default=ScaleKernel(RQKernel()))
mean: Mean = attr.ib(default=ConstantMean)
distribution: VariationalDistribuiton = attr.ib(
default=CholeskyVariationalDistribution
)
learn_inducing_locations: bool = attr.ib(default=True)

def __attrs_post_init__(self):

def __init__(
self,
D,
inducing_points,
strategy=CholeskyVariationalDistribution,
mean=ConstantMean,
kernel=lambda: ScaleKernel(RQKernel()),
):
size = torch.Size([])
if self.D > 1:
size = torch.Size([self.D])
if D > 1:
size = torch.Size([D])

variational_distribution = self.distribution(
self.inducing.size(-2), batch_shape=size
)
variational_distribution = strategy(inducing_points.size(-2), batch_shape=size)
variational_strategy = VariationalStrategy(
self,
self.inducing,
inducing_points,
variational_distribution,
learn_inducing_locations=self.learn_inducing_locations,
learn_inducing_locations=True,
)

if self.D > 1:
if D > 1:
variational_strategy = IndependentMultitaskVariationalStrategy(
variational_strategy, num_tasks=self.D
variational_strategy, num_tasks=D
)

ApproximateGP.__init__(self, variational_strategy)
super(Phenotype, self).__init__(variational_strategy)

self.mean_module = self.mean(batch_shape=size)
self.mean = mean(batch_shape=size)
self.kernel = kernel()
self.D = D
self.K = inducing_points.size(-1)

def forward(self, z):
mean_x = self.mean_module(z)
mean_x = self.mean(z)
covar_x = self.kernel(z)
return MultivariateNormal(mean_x, covar_x)

@@ -74,3 +68,26 @@ def loss(self, *args, **kwargs):
from lantern.loss import ELBO_GP

return ELBO_GP.fromGP(self, *args, **kwargs)

@classmethod
def fromDataset(cls, ds, K, Ni=800, inducScale=10, *args, **kwargs):
"""Build a phenotype surface from a dataset.
:param ds: Dataset to build the phenotype from.
:type ds: lantern.dataset.Dataset
:param K: Number of latent dimensions
:type K: int
:param Ni: Number of inducing points
:type Ni: int
:param inducScale: Range to initialize inducing points over (uniform from [-inducScale, inducScale])
:type inducScale: float
"""
D = ds.D
if D > 1:
shape = (D, Ni, K)
else:
shape = (Ni, K)

return cls(
D, -inducScale + 2 * inducScale * torch.rand(*shape), *args, **kwargs
)
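
As a sanity check on the initialization in `fromDataset`, `-s + 2 * s * rand()` maps Uniform[0, 1) onto Uniform[-s, s); a sketch:

import torch

s = 10.0                              # inducScale
z = -s + 2 * s * torch.rand(800, 5)   # (Ni, K) for a one-dimensional surface
assert z.min() >= -s and z.max() < s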
5 changes: 3 additions & 2 deletions src/lantern/model/surface/surface.py
@@ -1,9 +1,10 @@
from torch import nn
import attr

from lantern.module import Module


@attr.s
class Surface(nn.Module):
class Surface(Module):

D: int = attr.ib()

16 changes: 16 additions & 0 deletions src/lantern/module.py
@@ -0,0 +1,16 @@
from torch import nn
import attr


@attr.s()
class Module(nn.Module):
"""A base module for lantern components
This module is necessary to play nicely b/w attrs and
pytorch. Some discussion is available here:
https://github.com/python-attrs/attrs/issues/393#issuecomment-510148031
"""

def __attrs_pre_init__(self):
# torch module is initialized before assigning attributes
nn.Module.__init__(self)
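
The `__attrs_pre_init__` hook runs `nn.Module.__init__` before attrs assigns any fields, so parameter and submodule registration hooks are already in place by the time attributes are set; a minimal sketch of a subclass (the `Affine` class is illustrative, not part of lantern):

import attr
import torch
from torch import nn

from lantern import Module


@attr.s
class Affine(Module):
    # Hypothetical attrs-style module; by the time attrs assigns `dim`,
    # nn.Module.__init__ has already run via __attrs_pre_init__.
    dim: int = attr.ib()

    def __attrs_post_init__(self):
        # Parameter registration works because self._parameters exists.
        self.weight = nn.Parameter(torch.ones(self.dim))

    def forward(self, x):
        return x * self.weight


m = Affine(3)
assert list(m.parameters())[0].shape == (3,)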
96 changes: 96 additions & 0 deletions tests/test_phenotype.py
@@ -0,0 +1,96 @@
from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import VariationalStrategy
from gpytorch.distributions import MultivariateNormal, MultitaskMultivariateNormal
from gpytorch.variational import IndependentMultitaskVariationalStrategy
from gpytorch.means import ConstantMean, Mean
from gpytorch.kernels import Kernel, ScaleKernel, RQKernel
import pandas as pd
import numpy as np
import torch

from lantern.model.surface import Phenotype
from lantern.loss import ELBO_GP
from lantern.dataset import Dataset


def test_1d():

induc = torch.rand(100, 10)
phen = Phenotype(1, induc)

assert type(phen.variational_strategy) == VariationalStrategy

mvn = phen(torch.rand(50, 10))
assert type(mvn) == MultivariateNormal
assert mvn.mean.shape == (50,)

assert np.allclose(induc.numpy(), phen._get_induc())

induc = torch.rand(100, 10)
phen._set_induc(induc.numpy())
assert np.allclose(induc.numpy(), phen._get_induc())


def test_multid():

induc = torch.rand(100, 10)
phen = Phenotype(4, induc)

assert type(phen.variational_strategy) == IndependentMultitaskVariationalStrategy

mvn = phen(torch.rand(50, 10))
assert type(mvn) == MultitaskMultivariateNormal
assert mvn.mean.shape == (50, 4)

assert np.allclose(induc.numpy(), phen._get_induc())

induc = torch.rand(100, 10)
phen._set_induc(induc.numpy())
assert np.allclose(induc.numpy(), phen._get_induc())


def test_loss():

induc = torch.rand(100, 10)
phen = Phenotype(1, induc)
loss = phen.loss(N=1000)
assert type(loss) == ELBO_GP

mvn = phen(torch.randn(50, 10))

lss = loss(mvn, torch.randn(50))
assert "neg-loglikelihood" in lss
assert "neg-log-gp-prior" in lss
assert "gp-kl" in lss


def test_ds_construct_1d():

df = pd.DataFrame(
{"substitutions": ["a1b", "c2d"], "phenotype": [0.0, 1.0], "error": [0.1, 0.2],}
)
ds = Dataset(df)
phen = Phenotype.fromDataset(ds, 10)

assert phen.K == 10
assert phen.D == 1


def test_ds_construct_multid():

df = pd.DataFrame(
{
"substitutions": ["a1b", "c2d"],
"p1": [0.0, 1.0],
"p2": [1.0, 0.0],
"e1": [0.1, 0.2],
"e2": [0.2, 0.1],
}
)

ds = Dataset(df, phenotypes=["p1", "p2"], errors=["e1", "e2"])
phen = Phenotype.fromDataset(ds, 10)

assert phen.K == 10
assert phen.D == 2
