From 78730b10ab5c2e4f86ce691b66ff787c934f22d0 Mon Sep 17 00:00:00 2001
From: rfl-urbaniak <rfl.urbaniak@gmail.com>
Date: Mon, 5 Aug 2024 16:21:42 -0400
Subject: [PATCH] tests for linear component

---
 tests/modeling/test_model_components.py    | 153 +++++++++++++++++++++
 tests/modeling/test_modeling_components.py |  49 -------
 2 files changed, 153 insertions(+), 49 deletions(-)
 create mode 100644 tests/modeling/test_model_components.py
 delete mode 100644 tests/modeling/test_modeling_components.py

diff --git a/tests/modeling/test_model_components.py b/tests/modeling/test_model_components.py
new file mode 100644
index 00000000..685ae4c9
--- /dev/null
+++ b/tests/modeling/test_model_components.py
@@ -0,0 +1,153 @@
+from typing import Dict
+
+import pytest
+import torch
+import pyro
+
+from cities.modeling.model_components import (get_n, categorical_contribution, continuous_contribution,
+                                              add_linear_component)
+
+
+
+@pytest.mark.parametrize(
+    "categorical, continuous, expected",
+    [
+        # both categorical and continuous
+        (
+            {"cat1": torch.tensor([1, 2, 3, 4]), "cat2": torch.tensor([1, 2, 3, 4])},
+            {"cont1": torch.tensor([0.5, 0.6, 0.7, 0.5])},
+            (2, 1, 4),
+        ),
+        # only categorical
+        ({"cat1": torch.tensor([1, 2, 3])}, {}, (1, 0, 3)),
+        # only continuous
+        ({}, {"cont1": torch.tensor([0.5, 0.6, 0.7, 0.8])}, (0, 1, 4)),
+        # mixed size categorical
+        (
+            {
+                "cat1": torch.tensor([1, 2, 3, 4, 5]),
+                "cat2": torch.tensor([1, 2, 3, 4, 5]),
+            },
+            {},
+            (2, 0, 5),
+        ),
+    ],
+)
+def test_get_n(
+    categorical: Dict[str, torch.Tensor],
+    continuous: Dict[str, torch.Tensor],
+    expected: tuple,
+):
+    assert get_n(categorical, continuous) == expected
+
+
+def test_get_n_error():
+    with pytest.raises(
+        ValueError,
+        match="The number of categorical and continuous data points must be the same",
+    ):
+        get_n(
+            {"cat1": torch.tensor([1, 2, 3, 4]), "cat2": torch.tensor([1, 2, 3, 4])},
+            {"cont1": torch.tensor([0.5, 0.6, 0.5])},
+        )
+
+
+# setup for component tests
+mock_data_cat = {"cat1": torch.tensor([2, 1, 0]), "cat2": torch.tensor([1, 0, 1])}
+mock_data_cont = {"cont1": torch.tensor([1.0, 2.0, 3.0]), "cont2": torch.tensor([4.0, 5.0, 6.0])}
+categorical_levels = {"cat1": torch.tensor([0, 1, 2]), "cat2": torch.tensor([0, 1])}
+
+
+def test_categorical_contribution():
+    
+    with pyro.poutine.trace() as tr:
+        cat_contribution = categorical_contribution(
+            mock_data_cat,
+            "child1",
+            .3,
+            None,)
+
+        weights_1 = tr.trace.nodes['weights_categorical_cat1_child1']['value']
+        assert weights_1.shape == (3,)
+
+        weights_2 = tr.trace.nodes['weights_categorical_cat2_child1']['value']
+        assert weights_2.shape == (2,)
+
+        assert torch.equal(weights_1[mock_data_cat['cat1']]+ weights_2[mock_data_cat['cat2']], cat_contribution)
+
+
+def test_continuous_contribution():
+    
+    with pyro.poutine.trace() as tr:
+        cont_contribution = continuous_contribution(
+            mock_data_cont,
+            "child1",
+            0.5
+        )
+
+        bias_cont1 = tr.trace.nodes['bias_continuous_cont1_child1']['value']
+        weight_cont1 = tr.trace.nodes['weight_continuous_cont1_child1']['value']
+        bias_cont2 = tr.trace.nodes['bias_continuous_cont2_child1']['value']
+        weight_cont2 = tr.trace.nodes['weight_continuous_cont2_child1']['value']
+
+        assert bias_cont1.shape == torch.Size([])
+        assert weight_cont1.shape == torch.Size([])
+        assert bias_cont2.shape == torch.Size([])
+        assert weight_cont2.shape == torch.Size([])
+
+        expected_contribution = (
+            bias_cont1 + weight_cont1 * mock_data_cont['cont1']
+        ) + (
+            bias_cont2 + weight_cont2 * mock_data_cont['cont2']
+        )
+
+        assert torch.allclose(cont_contribution, expected_contribution)
+
+
+def test_add_linear_component():
+
+    data_plate = pyro.plate("data_plate", 3)
+
+    with pyro.poutine.trace() as tr:
+        observed = add_linear_component(
+            child_name="child1",
+            child_continuous_parents=mock_data_cont,
+            child_categorical_parents=mock_data_cat,
+            leeway=0.5,
+            data_plate=data_plate,
+            observations=None,
+            categorical_levels= categorical_levels
+        )
+
+    sigma_child = tr.trace.nodes[f"sigma_child1"]["value"]
+    mean_prediction_child = tr.trace.nodes[f"mean_outcome_prediction_child1"]["value"]
+
+    sigma_child = tr.trace.nodes[f"sigma_child1"]["value"]
+    mean_prediction_child = tr.trace.nodes[f"mean_outcome_prediction_child1"]["value"]
+
+    assert sigma_child.shape == torch.Size([])
+    assert mean_prediction_child.shape == torch.Size([3])
+
+    weights_categorical = {}
+    for name in mock_data_cat.keys():
+        weights_categorical[name] = tr.trace.nodes[f"weights_categorical_{name}_child1"]["value"]
+    
+    categorical_contrib = torch.zeros(3)
+    for name, tensor in mock_data_cat.items():
+        categorical_contrib += weights_categorical[name][..., tensor]
+
+    continuous_contrib = torch.zeros(3)
+    for key, value in mock_data_cont.items():
+        bias = tr.trace.nodes[f"bias_continuous_{key}_child1"]["value"]
+        weight = tr.trace.nodes[f"weight_continuous_{key}_child1"]["value"]
+        continuous_contrib += bias + weight * value
+
+  
+    expected_mean_prediction = categorical_contrib + continuous_contrib
+
+    assert torch.allclose(mean_prediction_child, expected_mean_prediction, atol=1e-6)
+
+
+  
+
+test_add_linear_component()
\ No newline at end of file
diff --git a/tests/modeling/test_modeling_components.py b/tests/modeling/test_modeling_components.py
deleted file mode 100644
index c9466589..00000000
--- a/tests/modeling/test_modeling_components.py
+++ /dev/null
@@ -1,49 +0,0 @@
-from typing import Dict
-
-import pytest
-import torch
-
-from cities.modeling.model_components import get_n
-
-
-@pytest.mark.parametrize(
-    "categorical, continuous, expected",
-    [
-        # both categorical and continuous
-        (
-            {"cat1": torch.tensor([1, 2, 3, 4]), "cat2": torch.tensor([1, 2, 3, 4])},
-            {"cont1": torch.tensor([0.5, 0.6, 0.7, 0.5])},
-            (2, 1, 4),
-        ),
-        # only categorical
-        ({"cat1": torch.tensor([1, 2, 3])}, {}, (1, 0, 3)),
-        # only continuous
-        ({}, {"cont1": torch.tensor([0.5, 0.6, 0.7, 0.8])}, (0, 1, 4)),
-        # mixed size categorical
-        (
-            {
-                "cat1": torch.tensor([1, 2, 3, 4, 5]),
-                "cat2": torch.tensor([1, 2, 3, 4, 5]),
-            },
-            {},
-            (2, 0, 5),
-        ),
-    ],
-)
-def test_get_n(
-    categorical: Dict[str, torch.Tensor],
-    continuous: Dict[str, torch.Tensor],
-    expected: tuple,
-):
-    assert get_n(categorical, continuous) == expected
-
-
-def test_get_n_error():
-    with pytest.raises(
-        ValueError,
-        match="The number of categorical and continuous data points must be the same",
-    ):
-        get_n(
-            {"cat1": torch.tensor([1, 2, 3, 4]), "cat2": torch.tensor([1, 2, 3, 4])},
-            {"cont1": torch.tensor([0.5, 0.6, 0.5])},
-        )