Added PyTorch testing with Microsimulation datasets

PolicyEngine · Jul 2, 2024 · 2bb009f · 2bb009f
1 parent e2f127c
commit 2bb009f
Show file tree

Hide file tree

Showing 3 changed files with 76 additions and 4 deletions.
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
@@ -1,4 +1,4 @@
-- bump: patch
+- bump: minor
   changes:
     added:
-      - Changelog bug fixes
+      - PyTorch testing with Microsimulation datasets
diff --git a/reweight/tests/test_us_prototype.py b/reweight/tests/test_us_prototype.py
@@ -0,0 +1,71 @@
+def test_us_prototype():
+    """Fit household weights so simulated tax totals approach two targets.
+
+    Builds a ``Microsimulation`` with no arguments, computes household-level
+    income tax and employee payroll tax for 2023, and optimises the log of
+    the household weights with Adam so the weighted totals approach the
+    hard-coded revenue targets. The loss is logged to TensorBoard and printed
+    every 1000 epochs; there are no assertions, so the test passes whenever
+    the run completes without raising.
+    """
+    from policyengine_us import Microsimulation
+    import numpy as np
+    import torch
+    from torch.utils.tensorboard import SummaryWriter
+
+    sim = Microsimulation()
+
+    # These are MicroSeries objects from the microdf library.
+    income_tax_microseries = sim.calculate(
+        "income_tax", 2023, map_to="household"
+    )
+    payroll_tax_microseries = sim.calculate(
+        "employee_payroll_tax", 2023, map_to="household"
+    )
+
+    # Convert them into plain NumPy arrays.
+    sim_income_tax = np.array(income_tax_microseries)
+    sim_payroll_tax = np.array(payroll_tax_microseries)
+    sim_weights = np.array(income_tax_microseries.weights)
+
+    # Optimising in log space keeps exp(log_weights) non-negative.
+    # NOTE(review): np.log gives -inf for a zero weight -- confirm none occur.
+    log_weights = torch.tensor(
+        np.log(sim_weights), dtype=torch.float32, requires_grad=True
+    )
+
+    # Row 0 is income tax, row 1 is payroll tax, matching the target order.
+    sim_matrix_np = np.array([sim_income_tax, sim_payroll_tax])
+    sim_matrix = torch.tensor(sim_matrix_np, dtype=torch.float32)
+    targets = torch.tensor(
+        np.array([2_176_000_000_000, 1_614_454_000_000]), dtype=torch.float32
+    )
+
+    optimizer = torch.optim.Adam([log_weights])
+    num_epochs = 20000
+
+    writer = SummaryWriter()
+    try:
+        for epoch in range(num_epochs):
+            # sim_matrix @ exp(log_weights) is the estimate of the targets.
+            targets_estimate = sim_matrix @ torch.exp(log_weights)
+            loss = torch.mean((targets_estimate - targets) ** 2)
+            writer.add_scalar("Loss/train", loss, epoch)
+
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+            # Print loss for every 1000 epochs
+            if epoch % 1000 == 0:
+                print(f"Epoch {epoch}, Loss: {loss.item()}")
+    finally:
+        # close() flushes pending events even when an iteration raises.
+        writer.close()
+
+    # Recompute the totals in NumPy from the fitted weights for reporting.
+    final_weights = np.exp(log_weights.detach().numpy())
+    final_estimates = sim_matrix_np @ final_weights
+    print("Final weights:", final_weights)
+    print("Final estimates:", final_estimates)
+    print("True values:", targets)
diff --git a/setup.py b/setup.py
@@ -26,13 +26,14 @@
     url="https://github.com/PolicyEngine/reweight",
     include_package_data=True,  # Will read MANIFEST.in
     install_requires=[
-        "numpy~=1.26.4",
+        "numpy<2.0",
         "pandas",
         "torch",
         "tensorboard",
         "jupyter-book",
         "pytest",
-        "policyengine-core",
+        "policyengine-core~=2.21.8",
+        "policyengine-us~=0.794.1",
     ],
     extras_require={
         "dev": [