Skip to content

Commit

Permalink
Added PyTorch testing with Microsimulation datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
= committed Jul 2, 2024
1 parent e2f127c commit 2bb009f
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 4 deletions.
4 changes: 2 additions & 2 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
- bump: patch
- bump: minor
changes:
added:
- Changelog bug fixes
- PyTorch testing with Microsimulation datasets
71 changes: 71 additions & 0 deletions reweight/tests/test_us_prototype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
def test_us_prototype():
    """Reweight US household weights towards two tax revenue targets.

    Builds a default ``Microsimulation``, computes household-level income
    tax and employee payroll tax for 2023, and then optimises the log of
    the household weights with Adam so that the weighted totals approach
    the hard-coded revenue targets. The training loss is logged to
    TensorBoard and printed every 1000 epochs; the final weights,
    estimates and targets are printed at the end.

    The only hard check is that the final estimates are finite; the test
    does not assert that the optimisation converged to the targets.
    """
    from policyengine_us import Microsimulation
    import numpy as np
    import torch
    from torch.utils.tensorboard import SummaryWriter

    # Create a Microsimulation instance
    sim = Microsimulation()

    # Compute income and payroll taxes. These are MicroSeries objects from
    # the microdf library.
    income_tax_microseries = sim.calculate(
        "income_tax", 2023, map_to="household"
    )
    payroll_tax_microseries = sim.calculate(
        "employee_payroll_tax", 2023, map_to="household"
    )

    # Convert them into usable NumPy arrays
    sim_income_tax = np.array(income_tax_microseries)
    sim_payroll_tax = np.array(payroll_tax_microseries)
    sim_weights = np.array(income_tax_microseries.weights)

    # One row per target: row 0 is income tax, row 1 is payroll tax.
    # Kept as a NumPy array so it can be reused for the final estimates.
    sim_matrix_np = np.array([sim_income_tax, sim_payroll_tax])

    # Ground-truth values, in the same row order as sim_matrix_np:
    # income tax revenue, then payroll tax revenue.
    target_values = np.array([2_176_000_000_000, 1_614_454_000_000])

    # Optimise in log space so that exp(log_weights) stays non-negative.
    # Goal: sim_matrix @ exp(log_weights) ~= targets
    # NOTE(review): np.log yields -inf for zero weights and NaN for
    # negative ones -- confirm the dataset weights are strictly positive.
    log_weights = torch.tensor(
        np.log(sim_weights), dtype=torch.float32, requires_grad=True
    )
    sim_matrix = torch.tensor(sim_matrix_np, dtype=torch.float32)
    targets = torch.tensor(target_values, dtype=torch.float32)

    optimizer = torch.optim.Adam([log_weights])

    num_epochs = 20000

    # Create the TensorBoard writer only once the inputs are ready, and
    # always close it so the event file is released even if training fails.
    writer = SummaryWriter()
    try:
        # Training loop
        for epoch in range(num_epochs):
            # Estimate the targets
            targets_estimate = sim_matrix @ torch.exp(log_weights)
            # Calculate the loss (mean squared error over the targets)
            loss = torch.mean((targets_estimate - targets) ** 2)
            loss_value = loss.item()

            writer.add_scalar("Loss/train", loss_value, epoch)

            optimizer.zero_grad()
            # Perform backpropagation
            loss.backward()
            # Update weights
            optimizer.step()

            # Print loss for every 1000 epochs
            if epoch % 1000 == 0:
                print(f"Epoch {epoch}, Loss: {loss_value}")

        writer.flush()
    finally:
        writer.close()

    final_weights = np.exp(log_weights.detach().numpy())
    final_estimates = sim_matrix_np @ final_weights
    true_values = targets
    print("Final weights:", final_weights)
    print("Final estimates:", final_estimates)
    print("True values:", true_values)

    # Minimal sanity check so the test can actually fail: the optimisation
    # must not have produced NaN or infinite totals.
    assert np.all(np.isfinite(final_estimates))
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@
url="https://github.com/PolicyEngine/reweight",
include_package_data=True, # Will read MANIFEST.in
install_requires=[
"numpy~=1.26.4",
"numpy<2.0",
"pandas",
"torch",
"tensorboard",
"jupyter-book",
"pytest",
"policyengine-core",
"policyengine-core~=2.21.8",
"policyengine-us~=0.794.1",
],
extras_require={
"dev": [
Expand Down

0 comments on commit 2bb009f

Please sign in to comment.