diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index 9e9f28d..a738e81 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -1,4 +1,4 @@
-- bump: patch
+- bump: minor
   changes:
     added:
-    - Changelog bug fixes
+    - PyTorch testing with Microsimulation datasets
diff --git a/reweight/tests/test_us_prototype.py b/reweight/tests/test_us_prototype.py
new file mode 100644
index 0000000..2f2a470
--- /dev/null
+++ b/reweight/tests/test_us_prototype.py
@@ -0,0 +1,84 @@
+def test_us_prototype():
+    """Smoke-test gradient-descent reweighting on US microsimulation data.
+
+    Household weights are parameterised as ``exp(log_weights)`` and fitted
+    with Adam so that the weighted household totals of income tax and
+    employee payroll tax move towards the two hard-coded revenue targets.
+    The loss is logged to TensorBoard at every epoch.
+    """
+    from policyengine_us import Microsimulation
+    import numpy as np
+    import torch
+    from torch.utils.tensorboard import SummaryWriter
+
+    # Create a Microsimulation instance
+    sim = Microsimulation()
+
+    # Compute income and payroll taxes. These are MicroSeries objects from
+    # the microdf library
+    income_tax_microseries = sim.calculate(
+        "income_tax", 2023, map_to="household"
+    )
+    payroll_tax_microseries = sim.calculate(
+        "employee_payroll_tax", 2023, map_to="household"
+    )
+
+    # Convert them into usable NumPy arrays
+    sim_income_tax = np.array(income_tax_microseries)
+    sim_payroll_tax = np.array(payroll_tax_microseries)
+    sim_weights = np.array(income_tax_microseries.weights)
+
+    # NOTE(review): presumably every weight is strictly positive; np.log
+    # gives -inf for a zero weight -- confirm against the dataset
+    initial_log_weights = np.log(sim_weights)
+
+    # Target totals, in row order: income tax revenue, payroll tax revenue
+    target_values = np.array([2_176_000_000_000, 1_614_454_000_000])
+
+    # One row per target, one column per household
+    household_matrix = np.array([sim_income_tax, sim_payroll_tax])
+
+    # Goal: sim_matrix @ exp(log_weights) = targets
+    log_weights = torch.tensor(
+        initial_log_weights, dtype=torch.float32, requires_grad=True
+    )
+    sim_matrix = torch.tensor(household_matrix, dtype=torch.float32)
+    targets = torch.tensor(target_values, dtype=torch.float32)
+
+    optimizer = torch.optim.Adam([log_weights])
+
+    # Training loop; the context manager closes the TensorBoard writer
+    # even if an epoch raises
+    num_epochs = 20000
+    with SummaryWriter() as writer:
+        for epoch in range(num_epochs):
+            # Estimate the targets
+            targets_estimate = sim_matrix @ torch.exp(log_weights)
+            # Calculate the loss
+            loss = torch.mean((targets_estimate - targets) ** 2)
+
+            writer.add_scalar("Loss/train", loss, epoch)
+
+            optimizer.zero_grad()
+
+            # Perform backpropagation
+            loss.backward()
+
+            # Update weights
+            optimizer.step()
+
+            # Print loss for every 1000 epochs
+            if epoch % 1000 == 0:
+                print(f"Epoch {epoch}, Loss: {loss.item()}")
+
+    final_weights = np.exp(log_weights.detach().numpy())
+    final_estimates = household_matrix @ final_weights
+    true_values = targets
+    print("Final weights:", final_weights)
+    print("Final estimates:", final_estimates)
+    print("True values:", true_values)
+
+    # A test without assertions can only fail by raising, so at minimum
+    # check that the optimisation did not diverge to NaN/inf
+    assert np.all(np.isfinite(final_weights))
+    assert np.all(np.isfinite(final_estimates))
diff --git a/setup.py b/setup.py
index 6c39c5b..8a289e1 100644
--- a/setup.py
+++ b/setup.py
@@ -26,13 +26,14 @@
     url="https://github.com/PolicyEngine/reweight",
     include_package_data=True,  # Will read MANIFEST.in
     install_requires=[
-        "numpy~=1.26.4",
+        "numpy<2.0",
         "pandas",
         "torch",
         "tensorboard",
         "jupyter-book",
         "pytest",
-        "policyengine-core",
+        "policyengine-core~=2.21.8",
+        "policyengine-us~=0.794.1",
     ],
     extras_require={
         "dev": [