Skip to content

Commit

Permalink
Merge pull request #11 from PolicyEngine/uk-addition
Browse files Browse the repository at this point in the history
Adding PolicyEngine UK functionality, including microdata testing
  • Loading branch information
nikhilwoodruff authored Jul 16, 2024
2 parents 1d39b67 + b513ff3 commit e43d080
Show file tree
Hide file tree
Showing 9 changed files with 225 additions and 5 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ jobs:
run: make test
env:
POVERTYTRACKER_RAW_URL: ${{ secrets.POVERTYTRACKER_RAW_URL }}
POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN: ${{ secrets.POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN}}
- uses: codecov/codecov-action@v3
- name: Build package
run: make
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ jobs:
run: make test
env:
POVERTYTRACKER_RAW_URL: ${{ secrets.POVERTYTRACKER_RAW_URL }}
POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN: ${{ secrets.POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN}}
- uses: codecov/codecov-action@v3
- name: Test documentation builds
if: matrix.os == 'ubuntu-latest'
Expand Down
10 changes: 7 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.3.0] - 2024-07-16 07:44:29

### Added

- PolicyEngine UK testing

## [0.2.0] - 2024-07-02 13:04:56

### Added
Expand All @@ -17,7 +23,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Basic repo



[0.3.0]: https://github.com/PolicyEngine/reweight/compare/0.2.0...0.3.0
[0.2.0]: https://github.com/PolicyEngine/reweight/compare/0.1.0...0.2.0

9 changes: 7 additions & 2 deletions changelog.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
- changes:
added:
- Basic repo
- Basic repo
date: 2024-06-26 00:00:00
version: 0.1.0
- bump: minor
changes:
added:
- PyTorch testing with Microsimulation datasets
- PyTorch testing with Microsimulation datasets
date: 2024-07-02 13:04:56
- bump: minor
changes:
added:
- PolicyEngine UK testing
date: 2024-07-16 07:44:29
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
changes:
fixed:
- PolicyEngine UK secrets issues
8 changes: 8 additions & 0 deletions reweight/tests/test_installation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,11 @@ def test_install():
import reweight
except:
raise AssertionError("Failed to build reweight")


def test_secret_usage():
    """Check that the microdata auth token is visible to the test run.

    The token is read from the ``POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN``
    environment variable. Only its presence is checked; an empty value
    still passes, exactly as before.

    Raises:
        AssertionError: If the environment variable is not set.
    """
    import os

    # Indexing ``os.environ`` raises KeyError for a missing variable, which
    # made the None check below unreachable. ``.get`` returns None instead,
    # so a missing token now fails with the intended message.
    token = os.environ.get("POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN")
    assert token is not None, "Authentication token is None"
5 changes: 5 additions & 0 deletions reweight/tests/test_uk_prototype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
def test_uk_microsimulation():
    """Smoke test: a default PolicyEngine UK ``Microsimulation`` can be built."""
    from policyengine_uk import Microsimulation

    # Constructing with no arguments is the whole check; any exception
    # raised here fails the test.
    simulation = Microsimulation()
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"pytest",
"policyengine-core~=2.21.8",
"policyengine-us~=0.794.1",
"policyengine-uk",
],
extras_require={
"dev": [
Expand Down
191 changes: 191 additions & 0 deletions test.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from policyengine_uk import Microsimulation"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"token = os.environ[\"POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN\"]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"sim = Microsimulation()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from policyengine_uk.data import RawFRS_2021_22\n",
"RawFRS_2021_22().download()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from policyengine_uk.data.datasets.frs.calibration.calibrate import generate_model_variables\n",
"\n",
"(\n",
" household_weights,\n",
" weight_adjustment,\n",
" values_df,\n",
" targets,\n",
" targets_array,\n",
" equivalisation_factors_array\n",
") = generate_model_variables(\"frs_2021\", 2025)\n",
"\n",
"#This returns a set of household weights, a random tensor of the same size as the weights tensor,\n",
"#a Pandas dataframe to transform weights into statistical predictions, a dictionary of target values,\n",
"#an array of target values, and some equivalisation factors I don't understand."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import torch\n",
"from torch.utils.tensorboard import SummaryWriter\n",
"\n",
"# Then we're working with: this new array * the weights = our estimate.\n",
"# Then our error in a prediction is based on |predicted - actual|/equivalisation factor. Square that to get\n",
"# square error, and then average to get MSE."
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"def calibrate(household_weights, weight_adjustment, values_df, targets, targets_array, equivalisation_factors_array):\n",
" # Initialize a TensorBoard writer\n",
" writer = SummaryWriter()\n",
"\n",
" #TODO: Write stuff here\n",
"\n",
" #Create a Torch tensor of log weights\n",
" log_weights = torch.log(household_weights)\n",
" log_weights.requires_grad_()\n",
"\n",
" sim_matrix = values_df.to_numpy()\n",
"\n",
" # Goal: exp(log_weights) @ sim_matrix should approximate targets_array\n",
" sim_matrix = torch.tensor(sim_matrix, dtype=torch.float32)\n",
" #targets_array will be our target values.\n",
"\n",
" optimizer = torch.optim.Adam([log_weights])\n",
"\n",
" # Training loop\n",
" num_epochs = 1000\n",
" for epoch in range(num_epochs):\n",
"\n",
" # Estimate the targets\n",
" targets_estimate = torch.exp(log_weights) @ sim_matrix\n",
" # Calculate the loss\n",
" loss = torch.mean(((targets_estimate - targets_array)/equivalisation_factors_array) ** 2)\n",
"\n",
" writer.add_scalar(\"Loss/train\", loss, epoch)\n",
"\n",
" optimizer.zero_grad()\n",
"\n",
" # Perform backpropagation\n",
" loss.backward()\n",
"\n",
" # Update weights\n",
" optimizer.step()\n",
"\n",
" # Print the loss every 100 epochs\n",
" if epoch % 100 == 0:\n",
" print(f\"Epoch {epoch}, Loss: {loss.item()}\")\n",
"\n",
" writer.flush()\n",
"\n",
" final_weights = np.exp(log_weights.detach().numpy())\n",
" final_estimates = (\n",
" final_weights @ sim_matrix.numpy()\n",
" )\n",
" true_values = targets\n",
" #print(\"Final weights:\", final_weights)\n",
" #print(\"Final estimates:\", final_estimates)\n",
" #print(\"True values:\", true_values)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 0, Loss: 0.2274675965309143\n",
"Epoch 100, Loss: 0.18678854405879974\n",
"Epoch 200, Loss: 0.15837892889976501\n",
"Epoch 300, Loss: 0.13632304966449738\n",
"Epoch 400, Loss: 0.11881797015666962\n",
"Epoch 500, Loss: 0.1046074628829956\n",
"Epoch 600, Loss: 0.09283030778169632\n",
"Epoch 700, Loss: 0.08289289474487305\n",
"Epoch 800, Loss: 0.0743781179189682\n",
"Epoch 900, Loss: 0.06698659062385559\n"
]
}
],
"source": [
"calibrate(household_weights,\n",
" weight_adjustment,\n",
" values_df,\n",
" targets,\n",
" targets_array,\n",
" equivalisation_factors_array)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "policyengine",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit e43d080

Please sign in to comment.