Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding PolicyEngine UK functionality, including microdata testing #11

Merged
merged 19 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ jobs:
run: make test
env:
POVERTYTRACKER_RAW_URL: ${{ secrets.POVERTYTRACKER_RAW_URL }}
POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN: ${{ secrets.POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN}}
- uses: codecov/codecov-action@v3
- name: Build package
run: make
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ jobs:
run: make test
env:
POVERTYTRACKER_RAW_URL: ${{ secrets.POVERTYTRACKER_RAW_URL }}
POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN: ${{ secrets.POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN}}
- uses: codecov/codecov-action@v3
- name: Test documentation builds
if: matrix.os == 'ubuntu-latest'
Expand Down
10 changes: 7 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.3.0] - 2024-07-16 07:44:29

### Added

- PolicyEngine UK testing

## [0.2.0] - 2024-07-02 13:04:56

### Added
Expand All @@ -17,7 +23,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Basic repo



[0.3.0]: https://github.com/PolicyEngine/reweight/compare/0.2.0...0.3.0
[0.2.0]: https://github.com/PolicyEngine/reweight/compare/0.1.0...0.2.0

9 changes: 7 additions & 2 deletions changelog.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
- changes:
added:
- Basic repo
- Basic repo
date: 2024-06-26 00:00:00
version: 0.1.0
- bump: minor
changes:
added:
- PyTorch testing with Microsimulation datasets
- PyTorch testing with Microsimulation datasets
date: 2024-07-02 13:04:56
- bump: minor
changes:
added:
- PolicyEngine UK testing
date: 2024-07-16 07:44:29
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
changes:
fixed:
- PolicyEngine UK secrets issues
8 changes: 8 additions & 0 deletions reweight/tests/test_installation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,11 @@ def test_install():
import reweight
except:
raise AssertionError("Failed to build reweight")


def test_secret_usage():
import os

token = os.environ["POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN"]
token_not_none = token != None
assert token_not_none, "Authentication token is None"
5 changes: 5 additions & 0 deletions reweight/tests/test_uk_prototype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
def test_uk_microsimulation():
    """Smoke test: build a PolicyEngine UK ``Microsimulation`` with defaults."""
    import policyengine_uk

    # Construction with no arguments is the whole test; any exception raised
    # while building the instance fails it.
    simulation = policyengine_uk.Microsimulation()
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"pytest",
"policyengine-core~=2.21.8",
"policyengine-us~=0.794.1",
"policyengine-uk",
],
extras_require={
"dev": [
Expand Down
191 changes: 191 additions & 0 deletions test.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from policyengine_uk import Microsimulation"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"token = os.environ[\"POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN\"]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"sim = Microsimulation()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from policyengine_uk.data import RawFRS_2021_22\n",
"RawFRS_2021_22().download()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from policyengine_uk.data.datasets.frs.calibration.calibrate import generate_model_variables\n",
"\n",
"(\n",
" household_weights,\n",
" weight_adjustment,\n",
" values_df,\n",
" targets,\n",
" targets_array,\n",
" equivalisation_factors_array\n",
") = generate_model_variables(\"frs_2021\", 2025)\n",
"\n",
"#This returns a set of household weights, a random tensor of the same size as the weights tensor,\n",
"#a Pandas dataframe to transform weights into statistical predictions, a dictionary of target values,\n",
"#an array of target values, and some equivalisation factors I don't understand."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import torch\n",
"from torch.utils.tensorboard import SummaryWriter\n",
"\n",
"# Then we're working with: this new array * the weights = our estimate.\n",
"# Then our error in a prediction is based on |predicted - actual|/equivalisation factor. Square that to get\n",
"# square error, and then average to get MSE."
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"def calibrate(household_weights, weight_adjustment, values_df, targets, targets_array, equivalisation_factors_array):\n",
" # Initialize a TensorBoard writer\n",
" writer = SummaryWriter()\n",
"\n",
" #TODO: Write stuff here\n",
"\n",
" #Create a Torch tensor of log weights\n",
" log_weights = torch.log(household_weights)\n",
" log_weights.requires_grad_()\n",
"\n",
" sim_matrix = values_df.to_numpy()\n",
"\n",
" # sim_matrix (cross) exp(log_weights) = targets_array\n",
" sim_matrix = torch.tensor(sim_matrix, dtype=torch.float32)\n",
" #targets_array will be our target values.\n",
"\n",
" optimizer = torch.optim.Adam([log_weights])\n",
"\n",
" # Training loop\n",
" num_epochs = 1000\n",
" for epoch in range(num_epochs):\n",
"\n",
" # Estimate the targets\n",
" targets_estimate = torch.exp(log_weights) @ sim_matrix\n",
" # Calculate the loss\n",
" loss = torch.mean(((targets_estimate - targets_array)/equivalisation_factors_array) ** 2)\n",
"\n",
" writer.add_scalar(\"Loss/train\", loss, epoch)\n",
"\n",
" optimizer.zero_grad()\n",
"\n",
" # Perform backpropagation\n",
" loss.backward()\n",
"\n",
" # Update weights\n",
" optimizer.step()\n",
"\n",
" # Print loss every 100 epochs\n",
" if epoch % 100 == 0:\n",
" print(f\"Epoch {epoch}, Loss: {loss.item()}\")\n",
"\n",
" writer.flush()\n",
"\n",
" final_weights = np.exp(log_weights.detach().numpy())\n",
" final_estimates = (\n",
" final_weights @ sim_matrix.numpy()\n",
" )\n",
" true_values = targets\n",
" #print(\"Final weights:\", final_weights)\n",
" #print(\"Final estimates:\", final_estimates)\n",
" #print(\"True values:\", true_values)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 0, Loss: 0.2274675965309143\n",
"Epoch 100, Loss: 0.18678854405879974\n",
"Epoch 200, Loss: 0.15837892889976501\n",
"Epoch 300, Loss: 0.13632304966449738\n",
"Epoch 400, Loss: 0.11881797015666962\n",
"Epoch 500, Loss: 0.1046074628829956\n",
"Epoch 600, Loss: 0.09283030778169632\n",
"Epoch 700, Loss: 0.08289289474487305\n",
"Epoch 800, Loss: 0.0743781179189682\n",
"Epoch 900, Loss: 0.06698659062385559\n"
]
}
],
"source": [
"calibrate(household_weights,\n",
" weight_adjustment,\n",
" values_df,\n",
" targets,\n",
" targets_array,\n",
" equivalisation_factors_array)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "policyengine",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading