Merge pull request #11 from PolicyEngine/uk-addition
Adding PolicyEngine UK functionality, including microdata testing
Showing 9 changed files with 225 additions and 5 deletions.
@@ -1,10 +1,15 @@
- changes:
    added:
    - Basic repo
  date: 2024-06-26 00:00:00
  version: 0.1.0
- bump: minor
  changes:
    added:
    - PyTorch testing with Microsimulation datasets
  date: 2024-07-02 13:04:56
- bump: minor
  changes:
    added:
    - PolicyEngine UK testing
  date: 2024-07-16 07:44:29
@@ -0,0 +1,4 @@
- bump: minor
  changes:
    fixed:
    - PolicyEngine UK secrets issues
@@ -0,0 +1,5 @@
def test_uk_microsimulation():
    from policyengine_uk import Microsimulation

    # Create a Microsimulation instance
    sim = Microsimulation()
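The test above only verifies that a Microsimulation object can be constructed from the UK microdata. A slightly stronger smoke test could also assert that a weighted aggregate is computable; the sketch below is not part of this commit, and the calculate() call and the "household_net_income" variable name are assumptions about the policyengine_uk API rather than something confirmed by the diff.

def test_uk_microsimulation_aggregate():
    from policyengine_uk import Microsimulation

    sim = Microsimulation()

    # Assumed API: calculate() returns a weighted series whose sum is a
    # population aggregate; "household_net_income" is an illustrative
    # variable name, not taken from this commit.
    total = sim.calculate("household_net_income", 2025).sum()
    assert total > 0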
@@ -0,0 +1,191 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from policyengine_uk import Microsimulation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "token = os.environ[\"POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "sim = Microsimulation()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from policyengine_uk.data import RawFRS_2021_22\n",
    "RawFRS_2021_22().download()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from policyengine_uk.data.datasets.frs.calibration.calibrate import generate_model_variables\n",
    "\n",
    "(\n",
    "    household_weights,\n",
    "    weight_adjustment,\n",
    "    values_df,\n",
    "    targets,\n",
    "    targets_array,\n",
    "    equivalisation_factors_array\n",
    ") = generate_model_variables(\"frs_2021\", 2025)\n",
    "\n",
    "# This returns a set of household weights, a random tensor of the same size as the\n",
    "# weights tensor, a Pandas dataframe that maps weights to statistical predictions,\n",
    "# a dictionary of target values, an array of target values, and an array of\n",
    "# equivalisation factors used to scale each target's error in the loss."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import torch\n",
    "from torch.utils.tensorboard import SummaryWriter\n",
    "\n",
    "# The estimate of each target is (weights @ values matrix).\n",
    "# The per-target error is (predicted - actual) / equivalisation factor;\n",
    "# squaring and averaging over targets gives the MSE loss."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calibrate(household_weights, weight_adjustment, values_df, targets, targets_array, equivalisation_factors_array):\n",
    "    # Initialize a TensorBoard writer\n",
    "    writer = SummaryWriter()\n",
    "\n",
    "    # TODO: Write stuff here\n",
    "\n",
    "    # Create a Torch tensor of log weights\n",
    "    log_weights = torch.log(household_weights)\n",
    "    log_weights.requires_grad_()\n",
    "\n",
    "    sim_matrix = values_df.to_numpy()\n",
    "\n",
    "    # exp(log_weights) @ sim_matrix should approximate targets_array\n",
    "    sim_matrix = torch.tensor(sim_matrix, dtype=torch.float32)\n",
    "    # targets_array holds the target values.\n",
    "\n",
    "    optimizer = torch.optim.Adam([log_weights])\n",
    "\n",
    "    # Training loop\n",
    "    num_epochs = 1000\n",
    "    for epoch in range(num_epochs):\n",
    "\n",
    "        # Estimate the targets\n",
    "        targets_estimate = torch.exp(log_weights) @ sim_matrix\n",
    "        # Calculate the loss\n",
    "        loss = torch.mean(((targets_estimate - targets_array) / equivalisation_factors_array) ** 2)\n",
    "\n",
    "        writer.add_scalar(\"Loss/train\", loss, epoch)\n",
    "\n",
    "        optimizer.zero_grad()\n",
    "\n",
    "        # Perform backpropagation\n",
    "        loss.backward()\n",
    "\n",
    "        # Update weights\n",
    "        optimizer.step()\n",
    "\n",
    "        # Print the loss every 100 epochs\n",
    "        if epoch % 100 == 0:\n",
    "            print(f\"Epoch {epoch}, Loss: {loss.item()}\")\n",
    "\n",
    "    writer.flush()\n",
    "\n",
    "    final_weights = np.exp(log_weights.detach().numpy())\n",
    "    final_estimates = (\n",
    "        final_weights @ sim_matrix.numpy()\n",
    "    )\n",
    "    true_values = targets\n",
    "    # print(\"Final weights:\", final_weights)\n",
    "    # print(\"Final estimates:\", final_estimates)\n",
    "    # print(\"True values:\", true_values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 0, Loss: 0.2274675965309143\n",
      "Epoch 100, Loss: 0.18678854405879974\n",
      "Epoch 200, Loss: 0.15837892889976501\n",
      "Epoch 300, Loss: 0.13632304966449738\n",
      "Epoch 400, Loss: 0.11881797015666962\n",
      "Epoch 500, Loss: 0.1046074628829956\n",
      "Epoch 600, Loss: 0.09283030778169632\n",
      "Epoch 700, Loss: 0.08289289474487305\n",
      "Epoch 800, Loss: 0.0743781179189682\n",
      "Epoch 900, Loss: 0.06698659062385559\n"
     ]
    }
   ],
   "source": [
    "calibrate(household_weights,\n",
    "          weight_adjustment,\n",
    "          values_df,\n",
    "          targets,\n",
    "          targets_array,\n",
    "          equivalisation_factors_array)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "policyengine",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
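The calibration loop is easy to lose inside the notebook JSON. The sketch below restates the same technique on synthetic data: gradient descent with Adam on log-weights (so the exponentiated weights stay positive), minimising the mean squared relative error of the weighted target estimates. The sizes and names (n_households, n_targets, the learning rate) are illustrative assumptions, not values from the PolicyEngine codebase.

import torch

# Synthetic stand-ins for the notebook's inputs (illustrative only).
n_households, n_targets = 1_000, 5
torch.manual_seed(0)
values = torch.rand(n_households, n_targets)      # plays the role of values_df.to_numpy()
true_weights = torch.rand(n_households) * 100
targets = true_weights @ values                   # targets the calibration should reproduce
equivalisation = targets.abs() + 1                # scales each target's error, like equivalisation_factors_array

# Optimise log-weights so that exp(log_weights) @ values approximates targets.
log_weights = torch.log(torch.full((n_households,), 50.0)).requires_grad_()
optimizer = torch.optim.Adam([log_weights], lr=0.1)

for epoch in range(1_000):
    estimate = torch.exp(log_weights) @ values
    loss = torch.mean(((estimate - targets) / equivalisation) ** 2)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.6f}")

calibrated_weights = torch.exp(log_weights).detach()

Parameterising in log space is the same trick the notebook uses: the household weights recovered by exp() are strictly positive without any explicit constraint on the optimiser.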