From 7222ac1d76e81a6e4fa68eda3263f975dfa23bf1 Mon Sep 17 00:00:00 2001 From: rfl-urbaniak Date: Fri, 18 Oct 2024 11:43:32 -0400 Subject: [PATCH] draft outline for Nikodem --- docs/guides/zoning-tracts-model.ipynb | 178 ++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 docs/guides/zoning-tracts-model.ipynb diff --git a/docs/guides/zoning-tracts-model.ipynb b/docs/guides/zoning-tracts-model.ipynb new file mode 100644 index 00000000..d3b33dd3 --- /dev/null +++ b/docs/guides/zoning-tracts-model.ipynb @@ -0,0 +1,178 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Outline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Background and motivations\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#check out zoning_new_data_pipeline\n", + "use viz" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#this comes from test_tracts_model.py\n", + "\n", + "data_path = os.path.join(root, \"data/minneapolis/processed/pg_census_tracts_dataset.pt\")\n", + "\n", + "dataset_read = torch.load(data_path, weights_only=False)\n", + "\n", + "loader = DataLoader(dataset_read, batch_size=len(dataset_read), shuffle=True)\n", + "\n", + "data = next(iter(loader))\n", + "\n", + "\n", + "kwargs = {\n", + " \"categorical\": [\"year\", \"census_tract\"],\n", + " \"continuous\": {\n", + " \"housing_units\",\n", + " \"total_value\",\n", + " \"median_value\",\n", + " \"mean_limit_original\",\n", + " \"median_distance\",\n", + " \"income\",\n", + " \"segregation_original\",\n", + " \"white_original\",\n", + " \"parcel_mean_sqm\",\n", + " \"parcel_median_sqm\",\n", + " \"parcel_sqm\",\n", + " \"downtown_overlap\",\n", + " \"university_overlap\",\n", + " },\n", + " \"outcome\": \"housing_units\",\n", + "}\n", + "\n", + "\n", + "pg_subset = 
select_from_data(data, kwargs)\n", + "pg_dataset_read = torch.load(data_path, weights_only=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## tytulem wstepu, dane z permitow, tak jak w zoning data, babelki i media" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Zmienne" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Causal modeling in general\n", + "\n", + "tracts model overview, read, possibly update, \n", + "the graphic is outdated, generate a new one using ....dags.R\n", + "looking at the rendering from zoning_tracts_continuous_interactions.ipynb\n", + "\n", + "defined in \n", + "\n", + "zoning_tracts_continuous_interactions_model.py\n", + "\n", + "btw update tracts_model_overview with new graphics\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Construction and evaluation\n", + "\n", + "- directions of causal assumptions are rather natural, we're happy for the user to modify and iterate\n", + "- in adding variables we were frugal, at each step evaluating the model in terms of train-test split\n", + "and WAIC \n", + "\n", + "- explain interactions as essentially adding another continuous predictor\n", + "- explain WAIC briefly as well\n", + "\n", + "example of performance results, also with the original scale\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Outliers\n", + "\n", + "\n", + "\n", + "messy environment, high granularity, hard to predict some extreme events\n", + "in particular, the reform does not touch university and downtown, which had their own regulation\n", + "especially downtown underwent modifications not captured by the data, \n", + "\n", + "graph residuals for regions\n", + "\n", + "statsy outlierow " + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Interventions\n", + "\n", + "ogolnie co to jest interwencja w tym kontekscie\n", + "\n", + "### Brute force example\n", + "\n", + "wszedzie zero wszedzie 1, porownanie\n", + "\n", + "### In line with the reform\n", + "\n", + "predict.py contains SQL\n", + "\n", + "zoning_tracts_intervention_testing.ipynb\n", + "\n", + "zawiera kilka najgorszych outlierow (ktore jako przyklady bez interwencji wczesniej mozna podac)\n", + "\n", + "I wyjasnic roznice miedzy observed, factual, counterfactual" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}