From 7222ac1d76e81a6e4fa68eda3263f975dfa23bf1 Mon Sep 17 00:00:00 2001
From: rfl-urbaniak <rfl.urbaniak@gmail.com>
Date: Fri, 18 Oct 2024 11:43:32 -0400
Subject: [PATCH] draft outline for Nikodem

---
 docs/guides/zoning-tracts-model.ipynb | 178 ++++++++++++++++++++++++++
 1 file changed, 178 insertions(+)
 create mode 100644 docs/guides/zoning-tracts-model.ipynb

diff --git a/docs/guides/zoning-tracts-model.ipynb b/docs/guides/zoning-tracts-model.ipynb
new file mode 100644
index 00000000..d3b33dd3
--- /dev/null
+++ b/docs/guides/zoning-tracts-model.ipynb
@@ -0,0 +1,178 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Outline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Background and motivations\n",
+    "\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#check out zoning_new_data_pipeline\n",
+    "# use viz"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Adapted from test_tracts_model.py.\n",
+    "# TODO: `root` and `select_from_data` are not defined anywhere in this notebook; define/import them before running this cell.\n",
+    "import os\n",
+    "\n",
+    "import torch\n",
+    "from torch.utils.data import DataLoader\n",
+    "\n",
+    "data_path = os.path.join(root, \"data/minneapolis/processed/pg_census_tracts_dataset.pt\")\n",
+    "dataset_read = torch.load(data_path, weights_only=False)\n",
+    "# batch_size=len(dataset_read) puts the whole dataset into a single batch\n",
+    "loader = DataLoader(dataset_read, batch_size=len(dataset_read), shuffle=True)\n",
+    "data = next(iter(loader))\n",
+    "\n",
+    "kwargs = {\n",
+    "    \"categorical\": [\"year\", \"census_tract\"],\n",
+    "    \"continuous\": {\n",
+    "        \"housing_units\",\n",
+    "        \"total_value\",\n",
+    "        \"median_value\",\n",
+    "        \"mean_limit_original\",\n",
+    "        \"median_distance\",\n",
+    "        \"income\",\n",
+    "        \"segregation_original\",\n",
+    "        \"white_original\",\n",
+    "        \"parcel_mean_sqm\",\n",
+    "        \"parcel_median_sqm\",\n",
+    "        \"parcel_sqm\",\n",
+    "        \"downtown_overlap\",\n",
+    "        \"university_overlap\",\n",
+    "    },\n",
+    "    \"outcome\": \"housing_units\",\n",
+    "}\n",
+    "\n",
+    "pg_subset = select_from_data(data, kwargs)\n",
+    "pg_dataset_read = torch.load(data_path, weights_only=False)  # NOTE(review): same file as dataset_read is loaded a second time; confirm this is intended\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## tytulem wstepu, dane z permitow, tak jak w zoning data, babelki i media"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Zmienne"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Causal modeling in general\n",
+    "\n",
+    "tracts model overview: read, possibly update;\n",
+    "the graphic is outdated, generate a new one using ....dags.R,\n",
+    "looking at the rendering from zoning_tracts_continuous_interactions.ipynb\n",
+    "\n",
+    "defined in \n",
+    "\n",
+    "zoning_tracts_continuous_interactions_model.py\n",
+    "\n",
+    "btw update tracts_model_overview with new graphics\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Construction and evaluation\n",
+    "\n",
+    "- directions of causal assumptions are rather natural; we're happy for the user to modify and iterate\n",
+    "- in adding variables we were frugal, at each step evaluating the model in terms of train-test split\n",
+    "and WAIC\n",
+    "\n",
+    "- explain interactions as essentially adding another continuous predictor\n",
+    "- explain WAIC briefly as well\n",
+    "\n",
+    "example of performance results, also with the original scale\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Outliers\n",
+    "\n",
+    "\n",
+    "\n",
+    "messy environment, high granularity, hard to predict some extreme events\n",
+    "in particular, the reform does not touch university and downtown, which had their own regulation\n",
+    "especially downtown underwent modifications not captured by the data, \n",
+    "\n",
+    "graph residuals for regions\n",
+    "\n",
+    "statsy outlierow "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Interventions\n",
+    "\n",
+    "ogolnie co to jest interwencja w tym kontekscie\n",
+    "\n",
+    "### Brute force example\n",
+    "\n",
+    "wszedzie zero wszedzie 1, porownanie\n",
+    "\n",
+    "### In line with the reform\n",
+    "\n",
+    "predict.py contains sql\n",
+    "\n",
+    "zoning_tracts_intervention_testing.ipynb\n",
+    "\n",
+    "zawiera kilka  najgorszych outlierow (ktore jako przyklady bez interwencji wczesniej mozna podac)\n",
+    "\n",
+    "I wyjasnic roznice miedzy observed, factual, counterfactual"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}