From 0fdfe83518eb28fa14946aee6011a1c31a73d53c Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 10 Sep 2024 12:20:13 +0100 Subject: [PATCH 01/22] add new methods to the notebooks and readme Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index c276c4ae..6bec2a5b 100644 --- a/README.md +++ b/README.md @@ -190,6 +190,8 @@ print(f"Best estimator: {ct.best_estimator}") ``` +Now if outcome_model="auto" in the CausalTune constructor, we search over a simultaneous search space for the EconML estimators and for FLAML wrappers for common regressors. The old behavior is now achieved by outcome_model="nested" (Refitting AutoML for each estimator). + ## Supported Models The package supports the following causal estimators: * Meta Learners: From 311f264096a2dc18c4906a3cbca50249da857eb5 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 10 Sep 2024 12:25:00 +0100 Subject: [PATCH 02/22] Update README.md Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6bec2a5b..77985b41 100644 --- a/README.md +++ b/README.md @@ -190,7 +190,9 @@ print(f"Best estimator: {ct.best_estimator}") ``` -Now if outcome_model="auto" in the CausalTune constructor, we search over a simultaneous search space for the EconML estimators and for FLAML wrappers for common regressors. The old behavior is now achieved by outcome_model="nested" (Refitting AutoML for each estimator). +Now if ***outcome_model="auto"*** in the CausalTune constructor, we search over a simultaneous search space for the EconML estimators and for FLAML wrappers for common regressors. 
The old behavior is now achieved by ***outcome_model="nested"*** (Refitting AutoML for each estimator). + +You can also preprocess the data in the CausalityDataset using one of the popular category encoders: ***OneHot, WoE, Label, Target***. ## Supported Models The package supports the following causal estimators: From e2285ab3751641e3cbd9d364869c4440f98ef001 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:31:21 +0100 Subject: [PATCH 03/22] Delete notebooks/Standard errors.ipynb Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- notebooks/Standard errors.ipynb | 659 -------------------------------- 1 file changed, 659 deletions(-) delete mode 100644 notebooks/Standard errors.ipynb diff --git a/notebooks/Standard errors.ipynb b/notebooks/Standard errors.ipynb deleted file mode 100644 index 797fcea9..00000000 --- a/notebooks/Standard errors.ipynb +++ /dev/null @@ -1,659 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "a34f30c6", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "# Standard errors\n", - "\n", - "This is a notebook demonstrating how to obtain standard errors for your generated impact estimates." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "43b770ca", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "import os, sys\n", - "import warnings\n", - "warnings.filterwarnings('ignore') # suppress sklearn deprecation warnings for now..\n", - "\n", - "import pandas as pd\n", - "import numpy as np\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "# the below checks for whether we run dowhy, causaltune, and FLAML from source\n", - "root_path = root_path = os.path.realpath('../..')\n", - "try:\n", - " import causaltune\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"causaltune\"))\n", - "\n", - "try:\n", - " import dowhy\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"dowhy\"))\n", - "\n", - "try:\n", - " import flaml\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"FLAML\"))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "53241021", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# this makes the notebook expand to full width of the browser window\n", - "from IPython.core.display import display, HTML\n", - "display(HTML(\"\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "5ed9b5f7", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "application/javascript": "\n// turn off scrollable windows for large output\nIPython.OutputArea.prototype._should_scroll = function(lines) {\n return false;\n}\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%%javascript\n", - "\n", - "// turn off scrollable windows for large output\n", - 
"IPython.OutputArea.prototype._should_scroll = function(lines) {\n", - " return false;\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "da208ce6", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "from causaltune import CausalTune\n", - "from causaltune.datasets import synth_ihdp" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "ab536d1b", - "metadata": {}, - "source": [ - "## Loading data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "96719b4d", - "metadata": {}, - "outputs": [], - "source": [ - "# load toy dataset and apply standard pre-processing\n", - "cd = synth_ihdp()\n", - "cd.preprocess_dataset()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "49e4721b", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
treatmenty_factualrandomx1x2x3x4x5x6x7...x16x17x18x19x20x21x22x23x24x25
015.5999161.0-0.528603-0.3434551.1285540.161703-0.3166031.2952161.0...1.01.01.01.00.00.00.00.00.00.0
106.8758561.0-1.736945-1.8020020.3838282.244319-0.6291891.2952160.0...1.01.01.01.00.00.00.00.00.00.0
202.9962731.0-0.807451-0.202946-0.360898-0.8796060.808706-0.5265560.0...1.00.01.01.00.00.00.00.00.00.0
301.3662060.00.3900830.596582-1.850350-0.879606-0.004017-0.8577870.0...1.00.01.01.00.00.00.00.00.00.0
401.9635381.0-1.045228-0.6027100.0114650.1617030.683672-0.3609401.0...1.01.01.01.00.00.00.00.00.00.0
\n", - "

5 rows × 28 columns

\n", - "
" - ], - "text/plain": [ - " treatment y_factual random x1 x2 x3 x4 \\\n", - "0 1 5.599916 1.0 -0.528603 -0.343455 1.128554 0.161703 \n", - "1 0 6.875856 1.0 -1.736945 -1.802002 0.383828 2.244319 \n", - "2 0 2.996273 1.0 -0.807451 -0.202946 -0.360898 -0.879606 \n", - "3 0 1.366206 0.0 0.390083 0.596582 -1.850350 -0.879606 \n", - "4 0 1.963538 1.0 -1.045228 -0.602710 0.011465 0.161703 \n", - "\n", - " x5 x6 x7 ... x16 x17 x18 x19 x20 x21 x22 x23 x24 \\\n", - "0 -0.316603 1.295216 1.0 ... 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", - "1 -0.629189 1.295216 0.0 ... 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", - "2 0.808706 -0.526556 0.0 ... 1.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", - "3 -0.004017 -0.857787 0.0 ... 1.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", - "4 0.683672 -0.360940 1.0 ... 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", - "\n", - " x25 \n", - "0 0.0 \n", - "1 0.0 \n", - "2 0.0 \n", - "3 0.0 \n", - "4 0.0 \n", - "\n", - "[5 rows x 28 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# inspect the preprocessed dataset\n", - "display(cd.data.head())" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "d4d1871f", - "metadata": {}, - "source": [ - "## Model training and standard errors" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "fd4b291e", - "metadata": {}, - "outputs": [], - "source": [ - "# training configs\n", - "\n", - "# set evaluation metric\n", - "metric = \"energy_distance\"\n", - "\n", - "# it's best to specify either time_budget or components_time_budget, \n", - "# and let the other one be inferred; time in seconds\n", - "time_budget = None\n", - "components_time_budget = 10\n", - "\n", - "# specify training set size\n", - "train_size = 0.7\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "e0f63d12", - "metadata": {}, - "source": [ - "Note that in the example below, we are passing `'cheap_inference'` to `estimator_list`. 
This configuration will restrict the selection of estimators to the ones that have analytical standard errors." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "097c923e", - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting a Propensity-Weighted scoring estimator to be used in scoring tasks\n", - "Initial configs: [{'estimator': {'estimator_name': 'backdoor.econml.dr.ForestDRLearner', 'min_propensity': 1e-06, 'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'max_samples': 0.45, 'min_balancedness_tol': 0.45, 'honest': True, 'subforest_size': 4}}, {'estimator': {'estimator_name': 'backdoor.econml.dr.LinearDRLearner', 'fit_cate_intercept': True, 'min_propensity': 1e-06}}, {'estimator': {'estimator_name': 'backdoor.econml.dr.SparseLinearDRLearner', 'fit_cate_intercept': True, 'n_alphas': 100, 'n_alphas_cov': 10, 'min_propensity': 1e-06, 'tol': 0.0001, 'max_iter': 10000, 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'backdoor.econml.dml.LinearDML', 'fit_cate_intercept': True, 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'backdoor.econml.dml.SparseLinearDML', 'fit_cate_intercept': True, 'n_alphas': 100, 'n_alphas_cov': 10, 'tol': 0.0001, 'max_iter': 10000, 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'backdoor.econml.dml.CausalForestDML', 'drate': True, 'n_estimators': 100, 'criterion': 'mse', 'min_samples_split': 10, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'max_samples': 0.45, 'min_balancedness_tol': 0.45, 'honest': True, 'fit_intercept': True, 'subforest_size': 4}}]\n", - "---------------------\n", - "Best estimator: backdoor.econml.dr.ForestDRLearner\n", - "Best config: {'estimator': {'estimator_name': 
'backdoor.econml.dr.ForestDRLearner', 'min_propensity': 1e-06, 'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'max_samples': 0.45, 'min_balancedness_tol': 0.45, 'honest': 1, 'subforest_size': 4}}\n", - "Best score: 0.28241795991132435\n" - ] - } - ], - "source": [ - "ct = CausalTune(\n", - " estimator_list='cheap_inference',\n", - " metric=metric,\n", - " verbose=0,\n", - " components_verbose=0,\n", - " time_budget=time_budget,\n", - " components_time_budget=components_time_budget,\n", - " train_size=train_size\n", - ")\n", - "\n", - "\n", - "# run causaltune\n", - "ct.fit(data=cd, outcome=cd.outcomes[0])\n", - "\n", - "print('---------------------')\n", - "# return best estimator\n", - "print(f\"Best estimator: {ct.best_estimator}\")\n", - "# config of best estimator:\n", - "print(f\"Best config: {ct.best_config}\")\n", - "# best score:\n", - "print(f\"Best score: {ct.best_score}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "dd8b4d04", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[3.08417039],\n", - " [4.10807041],\n", - " [4.32885751],\n", - " [4.53901377],\n", - " [4.19668172]])" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# obtaining effect estimates\n", - "\n", - "test_df = ct.test_df\n", - "\n", - "cates = ct.effect(test_df)\n", - "display(cates[:5,])" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "8c819410", - "metadata": {}, - "source": [ - "Below we show how to generate standard errors using `CausalTune.effect_stderr()`. By default, this will use the `best_estimator` identified during training.\n", - "\n", - "If this estimator does not have analytical standard errors, it will be refitted `n_bootstrap_samples`-times on the training data." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "0ee744d2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[0.28758771],\n", - " [0.2267228 ],\n", - " [0.29267037],\n", - " [0.22686985],\n", - " [0.28054057]])" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# generating standard errors by refitting train_df \n", - "se = ct.effect_stderr(ct.test_df)\n", - "display(se[:5,])" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "9a474ab5", - "metadata": {}, - "source": [ - "In addition to merely generating standard errors, we have the option to generate various other statistical inferences for the effect, such as the standard error, z-test score, and p-value for each sample `X{i}`." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "277adbcc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
point_estimatestderrzstatpvalueci_lowerci_upper
X
03.0840.28810.7240.02.6113.557
14.1080.22718.1190.03.7354.481
24.3290.29314.7910.03.8474.810
34.5390.22720.0070.04.1664.912
44.1970.28114.9590.03.7354.658
\n", - "
" - ], - "text/plain": [ - " point_estimate stderr zstat pvalue ci_lower ci_upper\n", - "X \n", - "0 3.084 0.288 10.724 0.0 2.611 3.557\n", - "1 4.108 0.227 18.119 0.0 3.735 4.481\n", - "2 4.329 0.293 14.791 0.0 3.847 4.810\n", - "3 4.539 0.227 20.007 0.0 4.166 4.912\n", - "4 4.197 0.281 14.959 0.0 3.735 4.658" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ct.effect_inference(test_df)[0].summary_frame(alpha=0.1, value=0, decimals=3).head()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "causality", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 0735a53c0184e71e35eabc86833a8d548b4c3690 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:31:52 +0100 Subject: [PATCH 04/22] Add files via upload Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- notebooks/Standard errors.ipynb | 654 ++++++++++++++++++++++++++++++++ 1 file changed, 654 insertions(+) create mode 100644 notebooks/Standard errors.ipynb diff --git a/notebooks/Standard errors.ipynb b/notebooks/Standard errors.ipynb new file mode 100644 index 00000000..edc89c66 --- /dev/null +++ b/notebooks/Standard errors.ipynb @@ -0,0 +1,654 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a34f30c6", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Standard errors\n", + "\n", + "This is a notebook demonstrating how to obtain standard errors for your generated impact estimates." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "43b770ca", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import os, sys\n", + "import warnings\n", + "warnings.filterwarnings('ignore') # suppress sklearn deprecation warnings for now..\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# the below checks for whether we run dowhy, causaltune, and FLAML from source\n", + "root_path = root_path = os.path.realpath('../..')\n", + "try:\n", + " import causaltune\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"causaltune\"))\n", + "\n", + "try:\n", + " import dowhy\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"dowhy\"))\n", + "\n", + "try:\n", + " import flaml\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"FLAML\"))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "53241021", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# this makes the notebook expand to full width of the browser window\n", + "from IPython.core.display import display, HTML\n", + "display(HTML(\"\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5ed9b5f7", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "%%javascript\n", + "\n", + "// turn off scrollable windows for large output\n", + "IPython.OutputArea.prototype._should_scroll = function(lines) {\n", + " return false;\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "da208ce6", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from causaltune import 
CausalTune\n", + "from causaltune.datasets import synth_ihdp" + ] + }, + { + "cell_type": "markdown", + "id": "ab536d1b", + "metadata": {}, + "source": [ + "## Loading data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "96719b4d", + "metadata": {}, + "outputs": [], + "source": [ + "# load toy dataset and apply standard pre-processing\n", + "cd = synth_ihdp()\n", + "cd.preprocess_dataset()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "49e4721b", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
treatmenty_factualrandomx1x2x3x4x5x6x7...x16x17x18x19x20x21x22x23x24x25
015.5999161.0-0.528603-0.3434551.1285540.161703-0.3166031.2952161.0...1.01.01.01.00.00.00.00.00.00.0
106.8758561.0-1.736945-1.8020020.3838282.244319-0.6291891.2952160.0...1.01.01.01.00.00.00.00.00.00.0
202.9962730.0-0.807451-0.202946-0.360898-0.8796060.808706-0.5265560.0...1.00.01.01.00.00.00.00.00.00.0
301.3662061.00.3900830.596582-1.850350-0.879606-0.004017-0.8577870.0...1.00.01.01.00.00.00.00.00.00.0
401.9635380.0-1.045228-0.6027100.0114650.1617030.683672-0.3609401.0...1.01.01.01.00.00.00.00.00.00.0
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " treatment y_factual random x1 x2 x3 x4 \\\n", + "0 1 5.599916 1.0 -0.528603 -0.343455 1.128554 0.161703 \n", + "1 0 6.875856 1.0 -1.736945 -1.802002 0.383828 2.244319 \n", + "2 0 2.996273 0.0 -0.807451 -0.202946 -0.360898 -0.879606 \n", + "3 0 1.366206 1.0 0.390083 0.596582 -1.850350 -0.879606 \n", + "4 0 1.963538 0.0 -1.045228 -0.602710 0.011465 0.161703 \n", + "\n", + " x5 x6 x7 ... x16 x17 x18 x19 x20 x21 x22 x23 x24 \\\n", + "0 -0.316603 1.295216 1.0 ... 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "1 -0.629189 1.295216 0.0 ... 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "2 0.808706 -0.526556 0.0 ... 1.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "3 -0.004017 -0.857787 0.0 ... 1.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "4 0.683672 -0.360940 1.0 ... 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " x25 \n", + "0 0.0 \n", + "1 0.0 \n", + "2 0.0 \n", + "3 0.0 \n", + "4 0.0 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# inspect the preprocessed dataset\n", + "display(cd.data.head())" + ] + }, + { + "cell_type": "markdown", + "id": "d4d1871f", + "metadata": {}, + "source": [ + "## Model training and standard errors" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "fd4b291e", + "metadata": {}, + "outputs": [], + "source": [ + "# training configs\n", + "\n", + "# set evaluation metric\n", + "metric = \"energy_distance\"\n", + "\n", + "# it's best to specify either time_budget or components_time_budget, \n", + "# and let the other one be inferred; time in seconds\n", + "time_budget = None\n", + "components_time_budget = 10\n", + "\n", + "# specify training set size\n", + "train_size = 0.7\n" + ] + }, + { + "cell_type": "markdown", + "id": "e0f63d12", + "metadata": {}, + "source": [ + "Note that in the example below, we are passing `'cheap_inference'` to `estimator_list`. 
This configuration will restrict the selection of estimators to the ones that have analytical standard errors.\n", + "\n", + "Now if `outcome_model=\"auto\"` in the CausalTune constructor, we search over a simultaneous search space for the EconML estimators and for FLAML wrappers for common regressors. The old behavior is now achieved by `outcome_model=\"nested\"` (Refitting AutoML for each estimator).\n", + "\n", + "You can also preprocess the data in the CausalityDataset using one of the popular category encoders: OneHot, WoE, Label, Target." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "097c923e", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting a Propensity-Weighted scoring estimator to be used in scoring tasks\n", + "Propensity Model Fitted Successfully\n", + "---------------------\n", + "Best estimator: backdoor.econml.dr.ForestDRLearner\n", + "Best config: {'estimator': {'estimator_name': 'backdoor.econml.dr.ForestDRLearner', 'min_propensity': 4.1309041114224745e-06, 'n_estimators': 51, 'min_samples_split': 2, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'log2', 'min_impurity_decrease': 0, 'max_samples': 0.4714678358460523, 'min_balancedness_tol': 0.48107268073765275, 'honest': 1, 'subforest_size': 5}, 'outcome_estimator': {'alpha': 0.0680343251051132, 'fit_intercept': True, 'eps': 3.581001561497127e-16, 'estimator_name': 'lasso_lars'}}\n", + "Best score: 0.19782534210362535\n" + ] + } + ], + "source": [ + "ct = CausalTune(\n", + " estimator_list='cheap_inference',\n", + " metric=metric,\n", + " verbose=0,\n", + " components_verbose=0,\n", + " time_budget=time_budget,\n", + " components_time_budget=components_time_budget,\n", + " train_size=train_size,\n", + " outcome_model=\"auto\"\n", + ")\n", + "\n", + "\n", + "# run causaltune\n", + "ct.fit(data=cd, outcome=cd.outcomes[0])\n", + "\n", + 
"print('---------------------')\n", + "# return best estimator\n", + "print(f\"Best estimator: {ct.best_estimator}\")\n", + "# config of best estimator:\n", + "print(f\"Best config: {ct.best_config}\")\n", + "# best score:\n", + "print(f\"Best score: {ct.best_score}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "dd8b4d04", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[3.06847504],\n", + " [5.10172326],\n", + " [2.3049086 ],\n", + " [4.39115942],\n", + " [4.38397264]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# obtaining effect estimates\n", + "\n", + "test_df = ct.test_df\n", + "\n", + "cates = ct.effect(test_df)\n", + "display(cates[:5,])" + ] + }, + { + "cell_type": "markdown", + "id": "8c819410", + "metadata": {}, + "source": [ + "Below we show how to generate standard errors using `CausalTune.effect_stderr()`. By default, this will use the `best_estimator` identified during training.\n", + "\n", + "If this estimator does not have analytical standard errors, it will be refitted `n_bootstrap_samples`-times on the training data." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0ee744d2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.74527346],\n", + " [0.76067972],\n", + " [0.48614067],\n", + " [0.42494167],\n", + " [0.52123297]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# generating standard errors by refitting train_df \n", + "se = ct.effect_stderr(ct.test_df)\n", + "display(se[:5,])" + ] + }, + { + "cell_type": "markdown", + "id": "9a474ab5", + "metadata": {}, + "source": [ + "In addition to merely generating standard errors, we have the option to generate various other statistical inferences for the effect, such as the standard error, z-test score, and p-value for each sample `X{i}`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "277adbcc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
point_estimatestderrzstatpvalueci_lowerci_upper
X
03.0680.7454.1170.01.8434.294
15.1020.7616.7070.03.8516.353
22.3050.4864.7410.01.5053.105
34.3910.42510.3340.03.6925.090
44.3840.5218.4110.03.5275.241
\n", + "
" + ], + "text/plain": [ + " point_estimate stderr zstat pvalue ci_lower ci_upper\n", + "X \n", + "0 3.068 0.745 4.117 0.0 1.843 4.294\n", + "1 5.102 0.761 6.707 0.0 3.851 6.353\n", + "2 2.305 0.486 4.741 0.0 1.505 3.105\n", + "3 4.391 0.425 10.334 0.0 3.692 5.090\n", + "4 4.384 0.521 8.411 0.0 3.527 5.241" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ct.effect_inference(test_df)[0].summary_frame(alpha=0.1, value=0, decimals=3).head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b200c45-d652-42a8-b8f1-611a119143c3", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From f7bcca05e6448c2aaf53301f3baff7e4677d301e Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:59:00 +0100 Subject: [PATCH 05/22] Delete notebooks/ERUPT under simulated random assignment.ipynb Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- ...PT under simulated random assignment.ipynb | 500 ------------------ 1 file changed, 500 deletions(-) delete mode 100644 notebooks/ERUPT under simulated random assignment.ipynb diff --git a/notebooks/ERUPT under simulated random assignment.ipynb b/notebooks/ERUPT under simulated random assignment.ipynb deleted file mode 100644 index 6ac2b20b..00000000 --- a/notebooks/ERUPT under simulated random assignment.ipynb +++ /dev/null @@ -1,500 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "a34f30c6", - 
"metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "# ERUPT under simulated random assignment" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "c37a7a94", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n" - ] - } - ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "import os, sys\n", - "import warnings\n", - "warnings.filterwarnings('ignore') # suppress sklearn deprecation warnings for now..\n", - "\n", - "import pandas as pd\n", - "import numpy as np\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "# the below checks for whether we run dowhy, causaltune, and FLAML from source\n", - "root_path = root_path = os.path.realpath('../..')\n", - "try:\n", - " import causaltune\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"causaltune\"))\n", - "\n", - "try:\n", - " import dowhy\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"dowhy\"))\n", - "\n", - "try:\n", - " import flaml\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"FLAML\"))\n", - "\n", - "from causaltune import CausalTune\n", - "from causaltune.datasets import generate_non_random_dataset\n", - "from causaltune.erupt import DummyPropensity, ERUPT\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "53241021", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# this makes the notebook expand to full width of the browser window\n", - "from IPython.core.display import display, HTML\n", - "display(HTML(\"\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": 
"5ed9b5f7", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "application/javascript": [ - "\n", - "// turn off scrollable windows for large output\n", - "IPython.OutputArea.prototype._should_scroll = function(lines) {\n", - " return false;\n", - "}\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%%javascript\n", - "\n", - "// turn off scrollable windows for large output\n", - "IPython.OutputArea.prototype._should_scroll = function(lines) {\n", - " return false;\n", - "}" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "af5333b0", - "metadata": {}, - "source": [ - "## Loading data and model training" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "a0211b9a", - "metadata": {}, - "outputs": [], - "source": [ - "# load toy dataset with non-random assignment and apply standard pre-processing\n", - "cd = generate_non_random_dataset()\n", - "cd.preprocess_dataset()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6cec1abf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
TYrandomX1X2X3X4X5propensity
001.2393081.0-0.847134-0.3985630.1765390.9573601.1224570.328241
100.1084420.0-0.583898-0.8992651.177333-0.563962-0.6147370.195308
21-0.8973100.0-2.2375900.061438-0.4625190.777278-1.3790220.345805
310.7574751.0-0.0473190.354603-1.9764290.0819450.4240410.695707
400.8534781.0-0.2568320.0487481.536085-1.0274150.6897330.304767
\n", - "
" - ], - "text/plain": [ - " T Y random X1 X2 X3 X4 X5 \\\n", - "0 0 1.239308 1.0 -0.847134 -0.398563 0.176539 0.957360 1.122457 \n", - "1 0 0.108442 0.0 -0.583898 -0.899265 1.177333 -0.563962 -0.614737 \n", - "2 1 -0.897310 0.0 -2.237590 0.061438 -0.462519 0.777278 -1.379022 \n", - "3 1 0.757475 1.0 -0.047319 0.354603 -1.976429 0.081945 0.424041 \n", - "4 0 0.853478 1.0 -0.256832 0.048748 1.536085 -1.027415 0.689733 \n", - "\n", - " propensity \n", - "0 0.328241 \n", - "1 0.195308 \n", - "2 0.345805 \n", - "3 0.695707 \n", - "4 0.304767 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "display(cd.data.head())" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "4b5d0795", - "metadata": {}, - "outputs": [], - "source": [ - "# training configs\n", - "\n", - "# set evaluation metric\n", - "metric = \"energy_distance\"\n", - "\n", - "# it's best to specify either time_budget or components_time_budget, \n", - "# and let the other one be inferred; time in seconds\n", - "time_budget = None\n", - "components_time_budget = 10\n", - "\n", - "# specify training set size\n", - "train_size = 0.7" - ] - }, - { - "cell_type": "markdown", - "id": "33681e65-6dd4-4c7d-a62d-925572b39e81", - "metadata": {}, - "source": [ - "Now if outcome_model=\"auto\" in the CausalTune constructor, we search over a simultaneous search space for the EconML estimators and for FLAML wrappers for common regressors. 
The old behavior is now achieved by outcome_model=\"nested\" (the default for now)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "a51c87f4", - "metadata": {}, - "outputs": [], - "source": [ - "ct = CausalTune(\n", - " estimator_list=[\"CausalForestDML\", \"XLearner\"],\n", - " metric=metric,\n", - " verbose=0,\n", - " components_verbose=0,\n", - " time_budget=time_budget,\n", - " components_time_budget=components_time_budget,\n", - " train_size=train_size,\n", - " outcome_model=\"auto\"\n", - ")\n", - "\n", - "\n", - "# run causaltune\n", - "ct.fit(data=cd, outcome=cd.outcomes[0])\n", - "\n", - "print('---------------------')\n", - "# return best estimator\n", - "print(f\"Best estimator: {ct.best_estimator}\")\n", - "# config of best estimator:\n", - "print(f\"Best config: {ct.best_config}\")\n", - "# best score:\n", - "print(f\"Best score: {ct.best_score}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "19bcfc2e", - "metadata": {}, - "source": [ - "## Random ERUPT" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "2bea4e38", - "metadata": {}, - "source": [ - "Below we demonstrate how to use Estimated Response Under Proposed Treatment (ERUPT) to estimate the average treatment effect had the treatment been assigned randomly. Recall that the dataset used in this example is constructed in a way that the treatment propensity is a function of a unit's covariates." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "db1b69a3", - "metadata": {}, - "outputs": [], - "source": [ - "use_df = ct.test_df" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "e8afee5a", - "metadata": {}, - "outputs": [], - "source": [ - "# computing mean ERUPT over 10 bootstrapped samples\n", - "\n", - "scores_list = []\n", - "\n", - "for i in range(10):\n", - "\n", - " bootstrap_df = use_df.sample(frac=1, replace=True)\n", - " propensities = bootstrap_df['propensity']\n", - " actual_treatment = bootstrap_df['T']\n", - " outcome = bootstrap_df['Y']\n", - "\n", - " # define the random assignment policy\n", - " random_policy = np.random.randint(0,2, size=len(bootstrap_df))\n", - "\n", - " # define a propensity model that will simply return the propensities when calling predict_proba\n", - " propensity_model = DummyPropensity(p=propensities, treatment=actual_treatment)\n", - "\n", - " # obtain ERUPT under random policy\n", - " e = ERUPT(treatment_name='T', propensity_model=propensity_model)\n", - " scores_list.append(e.score(df=use_df,outcome=outcome,policy=random_policy))\n", - "\n", - "erupt_mean = np.mean(scores_list)\n", - "erupt_sd = np.std(scores_list)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "438112f2", - "metadata": {}, - "outputs": [], - "source": [ - "# compute naive ate as difference in means\n", - "naive_ate, naive_sd, _ = ct.scorer.naive_ate(ct.test_df['T'], ct.test_df['Y'])" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "a0f6d079", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
estimated_effectsd
naive_ate0.2181510.124848
random_erupt0.0231410.216845
\n", - "
" - ], - "text/plain": [ - " estimated_effect sd\n", - "naive_ate 0.218151 0.124848\n", - "random_erupt 0.023141 0.216845" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# comparison of naive ate to mean random erupt over 10 bootstrap runs\n", - "erupt_df = pd.DataFrame([[naive_ate,naive_sd],[erupt_mean,erupt_sd]], columns=['estimated_effect', 'sd'], index=['naive_ate','random_erupt'])\n", - "display(erupt_df)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a54530bf", - "metadata": {}, - "source": [ - "For more details on the ERUPT implementation, consult [Hitsch and Misra (2018)](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3111957). Note also that we assume that treatment takes integer values from 0 to n." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.14" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From ab41d47fe99e0ff45424d067ef581011de7e9de8 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:59:11 +0100 Subject: [PATCH 06/22] Add files via upload Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- ...PT under simulated random assignment.ipynb | 485 ++++++++++++++++++ 1 file changed, 485 insertions(+) create mode 100644 notebooks/ERUPT under simulated random assignment.ipynb diff --git a/notebooks/ERUPT under simulated random assignment.ipynb b/notebooks/ERUPT under simulated random assignment.ipynb new file mode 100644 index 00000000..70c087f0 --- /dev/null +++ b/notebooks/ERUPT under simulated random assignment.ipynb @@ -0,0 +1,485 @@ +{ + "cells": [ + 
{ + "cell_type": "markdown", + "id": "a34f30c6", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# ERUPT under simulated random assignment" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c37a7a94", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import os, sys\n", + "import warnings\n", + "warnings.filterwarnings('ignore') # suppress sklearn deprecation warnings for now..\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# the below checks for whether we run dowhy, causaltune, and FLAML from source\n", + "root_path = root_path = os.path.realpath('../..')\n", + "try:\n", + " import causaltune\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"causaltune\"))\n", + "\n", + "try:\n", + " import dowhy\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"dowhy\"))\n", + "\n", + "try:\n", + " import flaml\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"FLAML\"))\n", + "\n", + "from causaltune import CausalTune\n", + "from causaltune.datasets import generate_non_random_dataset\n", + "from causaltune.erupt import DummyPropensity, ERUPT\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "53241021", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# this makes the notebook expand to full width of the browser window\n", + "from IPython.core.display import display, HTML\n", + "display(HTML(\"\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5ed9b5f7", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# %%javascript\n", + "\n", + 
"# // turn off scrollable windows for large output\n", + "# IPython.OutputArea.prototype._should_scroll = function(lines) {\n", + "# return false;\n", + "# }" + ] + }, + { + "cell_type": "markdown", + "id": "af5333b0", + "metadata": {}, + "source": [ + "## Loading data and model training" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a0211b9a", + "metadata": {}, + "outputs": [], + "source": [ + "# load toy dataset with non-random assignment and apply standard pre-processing\n", + "cd = generate_non_random_dataset()\n", + "cd.preprocess_dataset()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6cec1abf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TYrandomX1X2X3X4X5propensity
00-0.5290940.0-0.325404-3.200259-1.0962310.454945-0.6829500.096673
10-2.6739121.0-2.2246411.3841330.5064850.145684-0.1952660.472952
21-1.6664440.00.687121-0.2076140.7886991.131345-0.3520910.550413
30-1.6191430.00.740413-0.6662631.027818-0.197965-2.0252200.423549
400.3311061.0-0.907719-1.7755810.072270-1.7603791.4496680.083704
\n", + "
" + ], + "text/plain": [ + " T Y random X1 X2 X3 X4 X5 \\\n", + "0 0 -0.529094 0.0 -0.325404 -3.200259 -1.096231 0.454945 -0.682950 \n", + "1 0 -2.673912 1.0 -2.224641 1.384133 0.506485 0.145684 -0.195266 \n", + "2 1 -1.666444 0.0 0.687121 -0.207614 0.788699 1.131345 -0.352091 \n", + "3 0 -1.619143 0.0 0.740413 -0.666263 1.027818 -0.197965 -2.025220 \n", + "4 0 0.331106 1.0 -0.907719 -1.775581 0.072270 -1.760379 1.449668 \n", + "\n", + " propensity \n", + "0 0.096673 \n", + "1 0.472952 \n", + "2 0.550413 \n", + "3 0.423549 \n", + "4 0.083704 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(cd.data.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4b5d0795", + "metadata": {}, + "outputs": [], + "source": [ + "# training configs\n", + "\n", + "# set evaluation metric\n", + "metric = \"energy_distance\"\n", + "\n", + "# it's best to specify either time_budget or components_time_budget, \n", + "# and let the other one be inferred; time in seconds\n", + "time_budget = None\n", + "components_time_budget = 10\n", + "\n", + "# specify training set size\n", + "train_size = 0.7" + ] + }, + { + "cell_type": "markdown", + "id": "33681e65-6dd4-4c7d-a62d-925572b39e81", + "metadata": {}, + "source": [ + "Now if `outcome_model=\"auto\"` in the CausalTune constructor, we search over a simultaneous search space for the EconML estimators and for FLAML wrappers for common regressors. The old behavior is now achieved by `outcome_model=\"nested\"` (Refitting AutoML for each estimator).\n", + "\n", + "You can also preprocess the data in the CausalityDataset using one of the popular category encoders: OneHot, WoE, Label, Target." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a51c87f4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting a Propensity-Weighted scoring estimator to be used in scoring tasks\n", + "Propensity Model Fitted Successfully\n", + "---------------------\n", + "Best estimator: backdoor.econml.dml.CausalForestDML\n", + "Best config: {'estimator': {'estimator_name': 'backdoor.econml.dml.CausalForestDML', 'drate': 1, 'n_estimators': 2, 'criterion': 'het', 'min_samples_split': 12, 'min_samples_leaf': 8, 'min_weight_fraction_leaf': 0.0, 'max_features': 'log2', 'min_impurity_decrease': 0, 'max_samples': 0.2884902061383809, 'min_balancedness_tol': 0.4585520111743354, 'honest': 1, 'fit_intercept': 1, 'subforest_size': 5}, 'outcome_estimator': {'alpha': 0.006205274971406812, 'fit_intercept': True, 'eps': 7.833744321548246e-15, 'estimator_name': 'lasso_lars'}}\n", + "Best score: 0.2952285030581425\n" + ] + } + ], + "source": [ + "ct = CausalTune(\n", + " estimator_list=[\"CausalForestDML\", \"XLearner\"],\n", + " metric=metric,\n", + " verbose=0,\n", + " components_verbose=0,\n", + " time_budget=time_budget,\n", + " components_time_budget=components_time_budget,\n", + " train_size=train_size,\n", + " outcome_model=\"auto\"\n", + ")\n", + "\n", + "\n", + "# run causaltune\n", + "ct.fit(data=cd, outcome=cd.outcomes[0])\n", + "\n", + "print('---------------------')\n", + "# return best estimator\n", + "print(f\"Best estimator: {ct.best_estimator}\")\n", + "# config of best estimator:\n", + "print(f\"Best config: {ct.best_config}\")\n", + "# best score:\n", + "print(f\"Best score: {ct.best_score}\")" + ] + }, + { + "cell_type": "markdown", + "id": "19bcfc2e", + "metadata": {}, + "source": [ + "## Random ERUPT" + ] + }, + { + "cell_type": "markdown", + "id": "2bea4e38", + "metadata": {}, + "source": [ + "Below we demonstrate how to use Estimated Response Under Proposed Treatment (ERUPT) to estimate the 
average treatment effect had the treatment been assigned randomly. Recall that the dataset used in this example is constructed in a way that the treatment propensity is a function of a unit's covariates." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "db1b69a3", + "metadata": {}, + "outputs": [], + "source": [ + "use_df = ct.test_df" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e8afee5a", + "metadata": {}, + "outputs": [], + "source": [ + "# computing mean ERUPT over 10 bootstrapped samples\n", + "\n", + "scores_list = []\n", + "\n", + "for i in range(10):\n", + "\n", + " bootstrap_df = use_df.sample(frac=1, replace=True)\n", + " propensities = bootstrap_df['propensity']\n", + " actual_treatment = bootstrap_df['T']\n", + " outcome = bootstrap_df['Y']\n", + "\n", + " # define the random assignment policy\n", + " random_policy = np.random.randint(0,2, size=len(bootstrap_df))\n", + "\n", + " # define a propensity model that will simply return the propensities when calling predict_proba\n", + " propensity_model = DummyPropensity(p=propensities, treatment=actual_treatment)\n", + "\n", + " # obtain ERUPT under random policy\n", + " e = ERUPT(treatment_name='T', propensity_model=propensity_model)\n", + " scores_list.append(e.score(df=use_df,outcome=outcome,policy=random_policy))\n", + "\n", + "erupt_mean = np.mean(scores_list)\n", + "erupt_sd = np.std(scores_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "438112f2", + "metadata": {}, + "outputs": [], + "source": [ + "# compute naive ate as difference in means\n", + "naive_ate, naive_sd, _ = ct.scorer.naive_ate(ct.test_df['T'], ct.test_df['Y'])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "a0f6d079", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
estimated_effectsd
naive_ate0.0307400.139801
random_erupt-0.0010590.210618
\n", + "
" + ], + "text/plain": [ + " estimated_effect sd\n", + "naive_ate 0.030740 0.139801\n", + "random_erupt -0.001059 0.210618" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# comparison of naive ate to mean random erupt over 10 bootstrap runs\n", + "erupt_df = pd.DataFrame([[naive_ate,naive_sd],[erupt_mean,erupt_sd]], columns=['estimated_effect', 'sd'], index=['naive_ate','random_erupt'])\n", + "display(erupt_df)" + ] + }, + { + "cell_type": "markdown", + "id": "a54530bf", + "metadata": {}, + "source": [ + "For more details on the ERUPT implementation, consult [Hitsch and Misra (2018)](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3111957). Note also that we assume that treatment takes integer values from 0 to n." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 00005c5f3716e8e4802ecca05d7de3d911af9a2d Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:59:41 +0100 Subject: [PATCH 07/22] Delete notebooks/ERUPT under simulated random assignment.ipynb Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- ...PT under simulated random assignment.ipynb | 485 ------------------ 1 file changed, 485 deletions(-) delete mode 100644 notebooks/ERUPT under simulated random assignment.ipynb diff --git a/notebooks/ERUPT under simulated random assignment.ipynb b/notebooks/ERUPT under simulated random assignment.ipynb deleted file mode 100644 index 70c087f0..00000000 --- a/notebooks/ERUPT under simulated random assignment.ipynb +++ /dev/null @@ -1,485 
+0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "a34f30c6", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "# ERUPT under simulated random assignment" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "c37a7a94", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "import os, sys\n", - "import warnings\n", - "warnings.filterwarnings('ignore') # suppress sklearn deprecation warnings for now..\n", - "\n", - "import pandas as pd\n", - "import numpy as np\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "# the below checks for whether we run dowhy, causaltune, and FLAML from source\n", - "root_path = root_path = os.path.realpath('../..')\n", - "try:\n", - " import causaltune\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"causaltune\"))\n", - "\n", - "try:\n", - " import dowhy\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"dowhy\"))\n", - "\n", - "try:\n", - " import flaml\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"FLAML\"))\n", - "\n", - "from causaltune import CausalTune\n", - "from causaltune.datasets import generate_non_random_dataset\n", - "from causaltune.erupt import DummyPropensity, ERUPT\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "53241021", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# this makes the notebook expand to full width of the browser window\n", - "from IPython.core.display import display, HTML\n", - "display(HTML(\"\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "5ed9b5f7", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "# 
%%javascript\n", - "\n", - "# // turn off scrollable windows for large output\n", - "# IPython.OutputArea.prototype._should_scroll = function(lines) {\n", - "# return false;\n", - "# }" - ] - }, - { - "cell_type": "markdown", - "id": "af5333b0", - "metadata": {}, - "source": [ - "## Loading data and model training" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "a0211b9a", - "metadata": {}, - "outputs": [], - "source": [ - "# load toy dataset with non-random assignment and apply standard pre-processing\n", - "cd = generate_non_random_dataset()\n", - "cd.preprocess_dataset()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6cec1abf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
TYrandomX1X2X3X4X5propensity
00-0.5290940.0-0.325404-3.200259-1.0962310.454945-0.6829500.096673
10-2.6739121.0-2.2246411.3841330.5064850.145684-0.1952660.472952
21-1.6664440.00.687121-0.2076140.7886991.131345-0.3520910.550413
30-1.6191430.00.740413-0.6662631.027818-0.197965-2.0252200.423549
400.3311061.0-0.907719-1.7755810.072270-1.7603791.4496680.083704
\n", - "
" - ], - "text/plain": [ - " T Y random X1 X2 X3 X4 X5 \\\n", - "0 0 -0.529094 0.0 -0.325404 -3.200259 -1.096231 0.454945 -0.682950 \n", - "1 0 -2.673912 1.0 -2.224641 1.384133 0.506485 0.145684 -0.195266 \n", - "2 1 -1.666444 0.0 0.687121 -0.207614 0.788699 1.131345 -0.352091 \n", - "3 0 -1.619143 0.0 0.740413 -0.666263 1.027818 -0.197965 -2.025220 \n", - "4 0 0.331106 1.0 -0.907719 -1.775581 0.072270 -1.760379 1.449668 \n", - "\n", - " propensity \n", - "0 0.096673 \n", - "1 0.472952 \n", - "2 0.550413 \n", - "3 0.423549 \n", - "4 0.083704 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "display(cd.data.head())" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "4b5d0795", - "metadata": {}, - "outputs": [], - "source": [ - "# training configs\n", - "\n", - "# set evaluation metric\n", - "metric = \"energy_distance\"\n", - "\n", - "# it's best to specify either time_budget or components_time_budget, \n", - "# and let the other one be inferred; time in seconds\n", - "time_budget = None\n", - "components_time_budget = 10\n", - "\n", - "# specify training set size\n", - "train_size = 0.7" - ] - }, - { - "cell_type": "markdown", - "id": "33681e65-6dd4-4c7d-a62d-925572b39e81", - "metadata": {}, - "source": [ - "Now if `outcome_model=\"auto\"` in the CausalTune constructor, we search over a simultaneous search space for the EconML estimators and for FLAML wrappers for common regressors. The old behavior is now achieved by `outcome_model=\"nested\"` (Refitting AutoML for each estimator).\n", - "\n", - "You can also preprocess the data in the CausalityDataset using one of the popular category encoders: OneHot, WoE, Label, Target." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "a51c87f4", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting a Propensity-Weighted scoring estimator to be used in scoring tasks\n", - "Propensity Model Fitted Successfully\n", - "---------------------\n", - "Best estimator: backdoor.econml.dml.CausalForestDML\n", - "Best config: {'estimator': {'estimator_name': 'backdoor.econml.dml.CausalForestDML', 'drate': 1, 'n_estimators': 2, 'criterion': 'het', 'min_samples_split': 12, 'min_samples_leaf': 8, 'min_weight_fraction_leaf': 0.0, 'max_features': 'log2', 'min_impurity_decrease': 0, 'max_samples': 0.2884902061383809, 'min_balancedness_tol': 0.4585520111743354, 'honest': 1, 'fit_intercept': 1, 'subforest_size': 5}, 'outcome_estimator': {'alpha': 0.006205274971406812, 'fit_intercept': True, 'eps': 7.833744321548246e-15, 'estimator_name': 'lasso_lars'}}\n", - "Best score: 0.2952285030581425\n" - ] - } - ], - "source": [ - "ct = CausalTune(\n", - " estimator_list=[\"CausalForestDML\", \"XLearner\"],\n", - " metric=metric,\n", - " verbose=0,\n", - " components_verbose=0,\n", - " time_budget=time_budget,\n", - " components_time_budget=components_time_budget,\n", - " train_size=train_size,\n", - " outcome_model=\"auto\"\n", - ")\n", - "\n", - "\n", - "# run causaltune\n", - "ct.fit(data=cd, outcome=cd.outcomes[0])\n", - "\n", - "print('---------------------')\n", - "# return best estimator\n", - "print(f\"Best estimator: {ct.best_estimator}\")\n", - "# config of best estimator:\n", - "print(f\"Best config: {ct.best_config}\")\n", - "# best score:\n", - "print(f\"Best score: {ct.best_score}\")" - ] - }, - { - "cell_type": "markdown", - "id": "19bcfc2e", - "metadata": {}, - "source": [ - "## Random ERUPT" - ] - }, - { - "cell_type": "markdown", - "id": "2bea4e38", - "metadata": {}, - "source": [ - "Below we demonstrate how to use Estimated Response Under Proposed Treatment (ERUPT) to estimate the 
average treatment effect had the treatment been assigned randomly. Recall that the dataset used in this example is constructed in a way that the treatment propensity is a function of a unit's covariates." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "db1b69a3", - "metadata": {}, - "outputs": [], - "source": [ - "use_df = ct.test_df" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "e8afee5a", - "metadata": {}, - "outputs": [], - "source": [ - "# computing mean ERUPT over 10 bootstrapped samples\n", - "\n", - "scores_list = []\n", - "\n", - "for i in range(10):\n", - "\n", - " bootstrap_df = use_df.sample(frac=1, replace=True)\n", - " propensities = bootstrap_df['propensity']\n", - " actual_treatment = bootstrap_df['T']\n", - " outcome = bootstrap_df['Y']\n", - "\n", - " # define the random assignment policy\n", - " random_policy = np.random.randint(0,2, size=len(bootstrap_df))\n", - "\n", - " # define a propensity model that will simply return the propensities when calling predict_proba\n", - " propensity_model = DummyPropensity(p=propensities, treatment=actual_treatment)\n", - "\n", - " # obtain ERUPT under random policy\n", - " e = ERUPT(treatment_name='T', propensity_model=propensity_model)\n", - " scores_list.append(e.score(df=use_df,outcome=outcome,policy=random_policy))\n", - "\n", - "erupt_mean = np.mean(scores_list)\n", - "erupt_sd = np.std(scores_list)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "438112f2", - "metadata": {}, - "outputs": [], - "source": [ - "# compute naive ate as difference in means\n", - "naive_ate, naive_sd, _ = ct.scorer.naive_ate(ct.test_df['T'], ct.test_df['Y'])" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "a0f6d079", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
estimated_effectsd
naive_ate0.0307400.139801
random_erupt-0.0010590.210618
\n", - "
" - ], - "text/plain": [ - " estimated_effect sd\n", - "naive_ate 0.030740 0.139801\n", - "random_erupt -0.001059 0.210618" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# comparison of naive ate to mean random erupt over 10 bootstrap runs\n", - "erupt_df = pd.DataFrame([[naive_ate,naive_sd],[erupt_mean,erupt_sd]], columns=['estimated_effect', 'sd'], index=['naive_ate','random_erupt'])\n", - "display(erupt_df)" - ] - }, - { - "cell_type": "markdown", - "id": "a54530bf", - "metadata": {}, - "source": [ - "For more details on the ERUPT implementation, consult [Hitsch and Misra (2018)](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3111957). Note also that we assume that treatment takes integer values from 0 to n." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.14" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From cbd53dcc9376498175ad6c24dde53b664e4362b6 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:59:52 +0100 Subject: [PATCH 08/22] Add files via upload Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- ...PT under simulated random assignment.ipynb | 485 ++++++++++++++++++ 1 file changed, 485 insertions(+) create mode 100644 notebooks/ERUPT under simulated random assignment.ipynb diff --git a/notebooks/ERUPT under simulated random assignment.ipynb b/notebooks/ERUPT under simulated random assignment.ipynb new file mode 100644 index 00000000..cf124cdc --- /dev/null +++ b/notebooks/ERUPT under simulated random assignment.ipynb @@ -0,0 +1,485 @@ +{ + "cells": [ + { + "cell_type": 
"markdown", + "id": "a34f30c6", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# ERUPT under simulated random assignment" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c37a7a94", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import os, sys\n", + "import warnings\n", + "warnings.filterwarnings('ignore') # suppress sklearn deprecation warnings for now..\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# the below checks for whether we run dowhy, causaltune, and FLAML from source\n", + "root_path = root_path = os.path.realpath('../..')\n", + "try:\n", + " import causaltune\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"causaltune\"))\n", + "\n", + "try:\n", + " import dowhy\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"dowhy\"))\n", + "\n", + "try:\n", + " import flaml\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"FLAML\"))\n", + "\n", + "from causaltune import CausalTune\n", + "from causaltune.datasets import generate_non_random_dataset\n", + "from causaltune.erupt import DummyPropensity, ERUPT\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "53241021", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# this makes the notebook expand to full width of the browser window\n", + "from IPython.core.display import display, HTML\n", + "display(HTML(\"\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5ed9b5f7", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "%%javascript\n", + "\n", + "// turn off 
scrollable windows for large output\n", + "IPython.OutputArea.prototype._should_scroll = function(lines) {\n", + " return false;\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "af5333b0", + "metadata": {}, + "source": [ + "## Loading data and model training" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a0211b9a", + "metadata": {}, + "outputs": [], + "source": [ + "# load toy dataset with non-random assignment and apply standard pre-processing\n", + "cd = generate_non_random_dataset()\n", + "cd.preprocess_dataset()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6cec1abf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TYrandomX1X2X3X4X5propensity
00-0.5290940.0-0.325404-3.200259-1.0962310.454945-0.6829500.096673
10-2.6739121.0-2.2246411.3841330.5064850.145684-0.1952660.472952
21-1.6664440.00.687121-0.2076140.7886991.131345-0.3520910.550413
30-1.6191430.00.740413-0.6662631.027818-0.197965-2.0252200.423549
400.3311061.0-0.907719-1.7755810.072270-1.7603791.4496680.083704
\n", + "
" + ], + "text/plain": [ + " T Y random X1 X2 X3 X4 X5 \\\n", + "0 0 -0.529094 0.0 -0.325404 -3.200259 -1.096231 0.454945 -0.682950 \n", + "1 0 -2.673912 1.0 -2.224641 1.384133 0.506485 0.145684 -0.195266 \n", + "2 1 -1.666444 0.0 0.687121 -0.207614 0.788699 1.131345 -0.352091 \n", + "3 0 -1.619143 0.0 0.740413 -0.666263 1.027818 -0.197965 -2.025220 \n", + "4 0 0.331106 1.0 -0.907719 -1.775581 0.072270 -1.760379 1.449668 \n", + "\n", + " propensity \n", + "0 0.096673 \n", + "1 0.472952 \n", + "2 0.550413 \n", + "3 0.423549 \n", + "4 0.083704 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(cd.data.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4b5d0795", + "metadata": {}, + "outputs": [], + "source": [ + "# training configs\n", + "\n", + "# set evaluation metric\n", + "metric = \"energy_distance\"\n", + "\n", + "# it's best to specify either time_budget or components_time_budget, \n", + "# and let the other one be inferred; time in seconds\n", + "time_budget = None\n", + "components_time_budget = 10\n", + "\n", + "# specify training set size\n", + "train_size = 0.7" + ] + }, + { + "cell_type": "markdown", + "id": "33681e65-6dd4-4c7d-a62d-925572b39e81", + "metadata": {}, + "source": [ + "Now if `outcome_model=\"auto\"` in the CausalTune constructor, we search over a simultaneous search space for the EconML estimators and for FLAML wrappers for common regressors. The old behavior is now achieved by `outcome_model=\"nested\"` (Refitting AutoML for each estimator).\n", + "\n", + "You can also preprocess the data in the CausalityDataset using one of the popular category encoders: OneHot, WoE, Label, Target." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a51c87f4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting a Propensity-Weighted scoring estimator to be used in scoring tasks\n", + "Propensity Model Fitted Successfully\n", + "---------------------\n", + "Best estimator: backdoor.econml.dml.CausalForestDML\n", + "Best config: {'estimator': {'estimator_name': 'backdoor.econml.dml.CausalForestDML', 'drate': 1, 'n_estimators': 2, 'criterion': 'het', 'min_samples_split': 12, 'min_samples_leaf': 8, 'min_weight_fraction_leaf': 0.0, 'max_features': 'log2', 'min_impurity_decrease': 0, 'max_samples': 0.2884902061383809, 'min_balancedness_tol': 0.4585520111743354, 'honest': 1, 'fit_intercept': 1, 'subforest_size': 5}, 'outcome_estimator': {'alpha': 0.006205274971406812, 'fit_intercept': True, 'eps': 7.833744321548246e-15, 'estimator_name': 'lasso_lars'}}\n", + "Best score: 0.2952285030581425\n" + ] + } + ], + "source": [ + "ct = CausalTune(\n", + " estimator_list=[\"CausalForestDML\", \"XLearner\"],\n", + " metric=metric,\n", + " verbose=0,\n", + " components_verbose=0,\n", + " time_budget=time_budget,\n", + " components_time_budget=components_time_budget,\n", + " train_size=train_size,\n", + " outcome_model=\"auto\"\n", + ")\n", + "\n", + "\n", + "# run causaltune\n", + "ct.fit(data=cd, outcome=cd.outcomes[0])\n", + "\n", + "print('---------------------')\n", + "# return best estimator\n", + "print(f\"Best estimator: {ct.best_estimator}\")\n", + "# config of best estimator:\n", + "print(f\"Best config: {ct.best_config}\")\n", + "# best score:\n", + "print(f\"Best score: {ct.best_score}\")" + ] + }, + { + "cell_type": "markdown", + "id": "19bcfc2e", + "metadata": {}, + "source": [ + "## Random ERUPT" + ] + }, + { + "cell_type": "markdown", + "id": "2bea4e38", + "metadata": {}, + "source": [ + "Below we demonstrate how to use Estimated Response Under Proposed Treatment (ERUPT) to estimate the 
average treatment effect had the treatment been assigned randomly. Recall that the dataset used in this example is constructed in a way that the treatment propensity is a function of a unit's covariates." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "db1b69a3", + "metadata": {}, + "outputs": [], + "source": [ + "use_df = ct.test_df" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e8afee5a", + "metadata": {}, + "outputs": [], + "source": [ + "# computing mean ERUPT over 10 bootstrapped samples\n", + "\n", + "scores_list = []\n", + "\n", + "for i in range(10):\n", + "\n", + " bootstrap_df = use_df.sample(frac=1, replace=True)\n", + " propensities = bootstrap_df['propensity']\n", + " actual_treatment = bootstrap_df['T']\n", + " outcome = bootstrap_df['Y']\n", + "\n", + " # define the random assignment policy\n", + " random_policy = np.random.randint(0,2, size=len(bootstrap_df))\n", + "\n", + " # define a propensity model that will simply return the propensities when calling predict_proba\n", + " propensity_model = DummyPropensity(p=propensities, treatment=actual_treatment)\n", + "\n", + " # obtain ERUPT under random policy\n", + " e = ERUPT(treatment_name='T', propensity_model=propensity_model)\n", + " scores_list.append(e.score(df=use_df,outcome=outcome,policy=random_policy))\n", + "\n", + "erupt_mean = np.mean(scores_list)\n", + "erupt_sd = np.std(scores_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "438112f2", + "metadata": {}, + "outputs": [], + "source": [ + "# compute naive ate as difference in means\n", + "naive_ate, naive_sd, _ = ct.scorer.naive_ate(ct.test_df['T'], ct.test_df['Y'])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "a0f6d079", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
estimated_effectsd
naive_ate0.0307400.139801
random_erupt-0.0010590.210618
\n", + "
" + ], + "text/plain": [ + " estimated_effect sd\n", + "naive_ate 0.030740 0.139801\n", + "random_erupt -0.001059 0.210618" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# comparison of naive ate to mean random erupt over 10 bootstrap runs\n", + "erupt_df = pd.DataFrame([[naive_ate,naive_sd],[erupt_mean,erupt_sd]], columns=['estimated_effect', 'sd'], index=['naive_ate','random_erupt'])\n", + "display(erupt_df)" + ] + }, + { + "cell_type": "markdown", + "id": "a54530bf", + "metadata": {}, + "source": [ + "For more details on the ERUPT implementation, consult [Hitsch and Misra (2018)](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3111957). Note also that we assume that treatment takes integer values from 0 to n." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From c8bfbd81f618986512056513c76d6fbb9f5a2211 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 10 Sep 2024 15:06:01 +0100 Subject: [PATCH 09/22] Delete notebooks/Multiple treatments examples.ipynb Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- notebooks/Multiple treatments examples.ipynb | 1322 ------------------ 1 file changed, 1322 deletions(-) delete mode 100644 notebooks/Multiple treatments examples.ipynb diff --git a/notebooks/Multiple treatments examples.ipynb b/notebooks/Multiple treatments examples.ipynb deleted file mode 100644 index 4242c362..00000000 --- a/notebooks/Multiple treatments examples.ipynb +++ /dev/null @@ -1,1322 +0,0 @@ -{ - "cells": [ - { - "cell_type": 
"markdown", - "id": "afbd11d5-c7d7-4467-9b80-fa73b58bae00", - "metadata": {}, - "source": [ - "# Multiple treatments examples\n", - "- In this notebook, we want to demonstrate common use cases where we have multiple treatments (ab testing is not covered, as you can just use another notebook for it)\n", - "- Simple CausalTune training with multiple treatments\n", - "- Applying custom propensities and train CausalTune (for example, in Uplift models)\n", - "- ERUPT for multiple treatments" - ] - }, - { - "cell_type": "markdown", - "id": "11352cfb-89b2-48be-adb5-e68cd95d2ce8", - "metadata": {}, - "source": [ - "### Setting up the data and causal model: CausalityDataset\n", - "- This notebook demonstrates how to train CausalTune with multiple custom propensities.\n", - "- The easiest example here is uplift modeling. Suppose we trained a model, sent messages, launched an experiment.\n", - "- And now we want to train more complex model using previous round model as propensities." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "fbcf1af0-3ad3-4666-b465-633f950409be", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n" - ] - } - ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "import os, sys\n", - "import warnings\n", - "warnings.filterwarnings('ignore') # suppress sklearn deprecation warnings for now..\n", - "\n", - "import pandas as pd\n", - "import numpy as np\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "from typing import List, Union\n", - "import random\n", - "\n", - "# the below checks for whether we run dowhy, causaltune, and FLAML from source\n", - "root_path = root_path = os.path.realpath('../..')\n", - "try:\n", - " import causaltune\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"causaltune\"))\n", - "\n", - 
"try:\n", - " import dowhy\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"dowhy\"))\n", - "\n", - "try:\n", - " import flaml\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"FLAML\"))\n", - " \n", - " \n", - " \n", - "from causaltune import CausalTune\n", - "from causaltune.datasets import synth_ihdp\n", - "from causaltune.data_utils import CausalityDataset\n", - "from causaltune.erupt import DummyPropensity, ERUPT\n", - "from causaltune.models.passthrough import passthrough_model" - ] - }, - { - "cell_type": "markdown", - "id": "8c238be1-3759-40ba-90bd-ebe30a9c9e5e", - "metadata": {}, - "source": [ - "### Data and generating treatments\n", - "- We first illustrate the model setup with a subset of data from the Infant Health and Development Program (IHDP).\n", - "- Then we synthetically create multiple treatments" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "36870c05-351b-4550-9af9-1aef6b68664d", - "metadata": {}, - "outputs": [], - "source": [ - "data = synth_ihdp(return_df=True).iloc[:,:10]" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e2bf1659-9e49-4e5a-b3c2-767b04e4af57", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
treatmenty_factualx1x2x3x4x5x6x7x8
015.599916-0.528603-0.3434551.1285540.161703-0.3166031.29521610
106.875856-1.736945-1.8020020.3838282.244320-0.6291891.29521600
202.996273-0.807451-0.202946-0.360898-0.8796060.808706-0.52655600
301.3662060.3900830.596582-1.850350-0.879606-0.004017-0.85778700
401.963538-1.045229-0.6027100.0114650.1617030.683672-0.36094010
\n", - "
" - ], - "text/plain": [ - " treatment y_factual x1 x2 x3 x4 x5 \\\n", - "0 1 5.599916 -0.528603 -0.343455 1.128554 0.161703 -0.316603 \n", - "1 0 6.875856 -1.736945 -1.802002 0.383828 2.244320 -0.629189 \n", - "2 0 2.996273 -0.807451 -0.202946 -0.360898 -0.879606 0.808706 \n", - "3 0 1.366206 0.390083 0.596582 -1.850350 -0.879606 -0.004017 \n", - "4 0 1.963538 -1.045229 -0.602710 0.011465 0.161703 0.683672 \n", - "\n", - " x6 x7 x8 \n", - "0 1.295216 1 0 \n", - "1 1.295216 0 0 \n", - "2 -0.526556 0 0 \n", - "3 -0.857787 0 0 \n", - "4 -0.360940 1 0 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "9015a4f3-7df9-4b8d-ade5-6fa022dbb6ea", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAACYAAAAPCAYAAACInr1QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAABJ0AAASdAHeZh94AAABhUlEQVR4nM2Vvy8EQRTHP8cWJFfwB2glIgoaIqfxP1CdhASlKFQSntdpFBINdxdBr3L0Irn2Wko/Sh1BcU4xc3JZ83ZvL1d4zc7u9818vvvm7Wyu2WzyHyNqDVR1GThNyf8WkX5LVNUicOFv10SkHNM7ZkRtD+qAGslzwDxwk2BqBDgC3oC8kdYx49eYiNT9xBC05ocnhp7DVeIVuAS2QnlZGH2G+/YJE8AM8AJUjbQN3NuuAO9pa3bCSDUGrPtrRUQagUXHgH3gUERus5qyGInGVHUQKAINoBzQI1yzPwLb3TiyGJE5w8UiMARUReQpoO8Ck0BBRD66MWYx0rayVeLjuKCq07gqHYhILa5niCDDNKaq48As8Axcx7QIOAcegJ1uHSUxkrYyqenzwKgff6oGj6aSqpZwH8VmVkbQmKoOAEu4hqwEUr6M5wBTuL67A+6B4DanMayKLQDDwFWo6X2jrxrAPW/sLP5LysKweqxV4uBJ36NIZPwx5g/MAoGG7FV0wvgBj4OYCC1tHJMAAAAASUVORK5CYII=", - "text/latex": [ - "$\\displaystyle 747$" - ], - "text/plain": [ - "747" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(data)" - ] - }, - { - "cell_type": "markdown", - "id": "1051b3de-cc27-41df-8fe1-56aac7627dd4", - "metadata": {}, - "source": [ - "- Here we just randomly create multiple treatments" - ] - }, - { - "cell_type": "code", - 
"execution_count": 5, - "id": "69cc5159-d305-44c1-9900-b6f57233e6ba", - "metadata": {}, - "outputs": [], - "source": [ - "np.random.seed(42)\n", - "def generate_treatments(N: int, n_groups: int) -> List[List[Union[int, float]]]:\n", - " treatments = [random.randint(0, n_groups) for _ in range(N)]\n", - " values = [[random.random() for _ in range(N)] for _ in range(n_groups+1)]\n", - " row_sums = [sum(row) for row in values]\n", - " probabilities = [[prob / row_sum for prob in row] for row, row_sum in zip(values, row_sums)]\n", - " for i in range(N):\n", - " sum_probs = sum(row[i] for row in probabilities)\n", - " for row in probabilities:\n", - " row[i] /= sum_probs\n", - "\n", - " return [treatments] + probabilities\n", - "\n", - "N = 747\n", - "n_groups = 4\n", - "\n", - "treatments = generate_treatments(N, n_groups)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "09159be8-9f68-45fc-9e2a-e843ce975f0a", - "metadata": {}, - "outputs": [], - "source": [ - "data['treatment'] = treatments[0]\n", - "data['propensity_control'] = treatments[1]\n", - "data['propensity_treatment1'] = treatments[2]\n", - "data['propensity_treatment2'] = treatments[3]\n", - "data['propensity_treatment3'] = treatments[4]\n", - "data['propensity_treatment4'] = treatments[5]\n", - "\n", - "data['propensity_selected_group'] = data.apply(\n", - " lambda row: row['propensity_control'] if row['treatment'] == 0 else \n", - " (row['propensity_treatment1'] if row['treatment'] == 1 else \n", - " (row['propensity_treatment2'] if row['treatment'] == 2 else \n", - " (row['propensity_treatment3'] if row['treatment'] == 3 else\n", - " (row['propensity_treatment4'] if row['treatment'] == 4 else None)))),\n", - " axis=1\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "379a507a-f85c-435a-8abf-ee1b27adb66d", - "metadata": {}, - "source": [ - "- Now we have `treatment` (0, 1, 2, 3, 4)\n", - "- Outcome `y_factual`\n", - "- Features `x1, x2, x3, x4, x5, x6, x7, x8`\n", - "- 
Propensities `control, treatment1, treatment2, treatment3, treatment4`\n", - "- selected treatment related propensity `propensity_selected_group`" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "c13377bd-0608-4ef7-b3a5-0a44e36f32f4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
treatmenty_factualx1x2x3x4x5x6x7x8propensity_controlpropensity_treatment1propensity_treatment2propensity_treatment3propensity_treatment4propensity_selected_group
015.599916-0.528603-0.3434551.1285540.161703-0.3166031.295216100.0923360.4130070.0158410.2585680.2202480.413007
106.875856-1.736945-1.8020020.3838282.244320-0.6291891.295216000.0497680.3295090.1578640.2297020.2331570.049768
212.996273-0.807451-0.202946-0.360898-0.8796060.808706-0.526556000.1616290.1434180.1444130.3052260.2453140.143418
301.3662060.3900830.596582-1.850350-0.879606-0.004017-0.857787000.1888820.1215540.2107540.2173490.2614610.188882
411.963538-1.045229-0.6027100.0114650.1617030.683672-0.360940100.2513650.3219260.2152220.0080520.2034360.321926
\n", - "
" - ], - "text/plain": [ - " treatment y_factual x1 x2 x3 x4 x5 \\\n", - "0 1 5.599916 -0.528603 -0.343455 1.128554 0.161703 -0.316603 \n", - "1 0 6.875856 -1.736945 -1.802002 0.383828 2.244320 -0.629189 \n", - "2 1 2.996273 -0.807451 -0.202946 -0.360898 -0.879606 0.808706 \n", - "3 0 1.366206 0.390083 0.596582 -1.850350 -0.879606 -0.004017 \n", - "4 1 1.963538 -1.045229 -0.602710 0.011465 0.161703 0.683672 \n", - "\n", - " x6 x7 x8 propensity_control propensity_treatment1 \\\n", - "0 1.295216 1 0 0.092336 0.413007 \n", - "1 1.295216 0 0 0.049768 0.329509 \n", - "2 -0.526556 0 0 0.161629 0.143418 \n", - "3 -0.857787 0 0 0.188882 0.121554 \n", - "4 -0.360940 1 0 0.251365 0.321926 \n", - "\n", - " propensity_treatment2 propensity_treatment3 propensity_treatment4 \\\n", - "0 0.015841 0.258568 0.220248 \n", - "1 0.157864 0.229702 0.233157 \n", - "2 0.144413 0.305226 0.245314 \n", - "3 0.210754 0.217349 0.261461 \n", - "4 0.215222 0.008052 0.203436 \n", - "\n", - " propensity_selected_group \n", - "0 0.413007 \n", - "1 0.049768 \n", - "2 0.143418 \n", - "3 0.188882 \n", - "4 0.321926 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data.head()" - ] - }, - { - "cell_type": "markdown", - "id": "98330541-4245-4790-84d3-0deb00aeb1bc", - "metadata": {}, - "source": [ - "Generally, at least four arguments have to be supplied to `CausalityDataset` if you want to train a model:\n", - "- `data`: input dataframe\n", - "- `treatment`: name of treatment column\n", - "- `outcomes`: list of names of outcome columns; provide as list even if there's just one outcome of interest\n", - "- `effect_modifiers`: list of names of feature columns;\n", - "\n", - "In addition, if the propensities to treat are known, then provide the corresponding column name(s) via `propensity_modifiers`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "7b8011b6-1c23-43de-b76a-52baf74dc656", - "metadata": {}, - "outputs": [], - "source": [ - "features = [\n", - " 'x1',\n", - " 'x2',\n", - " 'x3',\n", - " 'x4',\n", - " 'x5',\n", - " 'x6',\n", - " 'x7',\n", - " 'x8'\n", - "]\n", - "\n", - "propensities = [\n", - " 'propensity_control',\n", - " 'propensity_treatment1',\n", - " 'propensity_treatment2',\n", - " 'propensity_treatment3',\n", - " 'propensity_treatment4'\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "1a894d9f-1b20-4f83-8425-f38c1bebf146", - "metadata": {}, - "outputs": [], - "source": [ - "cd = CausalityDataset(\n", - " data=data,\n", - " treatment='treatment',\n", - " outcomes=['y_factual'],\n", - " effect_modifiers = features,\n", - " propensity_modifiers = propensities\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "8df6ecc4-7758-4f99-be0d-a21b118b56d1", - "metadata": {}, - "source": [ - "- To transform categorical columns and do some manipulations" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "cbd3c8f9-38db-4458-9691-e6d80c448f08", - "metadata": {}, - "outputs": [], - "source": [ - "cd.preprocess_dataset()" - ] - }, - { - "cell_type": "markdown", - "id": "d0288aa2-e1b2-4bf8-877d-b45c52f3bc74", - "metadata": {}, - "source": [ - "### Train CausalTune with simple energy distance (multiple treatments)\n", - "- Here we fit a (selection of) model(s) to the data and score them with the energy distance metric" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "fd3239ff-6d8e-4433-b307-e58ff237d807", - "metadata": {}, - "outputs": [], - "source": [ - "# training configs\n", - "\n", - "# set evaluation metric\n", - "metric = \"energy_distance\"\n", - "\n", - "# it's best to specify either time_budget or components_time_budget, \n", - "# and let the other one be inferred; time in seconds\n", - "components_time_budget = 10\n", - "\n", - "# specify training set 
size\n", - "train_size = 0.7" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "79762873-547f-4c0a-8637-c7ea11e86ca4", - "metadata": {}, - "outputs": [], - "source": [ - "ct = CausalTune(\n", - " estimator_list=[\n", - " \"DomainAdaptationLearner\",\n", - " \"CausalForestDML\",\n", - " \"ForestDRLearner\",\n", - " ],\n", - " metric=metric,\n", - " verbose=1,\n", - " components_time_budget=components_time_budget,\n", - " train_size=train_size,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "6e54c819-d1c4-488e-89dd-37320362f561", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[flaml.tune.tune: 04-04 21:11:06] {493} WARNING - Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]\n", - "[flaml.tune.tune: 04-04 21:11:06] {636} INFO - trial 1 config: {'estimator': {'estimator_name': 'backdoor.econml.metalearners.DomainAdaptationLearner'}}\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting a Propensity-Weighted scoring estimator to be used in scoring tasks\n", - "Initial configs: [{'estimator': {'estimator_name': 'backdoor.econml.metalearners.DomainAdaptationLearner'}}, {'estimator': {'estimator_name': 'backdoor.econml.dr.ForestDRLearner', 'min_propensity': 1e-06, 'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'max_samples': 0.45, 'min_balancedness_tol': 0.45, 'honest': True, 'subforest_size': 4}}, {'estimator': {'estimator_name': 'backdoor.econml.dml.CausalForestDML', 'drate': True, 'n_estimators': 100, 'criterion': 'mse', 'min_samples_split': 10, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'max_samples': 0.45, 'min_balancedness_tol': 0.45, 'honest': True, 'fit_intercept': True, 'subforest_size': 4}}]\n", - "---------------------\n", - 
"Best estimator: backdoor.econml.metalearners.DomainAdaptationLearner\n", - "Best config: {'estimator': {'estimator_name': 'backdoor.econml.metalearners.DomainAdaptationLearner'}}\n", - "Best score: 0.17874868883720296\n" - ] - } - ], - "source": [ - "# run causaltune\n", - "ct.fit(data=cd, outcome=cd.outcomes[0])\n", - "\n", - "print('---------------------')\n", - "# return best estimator\n", - "print(f\"Best estimator: {ct.best_estimator}\")\n", - "# config of best estimator:\n", - "print(f\"Best config: {ct.best_config}\")\n", - "# best score:\n", - "print(f\"Best score: {ct.best_score}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "3c0969e0-60b2-48db-895a-e3045487c5d3", - "metadata": {}, - "outputs": [], - "source": [ - "preds = ct.effect(cd.data)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "9e68cb46-0d2c-49e5-9c4c-622038afd8db", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[-0.16751623, 0.10701928, 0.85091579, 0.77418554],\n", - " [-0.16751623, 0.10701928, 1.60185647, 0.30859345],\n", - " [ 0.7479797 , -2.05958891, -0.75960481, -0.05422062],\n", - " ...,\n", - " [ 0.44195521, 0.52434093, 0.19784707, 0.77418554],\n", - " [-0.35218936, 0.52434093, 0.85091579, 0.77418554],\n", - " [ 1.91686213, 0.10701928, 0.19784707, 0.77418554]])" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "preds" - ] - }, - { - "cell_type": "markdown", - "id": "9b9218e5-33ac-47c0-a3af-f264620e78be", - "metadata": {}, - "source": [ - "- Here we want to get results from the predictions\n", - "- We do naive argmax, but it is also recommended to use Thompson Sampling" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "2f437600-0161-4439-a30c-695dca858e6e", - "metadata": {}, - "outputs": [], - "source": [ - "predicted_treatments = np.argmax(preds, axis=1) + 1" - ] - }, - { - "cell_type": "markdown", - "id": 
"31cf24b5-013b-43a9-9db9-85f214f0dad7", - "metadata": {}, - "source": [ - "### Train CausalTune with custom multiple propensities\n", - "In some settings such as uplift modelling, the experiment / study is based on heterogeneous treatment propensities known to the researcher / experimenter. An array of treatment propensities can be directly supplied to CausalTune in the data instantiation of the `CausalityDataset`. This can, e.g. be done by \n", - "```\n", - "cd = CausalityDataset(\n", - " ...\n", - " propensity_modifiers=[]\n", - " ...\n", - ")\n", - "```\n", - "and then using the `passthrough_model` as follows" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "01347374-8524-4906-8a80-af02602fbfc1", - "metadata": {}, - "outputs": [], - "source": [ - "propensity_model=passthrough_model(\n", - " cd.propensity_modifiers, include_control=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "f0255a70-dd9d-45e6-ae6a-4b5bc6001714", - "metadata": {}, - "outputs": [], - "source": [ - "# set evaluation metric, if we want to use custom propensities, we should use \"psw_energy_distance\" metric\n", - "metric = \"psw_energy_distance\"" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "ef2b635b-c92f-45c2-9038-c69d4ddd5c62", - "metadata": {}, - "outputs": [], - "source": [ - "ct2 = CausalTune(\n", - " estimator_list=[\n", - " \"DomainAdaptationLearner\",\n", - " \"CausalForestDML\",\n", - " \"ForestDRLearner\",\n", - " ],\n", - " metric=metric,\n", - " verbose=1,\n", - " components_time_budget=components_time_budget,\n", - " train_size=train_size,\n", - " propensity_model=propensity_model\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "ed6ad9d8-efcd-4123-935e-ebac56b39143", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:Using energy_distance metric as psw_energy_distance is not in the list of supported metrics for 
this usecase (['energy_distance'])\n", - "WARNING:flaml.tune.tune:Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]\n", - "INFO:flaml.tune.searcher.blendsearch:No low-cost partial config given to the search algorithm. For cost-frugal search, consider providing low-cost values for cost-related hps via 'low_cost_partial_config'. More info can be found at https://microsoft.github.io/FLAML/docs/FAQ#about-low_cost_partial_config-in-tune\n", - "INFO:flaml.tune.tune:trial 1 config: {'estimator': {'estimator_name': 'backdoor.econml.metalearners.DomainAdaptationLearner'}}\n", - "WARNING:dowhy.causal_estimator:Concatenating common_causes and effect_modifiers and providing a single list of variables to metalearner estimator method, DomainAdaptationLearner. EconML metalearners accept a single X argument.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting a Propensity-Weighted scoring estimator to be used in scoring tasks\n", - "Initial configs: [{'estimator': {'estimator_name': 'backdoor.econml.metalearners.DomainAdaptationLearner'}}, {'estimator': {'estimator_name': 'backdoor.econml.dr.ForestDRLearner', 'min_propensity': 1e-06, 'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'max_samples': 0.45, 'min_balancedness_tol': 0.45, 'honest': True, 'subforest_size': 4}}, {'estimator': {'estimator_name': 'backdoor.econml.dml.CausalForestDML', 'drate': True, 'n_estimators': 100, 'criterion': 'mse', 'min_samples_split': 10, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'max_samples': 0.45, 'min_balancedness_tol': 0.45, 'honest': True, 'fit_intercept': True, 'subforest_size': 4}}]\n", - "---------------------\n", - "Best estimator: backdoor.econml.metalearners.DomainAdaptationLearner\n", - "Best config: {'estimator': {'estimator_name': 
'backdoor.econml.metalearners.DomainAdaptationLearner'}}\n", - "Best score: 0.271265592878807\n" - ] - } - ], - "source": [ - "# run causaltune\n", - "ct2.fit(data=cd, outcome=cd.outcomes[0])\n", - "\n", - "print('---------------------')\n", - "# return best estimator\n", - "print(f\"Best estimator: {ct2.best_estimator}\")\n", - "# config of best estimator:\n", - "print(f\"Best config: {ct2.best_config}\")\n", - "# best score:\n", - "print(f\"Best score: {ct2.best_score}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "4116f254-1428-4283-9a44-095ac5de95d5", - "metadata": {}, - "outputs": [], - "source": [ - "preds2 = ct2.effect(cd.data)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "1768aa4c-43c6-47d6-8cc7-2626469ce3f7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[-0.51886928, -0.58574743, 0.20972794, -0.83253706],\n", - " [-0.51886928, -0.12708488, 0.78949869, -0.83253706],\n", - " [ 1.60285747, 0.31716518, 0.20972794, 0.75203586],\n", - " ...,\n", - " [ 1.91001916, 0.1818637 , -1.19342971, 0.31739539],\n", - " [ 1.91001916, -0.06212608, 0.49456951, 1.64734221],\n", - " [ 1.60285747, 0.31716518, -1.19342971, 0.31739539]])" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "preds2" - ] - }, - { - "cell_type": "markdown", - "id": "196bd451-6663-4d59-84f7-d65eb2547015", - "metadata": {}, - "source": [ - "- Here we want to get results from the predictions\n", - "- We do naive argmax, but it is also recommended to use Thompson Sampling" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "e3265a9e-b58e-477e-a1a9-c4af38b8b422", - "metadata": {}, - "outputs": [], - "source": [ - "predicted_treatments2 = np.argmax(preds2, axis=1) + 1" - ] - }, - { - "cell_type": "markdown", - "id": "f1a6c03f-ce5a-4b7e-91d0-146bd0feba77", - "metadata": {}, - "source": [ - "### Run ERUPT with multiple propensities\n", - "Below we 
demonstrate how to use Estimated Response Under Proposed Treatment (ERUPT) to estimate the average treatment effect had the treatment been assigned randomly. Recall that the dataset used in this example is constructed in a way that the treatment propensity is a function of a unit's covariates." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "ff674637-1136-46b6-ae6d-a0cdd3218b56", - "metadata": {}, - "outputs": [], - "source": [ - "np.random.seed(42)\n", - "test = synth_ihdp(return_df=True).iloc[:,:10]\n", - "N = 747\n", - "n_groups = 4\n", - "\n", - "treatments = generate_treatments(N, n_groups)\n", - "\n", - "test['treatment'] = treatments[0]\n", - "test['propensity_control'] = treatments[1]\n", - "test['propensity_treatment1'] = treatments[2]\n", - "test['propensity_treatment2'] = treatments[3]\n", - "test['propensity_treatment3'] = treatments[4]\n", - "test['propensity_treatment4'] = treatments[5]\n", - "\n", - "test['propensity_selected_group'] = data.apply(\n", - " lambda row: row['propensity_control'] if row['treatment'] == 0 else \n", - " (row['propensity_treatment1'] if row['treatment'] == 1 else \n", - " (row['propensity_treatment2'] if row['treatment'] == 2 else \n", - " (row['propensity_treatment3'] if row['treatment'] == 3 else\n", - " (row['propensity_treatment4'] if row['treatment'] == 4 else None)))),\n", - " axis=1\n", - ")\n", - "\n", - "def randomize(value):\n", - " return value + np.random.uniform(-1, 1)\n", - "\n", - "for col in features:\n", - " test[col] = test[col].apply(randomize)\n", - "\n", - "test['y_factual'] = test['y_factual'].apply(randomize)\n", - "test['treatment'] = test['treatment'].sample(len(test))" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "74e51a09-3a76-47c8-a7f4-7665d48873fa", - "metadata": {}, - "outputs": [], - "source": [ - "test = test[test['treatment'] != 0].reset_index(drop=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": 
"345bfe6d-dbdd-4ac9-a8d1-d15860c691ac", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
treatmenty_factualx1x2x3x4x5x6x7x8propensity_controlpropensity_treatment1propensity_treatment2propensity_treatment3propensity_treatment4propensity_selected_group
014.699637-0.779523-0.2563491.335451-0.7689570.1203051.2371361.1069440.1956160.1386870.2812080.1216570.1888730.2695740.413007
136.175675-0.835516-0.9890580.7202542.4099330.0960921.6190430.1405740.3294780.2235280.1878950.1193540.2409130.2283090.049768
223.071435-0.3434630.045530-0.1219170.1112690.167218-0.024201-0.579786-0.1613310.1575340.0905920.2746810.3189410.1582520.143418
311.5177910.587400-0.169622-1.923362-0.1682140.595990-0.3500510.4845430.4021950.2520370.5922560.0128720.1177460.0250900.188882
412.253838-1.7331910.276954-0.2289630.2045940.789086-0.2051580.050398-0.1785370.0446870.4093170.0214720.0962610.4282620.321926
\n", - "
" - ], - "text/plain": [ - " treatment y_factual x1 x2 x3 x4 x5 \\\n", - "0 1 4.699637 -0.779523 -0.256349 1.335451 -0.768957 0.120305 \n", - "1 3 6.175675 -0.835516 -0.989058 0.720254 2.409933 0.096092 \n", - "2 2 3.071435 -0.343463 0.045530 -0.121917 0.111269 0.167218 \n", - "3 1 1.517791 0.587400 -0.169622 -1.923362 -0.168214 0.595990 \n", - "4 1 2.253838 -1.733191 0.276954 -0.228963 0.204594 0.789086 \n", - "\n", - " x6 x7 x8 propensity_control propensity_treatment1 \\\n", - "0 1.237136 1.106944 0.195616 0.138687 0.281208 \n", - "1 1.619043 0.140574 0.329478 0.223528 0.187895 \n", - "2 -0.024201 -0.579786 -0.161331 0.157534 0.090592 \n", - "3 -0.350051 0.484543 0.402195 0.252037 0.592256 \n", - "4 -0.205158 0.050398 -0.178537 0.044687 0.409317 \n", - "\n", - " propensity_treatment2 propensity_treatment3 propensity_treatment4 \\\n", - "0 0.121657 0.188873 0.269574 \n", - "1 0.119354 0.240913 0.228309 \n", - "2 0.274681 0.318941 0.158252 \n", - "3 0.012872 0.117746 0.025090 \n", - "4 0.021472 0.096261 0.428262 \n", - "\n", - " propensity_selected_group \n", - "0 0.413007 \n", - "1 0.049768 \n", - "2 0.143418 \n", - "3 0.188882 \n", - "4 0.321926 " - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "edeff630-5d92-4f1a-9510-e9272c477294", - "metadata": {}, - "outputs": [], - "source": [ - "cd_test = CausalityDataset(\n", - " data=test,\n", - " treatment='treatment',\n", - " outcomes=['y_factual'],\n", - " effect_modifiers = features,\n", - " propensity_modifiers = propensities\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "151b9267-310b-4f6d-adaf-db2d44f63c6d", - "metadata": {}, - "outputs": [], - "source": [ - "cd_test.preprocess_dataset()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "46659420-fa53-4c5b-891e-a3fc99bc84cf", - "metadata": {}, - "outputs": [], - 
"source": [ - "preds_test = ct2.effect(cd_test.data)" - ] - }, - { - "cell_type": "markdown", - "id": "0788bc9e-5b0e-4b86-b128-7c5f0b11770e", - "metadata": {}, - "source": [ - "- Here we want to get results from the predictions\n", - "- We do naive argmax, but it is also recommended to use Thompson Sampling" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "30d23496-c5d6-4702-be34-01d7a85e8d88", - "metadata": {}, - "outputs": [], - "source": [ - "treatments_test = np.argmax(preds_test, axis=1) + 1" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "4832a0d9-18e9-4a2f-9e72-ef207d1d8fb5", - "metadata": {}, - "outputs": [], - "source": [ - "test['predicted_treatment'] = treatments_test\n", - "use_df = test" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "470e4aae-a435-48f1-95bd-35d9d5d71c84", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 476.18it/s]\n" - ] - } - ], - "source": [ - "# computing mean ERUPT over 10 bootstrapped samples\n", - "from tqdm import tqdm\n", - "scores_list = []\n", - "\n", - "for i in tqdm(range(10)):\n", - "\n", - " bootstrap_df = use_df.sample(frac=1, replace=True)\n", - " propensities = bootstrap_df['propensity_selected_group']\n", - " actual_treatment = bootstrap_df['treatment']\n", - " outcome = bootstrap_df['y_factual']\n", - "\n", - " # define the random assignment policy\n", - " random_policy = bootstrap_df['predicted_treatment']\n", - "\n", - " # define a propensity model that will simply return the propensities when calling predict_proba\n", - " propensity_model = DummyPropensity(p=propensities, treatment=actual_treatment)\n", - "\n", - " # obtain ERUPT under random policy\n", - " e = ERUPT(treatment_name='treatment', propensity_model=propensity_model)\n", - " scores_list.append(e.score(df=use_df,outcome=outcome,policy=random_policy))\n", - "\n", - 
"erupt_mean = np.mean(scores_list)\n", - "erupt_sd = np.std(scores_list)" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "259f373d-691c-4178-921d-3bd1cfd04b07", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAMYAAAAQCAYAAABN/ABvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAABJ0AAASdAHeZh94AAAIHklEQVR4nO2afbBWVRXGf1dvoN7UDEXKUoQi8fNSRlCJ3MGoQExKy2kgc0bISQa/0MyyxeOMIZUIZJqUA0VMjVmShhCIjKSYzCgOOpCSfCSWJNC1Gx9DfPTH2od77r7nvPec93397z4z7+z37LPW3vs5a3+stfduOHToEN3oRjc6ojH9IGk6cD4wADgR2ANsARYC95rZjrIVSBoBTAKGAicAO4CXgFlm9ngd5D8A3AF8DugF/DO0V2b27wz5UhwlNQBXh99ZQAOwHvg5MMfMDmbUcRlwIdAMnAccCywws3E53+jrwNysdykcNLMjM3RHA9cBZ9LO/3lghpk9WwsXSb2AscBo4BzgFGAfbo+5wNy0TrU8yn6voLMZOC3n9TYz65OhU9j2R0S6NwBNwDJgFrAA2A9MBdZK+mBeQ3Ma/wPgidCYR4G7gUXAScDwOsj3xzvBVcBq4B5gI95Rng2GjVGW46+AOUBf4Nd4JzoGuB+Yl0P9u/jgbgbeyJFJ40VAOb8ng8ziWCkY+o/AR4Elgc8LwBeAZyTFHassl8uBnwGfAJ4DZgK/A84Oug+FwVYTD8p/rwRv59T1oxz5wrZvjBSPM7O9cWmS7gRuA74NfLNIiyVNAG4GfgFMNLN90ft31SIfcB/QG5hsZj9Oyc7AP8KdwDXVcpQ0FvgqsAkYbGbbQ34PvIOMl7TQzH4fFXcDsBX4Gz4Trsho+2GY2Yt4p+oEScmsPyfK7wNMAbYB55rZv1LvWvCOeAc+GKrl8ipwCbAoWhluwyeiLwFfDPpV8Qgo9b1SaDWzqQVloYTtO6wYWUoBD4X0w0Vql9QT75R/J6OTh7r+V6180OkPjAQ2Az+JxYFduLGbonLKcBwb0ruTjhTK2AfcHh4nZbR1hZltMLOaAjhJ5wBD8Fl0UfT6NNx+z6UHRVI/0IavtFVzMbMnzeyx2MUyszeBn4bH4TXyqNv36gplbB+vGHkYE9K1BeU/gxtlJnAw+MFnA3uB1bHvW4U8QEtIl2YYrk3SM/jAGQIsL9DmLI6Jn7oxQz7Ju0BSj6zBXAdMDOmDZnYgercB9/cHSzox3dklDcP99IUp+XpzSSaq/QVkK/GoBT2Du3gqPhGuBVZWUUcn22cODElTgHcDx+P+/qeD0l0FK/p4SPcCa/BOni5/JXCZmb1VpTzAR0L6ak4bNuADYwAZA6Mgx6SznZ5Rfr+QNob/f81pR1WQdDQwDjiA+/MdYGY7JX0LmAGsk7QQ36joj7s/y4BvpFTqxkVSI/C18LikFh41og8wP8rbJOkqM3uqQpu6tH0cfCeYgrsj1welJcDIqGNWQu+Q3gwcAi7AZ7BzgaXAMOC3NcgTSIEHYFlI8t+T874Ix2TZv1HSe5PMEO8oJXdCTh214Mt425eY2etZAmY2E/fxG4EJwK14wPw6MC9yserJ5S588nrczP5UK48qMRcYgQ+OJnzX7AF8Y2GxpPMq6HZp+8wVI9nqknQy8En8Q6yRdLGZvVCg0cmA2w9cYmabw/NLIQh8BbhQ0tDgJpWVrxkFOf4GGA98Fp+V/4CvahcB78NjolOBTlu2dUDifjyQJyDp
FuD7wGzgXuBN4AxgGrBAUrOZ3RLE68JF0mTgJnxVGV8PHtXAzBRlvQxcI+m/ePum0h5Xxbpd2j5vxUgK2GZmj+AuSS/glwXb3RrSNalOnpS5G0hmmcFVykP7inA82UjyW3PeJ+Xncgy+6hh8Jn4LuDL8NuAftC2Idgh+a4Wks0L5W4FOZzdBZjgwHXjUzG40s41mtjsYdiwe6N4kqV+9uEiahG9zrgNazGxnrTzeASSbAsO6Eqxk+0LBt5ltkbQOaI4DvRy8EtLWnPfJwdvRVcqndQbk6CQ7DHkxSAfkcQy7YdPD7zAkHRXq2G5mm4rUUQJFgtWLQ9ppa9PMdktajQ+QQYTguhYukq7Hz4leBkbEO2E18Kg3EneoqaJUClm2r7hiRHh/SIsQXI7HCmdKyqojCa43VSkP7R1iZKwj6VjgU8Bu4C8F2pugDMcrgB74QVndEDrp+NCGByuI9gzpSTnvk/wiO0wVuYQg/x78jKKlyKAowaPeGBLSrN23Suhg+8MdStIASZ3cEklHhAOQ3sCq9DULSf0lnREfvpnZFuAx3Ge9LipvJO7nthJ2NMrKB53X8MC8L3Bt3Gx8xphvZrtq5Hhchnwz8EN8JSu6U1cUl+MB8OIugtU/h3SipFOi9n0enxj2AqtS+aW5SLo95D+PrxRdeQtleZSGpIHx+VTI74vHWhAONlPvStk+7UqNAqZJehqfmXcAJ+Mnkf3wwG5CVO5y/KDpdPygLY1r8WV8RjiXWBPkLsVH5dVm9nYN8uCnlKuA2fI7Vuvx6wstuAv1nUi+Go7LJO3BXYg2YCB+d2gPMMbM/hHJI+nS0G5oPz8YKmle+L/dzKbEegGJ+5F1QpzGw/j1mYuA9ZIeCe0fiLtZDcCt1vHuVykukq7ET88P4ANxshTHvGw2s3lxZgke1Xyvr+Dx00r8rlMbvk09GjgKj2fiayGlbJ8eGE8AH8K3rwbhW2y78A42H5jdVbCVhpltlfQx4Hv4vvow4D/4yjDNzFbXIh90XpN0Pu2XCEfhl+hmkX2JsBqOD+Ouxjg8xnkDN/Y0M9uaQ78ZD2zT6Ef7ecEWfMuwAyQNDG3rMlg1s4OSRuETyhV4PHEMsDPozjazpTVySc48jsS3NrPwFNE9qzI8Apop971W4OdYg/CVsQn3KJ7G7Tg/4xS9lO0buq+dd6MbnfF/W3ZYWy22CycAAAAASUVORK5CYII=", - "text/latex": [ - "$\\displaystyle 3.66039178927153$" - ], - "text/plain": [ - "3.660391789271528" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "erupt_mean" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "df132240-5a4b-46a7-a3bc-8ddc87464167", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAMYAAAAQCAYAAABN/ABvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAABJ0AAASdAHeZh94AAAHjklEQVR4nO2ae4xV1RXGfyiidlS0WB/1DS0NWusQX+AbqbRRIdCqNQ1UTcAYJYiKsbXFxacxohUVtSogAUXTxrbWFlGKIpFSaUnsEDRYReRRrC+wo4hQysM/1j7MmeM+d+455/a/+ZKbPeecvfa39pr9WGvt3WXnzp10ohOdaI+u6QdJdwInAb2BA4HNwBrgGeBBM9tQb8OSLgLOBpqBE4B9gSfNbHid8sOBWeFxlJk9mvl+OTCjg2Z2mNnuKZkewDDgAuB44DBgK/BaaGuGme2ooleoU9iOklYDR+XQfmBmh1TlkdQFGBl+xwFdgDeAR4Gpsb6X0SslOxAYDfQHDgA24LaebGbPReofDtwKfB/oAbwX+iIz+08jOOqV2S0jcx3QBLwATAaeBLYBE4Blko7IM0IEvwjkzcC7BeQIPA8Cn9WothRQzu+lUOf5jMzFwDTgVODvwH3A74Fv44PjqTB4qugF5e34SU5/7m4QzxPAVOBo4Nd4n78CPAzMrNGfonoh6S7gRXzi/gmYBMwBvgacE6nfC3gVuAJYAtwLvANcCywOi1oljiIyXTNy+5nZlkhjtwM3Az8Dro4RRnAdsA54G985FtQjFAbmDHwWPw2Mi9Uzs6X45Ii1sTj8OTXz6S1gCDAnvTpKuhn/Z/wQ+AE+WUrpFVDWjq1mNqFGu6V5JA0DfgysAk4xs/XhfTe8vyMkPWNmT1fVS9Io4EbgMeBKM9ua+b5HROwh4CBgjJk9kKp7Dz6WbgeuqsJRRKbdjhEzcsBTofxmzvcvwcwWmNkKMysaxIwBzsVXjk0FZZF0PNAP36XmZHR6ycxmZ10GM3sfeCQ8nlNVr0basYE8w0I5KZkUoY2twPjwOLqqTpL2xAfxWiKDL3D+LyPTCxgErAZ+la2O23uEpKYKHIVksjtGHgaHclmd9UtBUh9gIu7rLZR0bolmrgzldDPbXkAuMcq2/5Ne0LEd9wwxzJH4YFgGLCzYjzyeJBZ4J1I/eXempG6RQVNEr/Nwt+Q+YIekC3BXdQuwxMwWR2QGhHJeZNHaKOmv+MTpB8wvyVFIJjoxJI0D9gG6477YGcEYE2P1GwFJXfGgdi3uBpRpY29gOLAd95+LcP8kPM5tlF4l7HgIbYF9glWSrjCzlyvyJLvEMZEmeoaya/j7nxX0OjmUW4AWfPCldV0IXGRmH6VefyuUb0V0A1iBT4ze+MQow1FIJht8JxiHb2FjcSPPBQZliBqNW4C+wOVmtrlkG5cA+wNzzexfBeQm4oZ6zsz+3EC9ithxBjAQH4RNeNZsCh4oPy/phIo8iVt5vaSvJi+DX61UvQMq6nVQKG8EdgJn4hnJ7wDzgLOA32Zkuofyk5z+Je/3r8BRSCa6YyQpOEkHA6fhA6dF0oVm9o8c5UtD0qn4ajwpZxusF4kbNaUA9xjgBnyVHNFIvYrY0cyUEX8duErSZ0G/CbTFCWV4fhP69z1guaQ/4qvnd4FD8R3xSCDryhTVK1lstwFDzGx1eH4tJADeBM6W1L/C/7oMRyGZvB0DADP7wMz+gG9jPYDHS3YkF8FVeRzfRsd3UL1WO8fhg2IdEM1fR2RG42nO5cAAM/u40XpBZTsmSYGzqvCEeGAw8FPgI+Cy8FuB221jqPphRb1aQ9mSGnyJDp8DyY58SupTsiN0J47kfWumLMJRSKau4NvM1khaDjRLOjCd1WgA9sF9R4AtUnaBAmCapGl48Ds2p51CQbeksXiu/HVgoJllB0Sj9NqFknZM3KGmOurW5AlZlzvDbxck7YVnsNab2ao6afL0ejOUrTlyyUHd3hGZ3sSRZNeSGKQKR10yNXe
MDL4eyqIZko7wX2B6zq8l1FkUnqNbb/jHjgi6Te+IUNJN+KRYiu8UsVWysl45KGrHfqGMZZMaxXMp0A0/9KsXeXrNx334YyXFxlcS9KYnYHLGNSgrI2lf4HTgc+BvFTgKyezaMST1xo/42wVAoZHb8ODllfTRfMg/7wGszOaN60UIaEfGvkmagAe+j8WuXqRwMR40PttR0C1pPH7t4FU8QP04Vq+sXiXt2AdYa2abMjJH4yft4KfWVXn2M7NPM/WbgV/iK+bEzLfCeoXdajZ+kHotvgAlcoPwGKeVVPbPzFZKmoe7gNcAD6SaFL4rTUn0KMlRSCbtSp0P3CFpET5rNgAH46fWPYH3gVG0x3z8Hs0x+OFMW2+kocDQ8Jjk0PtLmhn+Xm9mtU6PiyBxo7In3e0g6TJ8UmwH/gKMibhIq81sZvZlAZSx44+AG0LKcA3u7/fC73TthcdM2esXZXhekLQZdx83An0Cx2ZgsJn9uwF6gQ/uvsA94bygBR8jQ3Hbj8xOaPyE/hXgfvldpjfwqzsDcBfq5w3gqFsmPTFeBL6Bp/v64qmxTUGpWcD9eatrDprx4C6NnrTlzNdQ+1pFXQir2hnUF3QnOfzd8dRmDC9T+95QRyhjxwV4Lr8v7jY04avXoiAzK3KDoAzP73C3aTjuS7+LLyZ3mNm6SF/K6IWZrZN0Ip7qHoIH6J8CswPXkojMSkkn0XaJ8Hz8EuFkIpcIS3LULdOl89p5JzrxZXwBF1RjY7RiTAsAAAAASUVORK5CYII=", - "text/latex": [ - "$\\displaystyle 3.14724353956066$" - ], - "text/plain": [ - "3.1472435395606566" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test['y_factual'].mean()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4fd63daa-8d09-42cf-a20d-971b773a4e73", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.18" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From b35fcc0c975947ee0a5664a5547c894d5c4a0003 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 10 Sep 2024 15:06:17 +0100 Subject: [PATCH 10/22] Add files via upload Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- notebooks/Multiple treatments examples.ipynb | 1291 ++++++++++++++++++ 1 
file changed, 1291 insertions(+) create mode 100644 notebooks/Multiple treatments examples.ipynb diff --git a/notebooks/Multiple treatments examples.ipynb b/notebooks/Multiple treatments examples.ipynb new file mode 100644 index 00000000..69441483 --- /dev/null +++ b/notebooks/Multiple treatments examples.ipynb @@ -0,0 +1,1291 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "afbd11d5-c7d7-4467-9b80-fa73b58bae00", + "metadata": {}, + "source": [ + "# Multiple treatments examples\n", + "- In this notebook, we want to demonstrate common use cases where we have multiple treatments (ab testing is not covered, as you can just use another notebook for it)\n", + "- Simple CausalTune training with multiple treatments\n", + "- Applying custom propensities and train CausalTune (for example, in Uplift models)\n", + "- ERUPT for multiple treatments" + ] + }, + { + "cell_type": "markdown", + "id": "11352cfb-89b2-48be-adb5-e68cd95d2ce8", + "metadata": {}, + "source": [ + "### Setting up the data and causal model: CausalityDataset\n", + "- This notebook demonstrates how to train CausalTune with multiple custom propensities.\n", + "- The easiest example here is uplift modeling. Suppose we trained a model, sent messages, launched an experiment.\n", + "- And now we want to train more complex model using previous round model as propensities." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "fbcf1af0-3ad3-4666-b465-633f950409be", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import os, sys\n", + "import warnings\n", + "warnings.filterwarnings('ignore') # suppress sklearn deprecation warnings for now..\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "from typing import List, Union\n", + "import random\n", + "\n", + "# the below checks for whether we run dowhy, causaltune, and FLAML from source\n", + "root_path = root_path = os.path.realpath('../..')\n", + "try:\n", + " import causaltune\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"causaltune\"))\n", + "\n", + "try:\n", + " import dowhy\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"dowhy\"))\n", + "\n", + "try:\n", + " import flaml\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"FLAML\"))\n", + " \n", + " \n", + " \n", + "from causaltune import CausalTune\n", + "from causaltune.datasets import synth_ihdp\n", + "from causaltune.data_utils import CausalityDataset\n", + "from causaltune.erupt import DummyPropensity, ERUPT\n", + "from causaltune.models.passthrough import passthrough_model" + ] + }, + { + "cell_type": "markdown", + "id": "8c238be1-3759-40ba-90bd-ebe30a9c9e5e", + "metadata": {}, + "source": [ + "### Data and generating treatments\n", + "- We first illustrate the model setup with a subset of data from the Infant Health and Development Program (IHDP).\n", + "- Then we synthetically create multiple treatments" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "36870c05-351b-4550-9af9-1aef6b68664d", + "metadata": {}, + "outputs": [], + "source": [ + "data = synth_ihdp(return_df=True).iloc[:,:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + 
"id": "e2bf1659-9e49-4e5a-b3c2-767b04e4af57", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
treatmenty_factualx1x2x3x4x5x6x7x8
015.599916-0.528603-0.3434551.1285540.161703-0.3166031.29521610
106.875856-1.736945-1.8020020.3838282.244320-0.6291891.29521600
202.996273-0.807451-0.202946-0.360898-0.8796060.808706-0.52655600
301.3662060.3900830.596582-1.850350-0.879606-0.004017-0.85778700
401.963538-1.045229-0.6027100.0114650.1617030.683672-0.36094010
\n", + "
" + ], + "text/plain": [ + " treatment y_factual x1 x2 x3 x4 x5 \\\n", + "0 1 5.599916 -0.528603 -0.343455 1.128554 0.161703 -0.316603 \n", + "1 0 6.875856 -1.736945 -1.802002 0.383828 2.244320 -0.629189 \n", + "2 0 2.996273 -0.807451 -0.202946 -0.360898 -0.879606 0.808706 \n", + "3 0 1.366206 0.390083 0.596582 -1.850350 -0.879606 -0.004017 \n", + "4 0 1.963538 -1.045229 -0.602710 0.011465 0.161703 0.683672 \n", + "\n", + " x6 x7 x8 \n", + "0 1.295216 1 0 \n", + "1 1.295216 0 0 \n", + "2 -0.526556 0 0 \n", + "3 -0.857787 0 0 \n", + "4 -0.360940 1 0 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9015a4f3-7df9-4b8d-ade5-6fa022dbb6ea", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAACYAAAAPCAYAAACInr1QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAABJ0AAASdAHeZh94AAABhUlEQVR4nM2Vvy8EQRTHP8cWJFfwB2glIgoaIqfxP1CdhASlKFQSntdpFBINdxdBr3L0Irn2Wko/Sh1BcU4xc3JZ83ZvL1d4zc7u9818vvvm7Wyu2WzyHyNqDVR1GThNyf8WkX5LVNUicOFv10SkHNM7ZkRtD+qAGslzwDxwk2BqBDgC3oC8kdYx49eYiNT9xBC05ocnhp7DVeIVuAS2QnlZGH2G+/YJE8AM8AJUjbQN3NuuAO9pa3bCSDUGrPtrRUQagUXHgH3gUERus5qyGInGVHUQKAINoBzQI1yzPwLb3TiyGJE5w8UiMARUReQpoO8Ck0BBRD66MWYx0rayVeLjuKCq07gqHYhILa5niCDDNKaq48As8Axcx7QIOAcegJ1uHSUxkrYyqenzwKgff6oGj6aSqpZwH8VmVkbQmKoOAEu4hqwEUr6M5wBTuL67A+6B4DanMayKLQDDwFWo6X2jrxrAPW/sLP5LysKweqxV4uBJ36NIZPwx5g/MAoGG7FV0wvgBj4OYCC1tHJMAAAAASUVORK5CYII=", + "text/latex": [ + "$\\displaystyle 747$" + ], + "text/plain": [ + "747" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(data)" + ] + }, + { + "cell_type": "markdown", + "id": "1051b3de-cc27-41df-8fe1-56aac7627dd4", + "metadata": {}, + "source": [ + "- Here we just randomly create multiple treatments" + ] + }, + { + "cell_type": "code", + 
"execution_count": 5, + "id": "69cc5159-d305-44c1-9900-b6f57233e6ba", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "def generate_treatments(N: int, n_groups: int) -> List[List[Union[int, float]]]:\n", + " treatments = [random.randint(0, n_groups) for _ in range(N)]\n", + " values = [[random.random() for _ in range(N)] for _ in range(n_groups+1)]\n", + " row_sums = [sum(row) for row in values]\n", + " probabilities = [[prob / row_sum for prob in row] for row, row_sum in zip(values, row_sums)]\n", + " for i in range(N):\n", + " sum_probs = sum(row[i] for row in probabilities)\n", + " for row in probabilities:\n", + " row[i] /= sum_probs\n", + "\n", + " return [treatments] + probabilities\n", + "\n", + "N = 747\n", + "n_groups = 4\n", + "\n", + "treatments = generate_treatments(N, n_groups)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "09159be8-9f68-45fc-9e2a-e843ce975f0a", + "metadata": {}, + "outputs": [], + "source": [ + "data['treatment'] = treatments[0]\n", + "data['propensity_control'] = treatments[1]\n", + "data['propensity_treatment1'] = treatments[2]\n", + "data['propensity_treatment2'] = treatments[3]\n", + "data['propensity_treatment3'] = treatments[4]\n", + "data['propensity_treatment4'] = treatments[5]\n", + "\n", + "data['propensity_selected_group'] = data.apply(\n", + " lambda row: row['propensity_control'] if row['treatment'] == 0 else \n", + " (row['propensity_treatment1'] if row['treatment'] == 1 else \n", + " (row['propensity_treatment2'] if row['treatment'] == 2 else \n", + " (row['propensity_treatment3'] if row['treatment'] == 3 else\n", + " (row['propensity_treatment4'] if row['treatment'] == 4 else None)))),\n", + " axis=1\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "379a507a-f85c-435a-8abf-ee1b27adb66d", + "metadata": {}, + "source": [ + "- Now we have `treatment` (0, 1, 2, 3, 4)\n", + "- Outcome `y_factual`\n", + "- Features `x1, x2, x3, x4, x5, x6, x7, x8`\n", + "- 
Propensities `control, treatment1, treatment2, treatment3, treatment4`\n", + "- selected treatment related propensity `propensity_selected_group`" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c13377bd-0608-4ef7-b3a5-0a44e36f32f4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
treatmenty_factualx1x2x3x4x5x6x7x8propensity_controlpropensity_treatment1propensity_treatment2propensity_treatment3propensity_treatment4propensity_selected_group
025.599916-0.528603-0.3434551.1285540.161703-0.3166031.295216100.1079490.3415070.0298070.2579140.2628230.029807
136.875856-1.736945-1.8020020.3838282.244320-0.6291891.295216000.3298600.0038110.2709320.2633060.1320910.263306
212.996273-0.807451-0.202946-0.360898-0.8796060.808706-0.526556000.2952830.0829310.2196730.2000070.2021050.082931
341.3662060.3900830.596582-1.850350-0.879606-0.004017-0.857787000.3431190.1912040.0410450.2271610.1974710.197471
441.963538-1.045229-0.6027100.0114650.1617030.683672-0.360940100.2699940.0572990.4029250.0362410.2335410.233541
\n", + "
" + ], + "text/plain": [ + " treatment y_factual x1 x2 x3 x4 x5 \\\n", + "0 2 5.599916 -0.528603 -0.343455 1.128554 0.161703 -0.316603 \n", + "1 3 6.875856 -1.736945 -1.802002 0.383828 2.244320 -0.629189 \n", + "2 1 2.996273 -0.807451 -0.202946 -0.360898 -0.879606 0.808706 \n", + "3 4 1.366206 0.390083 0.596582 -1.850350 -0.879606 -0.004017 \n", + "4 4 1.963538 -1.045229 -0.602710 0.011465 0.161703 0.683672 \n", + "\n", + " x6 x7 x8 propensity_control propensity_treatment1 \\\n", + "0 1.295216 1 0 0.107949 0.341507 \n", + "1 1.295216 0 0 0.329860 0.003811 \n", + "2 -0.526556 0 0 0.295283 0.082931 \n", + "3 -0.857787 0 0 0.343119 0.191204 \n", + "4 -0.360940 1 0 0.269994 0.057299 \n", + "\n", + " propensity_treatment2 propensity_treatment3 propensity_treatment4 \\\n", + "0 0.029807 0.257914 0.262823 \n", + "1 0.270932 0.263306 0.132091 \n", + "2 0.219673 0.200007 0.202105 \n", + "3 0.041045 0.227161 0.197471 \n", + "4 0.402925 0.036241 0.233541 \n", + "\n", + " propensity_selected_group \n", + "0 0.029807 \n", + "1 0.263306 \n", + "2 0.082931 \n", + "3 0.197471 \n", + "4 0.233541 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "markdown", + "id": "98330541-4245-4790-84d3-0deb00aeb1bc", + "metadata": {}, + "source": [ + "Generally, at least four arguments have to be supplied to `CausalityDataset` if you want to train a model:\n", + "- `data`: input dataframe\n", + "- `treatment`: name of treatment column\n", + "- `outcomes`: list of names of outcome columns; provide as list even if there's just one outcome of interest\n", + "- `effect_modifiers`: list of names of feature columns;\n", + "\n", + "In addition, if the propensities to treat are known, then provide the corresponding column name(s) via `propensity_modifiers`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7b8011b6-1c23-43de-b76a-52baf74dc656", + "metadata": {}, + "outputs": [], + "source": [ + "features = [\n", + " 'x1',\n", + " 'x2',\n", + " 'x3',\n", + " 'x4',\n", + " 'x5',\n", + " 'x6',\n", + " 'x7',\n", + " 'x8'\n", + "]\n", + "\n", + "propensities = [\n", + " 'propensity_control',\n", + " 'propensity_treatment1',\n", + " 'propensity_treatment2',\n", + " 'propensity_treatment3',\n", + " 'propensity_treatment4'\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1a894d9f-1b20-4f83-8425-f38c1bebf146", + "metadata": {}, + "outputs": [], + "source": [ + "cd = CausalityDataset(\n", + " data=data,\n", + " treatment='treatment',\n", + " outcomes=['y_factual'],\n", + " effect_modifiers = features,\n", + " propensity_modifiers = propensities\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8df6ecc4-7758-4f99-be0d-a21b118b56d1", + "metadata": {}, + "source": [ + "- To transform categorical columns and do some manipulations" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "cbd3c8f9-38db-4458-9691-e6d80c448f08", + "metadata": {}, + "outputs": [], + "source": [ + "cd.preprocess_dataset()" + ] + }, + { + "cell_type": "markdown", + "id": "d0288aa2-e1b2-4bf8-877d-b45c52f3bc74", + "metadata": {}, + "source": [ + "### Train CausalTune with simple energy distance (multiple treatments)\n", + "- Here we fit a (selection of) model(s) to the data and score them with the energy distance metric" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "fd3239ff-6d8e-4433-b307-e58ff237d807", + "metadata": {}, + "outputs": [], + "source": [ + "# training configs\n", + "\n", + "# set evaluation metric\n", + "metric = \"energy_distance\"\n", + "\n", + "# it's best to specify either time_budget or components_time_budget, \n", + "# and let the other one be inferred; time in seconds\n", + "components_time_budget = 10\n", + "\n", + "# specify training set 
size\n", + "train_size = 0.7" + ] + }, + { + "cell_type": "markdown", + "id": "088b75a1-c390-495f-be80-b253bcbc2a38", + "metadata": {}, + "source": [ + "Now if `outcome_model=\"auto\"` in the CausalTune constructor, we search over a simultaneous search space for the EconML estimators and for FLAML wrappers for common regressors. The old behavior is now achieved by `outcome_model=\"nested\"` (Refitting AutoML for each estimator).\n", + "\n", + "You can also preprocess the data in the CausalityDataset using one of the popular category encoders: OneHot, WoE, Label, Target." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "79762873-547f-4c0a-8637-c7ea11e86ca4", + "metadata": {}, + "outputs": [], + "source": [ + "ct = CausalTune(\n", + " estimator_list=[\n", + " \"DomainAdaptationLearner\",\n", + " \"CausalForestDML\",\n", + " \"ForestDRLearner\",\n", + " ],\n", + " metric=metric,\n", + " verbose=1,\n", + " components_time_budget=components_time_budget,\n", + " train_size=train_size,\n", + " outcome_model=\"auto\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "6e54c819-d1c4-488e-89dd-37320362f561", + "metadata": {}, + "outputs": [], + "source": [ + "# run causaltune\n", + "ct.fit(data=cd, outcome=cd.outcomes[0])\n", + "\n", + "print('---------------------')\n", + "# return best estimator\n", + "print(f\"Best estimator: {ct.best_estimator}\")\n", + "# config of best estimator:\n", + "print(f\"Best config: {ct.best_config}\")\n", + "# best score:\n", + "print(f\"Best score: {ct.best_score}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "3c0969e0-60b2-48db-895a-e3045487c5d3", + "metadata": {}, + "outputs": [], + "source": [ + "preds = ct.effect(cd.data)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "9e68cb46-0d2c-49e5-9c4c-622038afd8db", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.17528909, -0.10834053, -0.27350324, -0.27354165],\n", 
+ " [ 0.19621234, 0.30247269, -0.1484149 , -0.25184741],\n", + " [ 0.11457749, 0.02436523, -0.04183348, 0.23708047],\n", + " ...,\n", + " [-0.96454594, -0.57960515, 0.08710834, -0.38235969],\n", + " [-0.69400728, -0.03214504, -0.0508105 , 0.27500816],\n", + " [-0.02277706, -0.39042558, 0.05169252, -0.22645511]])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preds" + ] + }, + { + "cell_type": "markdown", + "id": "9b9218e5-33ac-47c0-a3af-f264620e78be", + "metadata": {}, + "source": [ + "- Here we want to get results from the predictions\n", + "- We do naive argmax, but it is also recommended to use Thompson Sampling" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "2f437600-0161-4439-a30c-695dca858e6e", + "metadata": {}, + "outputs": [], + "source": [ + "predicted_treatments = np.argmax(preds, axis=1) + 1" + ] + }, + { + "cell_type": "markdown", + "id": "31cf24b5-013b-43a9-9db9-85f214f0dad7", + "metadata": {}, + "source": [ + "### Train CausalTune with custom multiple propensities\n", + "In some settings such as uplift modelling, the experiment / study is based on heterogeneous treatment propensities known to the researcher / experimenter. An array of treatment propensities can be directly supplied to CausalTune in the data instantiation of the `CausalityDataset`. This can, e.g. 
be done by \n", + "```\n", + "cd = CausalityDataset(\n", + " ...\n", + " propensity_modifiers=[]\n", + " ...\n", + ")\n", + "```\n", + "and then using the `passthrough_model` as follows" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "01347374-8524-4906-8a80-af02602fbfc1", + "metadata": {}, + "outputs": [], + "source": [ + "propensity_model=passthrough_model(\n", + " cd.propensity_modifiers, include_control=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "f0255a70-dd9d-45e6-ae6a-4b5bc6001714", + "metadata": {}, + "outputs": [], + "source": [ + "# set evaluation metric, if we want to use custom propensities, we should use \"psw_energy_distance\" metric\n", + "metric = \"psw_energy_distance\"" + ] + }, + { + "cell_type": "markdown", + "id": "c1efc7ac-9530-4dfe-9e7b-0797ff9529a4", + "metadata": {}, + "source": [ + "Now if `outcome_model=\"auto\"` in the CausalTune constructor, we search over a simultaneous search space for the EconML estimators and for FLAML wrappers for common regressors. The old behavior is now achieved by `outcome_model=\"nested\"` (Refitting AutoML for each estimator).\n", + "\n", + "You can also preprocess the data in the CausalityDataset using one of the popular category encoders: OneHot, WoE, Label, Target." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "ef2b635b-c92f-45c2-9038-c69d4ddd5c62", + "metadata": {}, + "outputs": [], + "source": [ + "ct2 = CausalTune(\n", + " estimator_list=[\n", + " \"DomainAdaptationLearner\",\n", + " \"CausalForestDML\",\n", + " \"ForestDRLearner\",\n", + " ],\n", + " metric=metric,\n", + " verbose=1,\n", + " components_time_budget=components_time_budget,\n", + " train_size=train_size,\n", + " propensity_model=propensity_model,\n", + " outcome_model=\"auto\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "ed6ad9d8-efcd-4123-935e-ebac56b39143", + "metadata": {}, + "outputs": [], + "source": [ + "# run causaltune\n", + "ct2.fit(data=cd, outcome=cd.outcomes[0])\n", + "\n", + "print('---------------------')\n", + "# return best estimator\n", + "print(f\"Best estimator: {ct2.best_estimator}\")\n", + "# config of best estimator:\n", + "print(f\"Best config: {ct2.best_config}\")\n", + "# best score:\n", + "print(f\"Best score: {ct2.best_score}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "4116f254-1428-4283-9a44-095ac5de95d5", + "metadata": {}, + "outputs": [], + "source": [ + "preds2 = ct2.effect(cd.data)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "1768aa4c-43c6-47d6-8cc7-2626469ce3f7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.82748584, -0.38301612, -0.28167136, 0.59362769],\n", + " [-0.47553063, -1.32764429, -0.78241589, -0.11771117],\n", + " [-0.56505201, -2.38952635, -1.25667425, -1.26012244],\n", + " ...,\n", + " [ 0.35639461, 0.81618186, -0.33151397, -0.45705827],\n", + " [ 0.23314607, 0.54855842, 0.42367241, 2.13179076],\n", + " [ 0.52856153, 0.5723138 , 0.21125856, 0.57083639]])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preds2" + ] + }, + { + "cell_type": "markdown", + "id": "196bd451-6663-4d59-84f7-d65eb2547015", + 
"metadata": {}, + "source": [ + "- Here we want to get results from the predictions\n", + "- We do naive argmax, but it is also recommended to use Thompson Sampling" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "e3265a9e-b58e-477e-a1a9-c4af38b8b422", + "metadata": {}, + "outputs": [], + "source": [ + "predicted_treatments2 = np.argmax(preds2, axis=1) + 1" + ] + }, + { + "cell_type": "markdown", + "id": "f1a6c03f-ce5a-4b7e-91d0-146bd0feba77", + "metadata": {}, + "source": [ + "### Run ERUPT with multiple propensities\n", + "Below we demonstrate how to use Estimated Response Under Proposed Treatment (ERUPT) to estimate the average treatment effect had the treatment been assigned randomly. Recall that the dataset used in this example is constructed in a way that the treatment propensity is a function of a unit's covariates." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "ff674637-1136-46b6-ae6d-a0cdd3218b56", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "test = synth_ihdp(return_df=True).iloc[:,:10]\n", + "N = 747\n", + "n_groups = 4\n", + "\n", + "treatments = generate_treatments(N, n_groups)\n", + "\n", + "test['treatment'] = treatments[0]\n", + "test['propensity_control'] = treatments[1]\n", + "test['propensity_treatment1'] = treatments[2]\n", + "test['propensity_treatment2'] = treatments[3]\n", + "test['propensity_treatment3'] = treatments[4]\n", + "test['propensity_treatment4'] = treatments[5]\n", + "\n", + "test['propensity_selected_group'] = data.apply(\n", + " lambda row: row['propensity_control'] if row['treatment'] == 0 else \n", + " (row['propensity_treatment1'] if row['treatment'] == 1 else \n", + " (row['propensity_treatment2'] if row['treatment'] == 2 else \n", + " (row['propensity_treatment3'] if row['treatment'] == 3 else\n", + " (row['propensity_treatment4'] if row['treatment'] == 4 else None)))),\n", + " axis=1\n", + ")\n", + "\n", + "def randomize(value):\n", + " return 
value + np.random.uniform(-1, 1)\n", + "\n", + "for col in features:\n", + " test[col] = test[col].apply(randomize)\n", + "\n", + "test['y_factual'] = test['y_factual'].apply(randomize)\n", + "test['treatment'] = test['treatment'].sample(len(test))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "74e51a09-3a76-47c8-a7f4-7665d48873fa", + "metadata": {}, + "outputs": [], + "source": [ + "test = test[test['treatment'] != 0].reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "345bfe6d-dbdd-4ac9-a8d1-d15860c691ac", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
treatmenty_factualx1x2x3x4x5x6x7x8propensity_controlpropensity_treatment1propensity_treatment2propensity_treatment3propensity_treatment4propensity_selected_group
046.175675-0.835516-0.9890580.7202542.4099330.0960921.6190430.1405740.3294780.1435080.0096680.2491500.3400220.2576520.263306
122.253838-1.7331910.276954-0.2289630.2045940.789086-0.2051580.050398-0.1785370.2198510.3954940.2719650.0403870.0723040.233541
213.796029-0.2201100.052470-0.006594-0.711016-0.1483932.7663980.7096640.0092750.2735720.1694010.0309700.2473430.2787140.151098
331.4174700.9511411.584397-0.9372510.432877-1.439677-0.0904200.5223250.3226850.3729680.0986400.1808420.3075480.0400020.059175
441.6303520.019401-0.3625640.066324-0.6505830.240731-1.5128000.389906-0.9231980.0211480.0210580.3617840.5504610.0455500.073713
\n", + "
" + ], + "text/plain": [ + " treatment y_factual x1 x2 x3 x4 x5 \\\n", + "0 4 6.175675 -0.835516 -0.989058 0.720254 2.409933 0.096092 \n", + "1 2 2.253838 -1.733191 0.276954 -0.228963 0.204594 0.789086 \n", + "2 1 3.796029 -0.220110 0.052470 -0.006594 -0.711016 -0.148393 \n", + "3 3 1.417470 0.951141 1.584397 -0.937251 0.432877 -1.439677 \n", + "4 4 1.630352 0.019401 -0.362564 0.066324 -0.650583 0.240731 \n", + "\n", + " x6 x7 x8 propensity_control propensity_treatment1 \\\n", + "0 1.619043 0.140574 0.329478 0.143508 0.009668 \n", + "1 -0.205158 0.050398 -0.178537 0.219851 0.395494 \n", + "2 2.766398 0.709664 0.009275 0.273572 0.169401 \n", + "3 -0.090420 0.522325 0.322685 0.372968 0.098640 \n", + "4 -1.512800 0.389906 -0.923198 0.021148 0.021058 \n", + "\n", + " propensity_treatment2 propensity_treatment3 propensity_treatment4 \\\n", + "0 0.249150 0.340022 0.257652 \n", + "1 0.271965 0.040387 0.072304 \n", + "2 0.030970 0.247343 0.278714 \n", + "3 0.180842 0.307548 0.040002 \n", + "4 0.361784 0.550461 0.045550 \n", + "\n", + " propensity_selected_group \n", + "0 0.263306 \n", + "1 0.233541 \n", + "2 0.151098 \n", + "3 0.059175 \n", + "4 0.073713 " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "edeff630-5d92-4f1a-9510-e9272c477294", + "metadata": {}, + "outputs": [], + "source": [ + "cd_test = CausalityDataset(\n", + " data=test,\n", + " treatment='treatment',\n", + " outcomes=['y_factual'],\n", + " effect_modifiers = features,\n", + " propensity_modifiers = propensities\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "151b9267-310b-4f6d-adaf-db2d44f63c6d", + "metadata": {}, + "outputs": [], + "source": [ + "cd_test.preprocess_dataset()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "46659420-fa53-4c5b-891e-a3fc99bc84cf", + "metadata": {}, + "outputs": [], + 
"source": [ + "preds_test = ct2.effect(cd_test.data)" + ] + }, + { + "cell_type": "markdown", + "id": "0788bc9e-5b0e-4b86-b128-7c5f0b11770e", + "metadata": {}, + "source": [ + "- Here we want to get results from the predictions\n", + "- We do naive argmax, but it is also recommended to use Thompson Sampling" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "30d23496-c5d6-4702-be34-01d7a85e8d88", + "metadata": {}, + "outputs": [], + "source": [ + "treatments_test = np.argmax(preds_test, axis=1) + 1" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "4832a0d9-18e9-4a2f-9e72-ef207d1d8fb5", + "metadata": {}, + "outputs": [], + "source": [ + "test['predicted_treatment'] = treatments_test\n", + "use_df = test" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "470e4aae-a435-48f1-95bd-35d9d5d71c84", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 528.26it/s]\n" + ] + } + ], + "source": [ + "# computing mean ERUPT over 10 bootstrapped samples\n", + "from tqdm import tqdm\n", + "scores_list = []\n", + "\n", + "for i in tqdm(range(10)):\n", + "\n", + " bootstrap_df = use_df.sample(frac=1, replace=True)\n", + " propensities = bootstrap_df['propensity_selected_group']\n", + " actual_treatment = bootstrap_df['treatment']\n", + " outcome = bootstrap_df['y_factual']\n", + "\n", + " # define the random assignment policy\n", + " random_policy = bootstrap_df['predicted_treatment']\n", + "\n", + " # define a propensity model that will simply return the propensities when calling predict_proba\n", + " propensity_model = DummyPropensity(p=propensities, treatment=actual_treatment)\n", + "\n", + " # obtain ERUPT under random policy\n", + " e = ERUPT(treatment_name='treatment', propensity_model=propensity_model)\n", + " scores_list.append(e.score(df=use_df,outcome=outcome,policy=random_policy))\n", + "\n", + 
"erupt_mean = np.mean(scores_list)\n", + "erupt_sd = np.std(scores_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "259f373d-691c-4178-921d-3bd1cfd04b07", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAMQAAAAQCAYAAABJCdBSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAABJ0AAASdAHeZh94AAAHgElEQVR4nO2aa6xWxRWGH46nQUspGBFJbSqKUvEK0VqolktRVLCKF7Q/8JYWaqxBW4/Uqu3ibWLRpkHAGzY1YKl/vATUCIgikVptSQSixsuhVlSaigWKBSmxAv2xZpfNPnufb2Z/J/46b3IyZ8+smTXfu2bNZc302Lt3L93oRjccrdk/kg4BLgQmACcChwOfAq8B84H5ZrYntmFJE4DrgeOAQ4B/AK8As8zs5YLsVUFHZ9hjZgcU6m0AjqiQ32RmA0r69VXgl8A5uX4tBmRm/yqRr6PjEmAUMBQ4GegNPGxmkyvaSeUr2VapHDdhkzuBU4HBQD/gP8B7OMf3mNmWYiN1+Ar1xgLXASOAg4EtOAdzzGxJiXxDjltz8pOA+4PQSuB94DDgIuB3wLmSJplZwyUlkDI9dHAxsBk4GrgAuFjSFWb2h1yVdYAqmvs28B1gaUX5x8DskvwdJf0aBLwE9AeeAN4CTsNJOkfS6WUGS9ERcBtu2B3ARuDYCrmsX6l81bHVOtI4TpXP8GNgDfAs8BHQCxgOzACmShpuZh8U6iTxBSDp18BNQf5JnLNDgVOA0cCSgnwUx3mHaAfOB57Ozy6SbgFWAxfjhD/eoKMDgDZgE3CSmX2UKxsDPI/P0P83sJmtww1Q1l42O/62QuU2M5vRWZ9yuA93hmlmdndOxyzckLcD1zSpg9DWRuCv+My3skqwDl/UsFUqx03Y5Mtmtqukzu3ALcDPgGsLxdF8hbam4M7wEDDVzD4tlH+h8B3NcUtWYGbPm9lTxaXWzD4E5oXP0Z11NOAIoAX4S15xaGslsB335IaQdCI+u/wdeDqmTidtDQLGARuAewvFBnwCXC6pVzN6wH+nma2PWU2pwVcX2iqZ40byZc4Q8EhIjympE82XpJ74xPU+Jc4Q2vtvISua41bikCn4LEJ2Pb6fPU1SPzPbnBVIGonvDxdH6p0a0gfNbHeFTE9Jk4Gv4YP6VWBVifyYkC4vGUjbJf0Jd5jhwIqaOuqgK/mCNFtBHMfNyGf4bkhfTahThrPwwTsb2BPOBScAu4DVxfNWQDTHDR1CUitwRfhc1kjezLZK+ikwC3hD0mJ83zYIX+afBX4YofcgYDKwG98XV2EAsLCQ966kq83shVze10PaXtHOetwhBtPRIWJ1JKOr+IJ0WyVwnCwvqQ34EtAHP2SfgTvDHY30NMA3QroLWIs7Q17vKuASM/tnlpfCcQuNcUdQusTMnonpsZnNxvewrcAU4Gb8IPgBsKC4bFXgUqAvsKzkEJZhPjAWH7C98IjLA8BAYKmkk3OyfUL6cUVbWX7fJnTUQhfxBem2iuG4rnwbvhW9AXeGZcC4/ECtif4hvQnYix/wewMnAcuBkcCjxUqxHHe6QkiaBtyIR2Muj+2xpOnAr4C5wD3Ah3jkYCbwsKShZja9QTPZ0vxAlYCZFaMgrwPXSNoR+j0DD0/Wxuehoyv4qmmrhhzXlc/C0ZIOA76FO+taSeeZ2ZpIfWXIJvHPgPPNbEP4fk3ShcDbwChJI/Lbp1iOKx1C0nXAHOANYKyZbY3praTRwJ3AIjP7Sa5oTehwO3CjpHlm
9reKNo7HSdxIIXwWiXn44BiZy8tWgD4dxffL39aEjmR0EV/JtkrluK5NzGwTsEjSmvBbfk9hm5OIbSFdm3OGTNdOSc8A38fD6S+Hvo8mkuPSLZOkG4C78dlwTIhexOK8kHYInZnZTjws2AIM66SNuge3DNmynI8YvR3SwRV1suhH1RkjRkcdNMVXE7b6vA7TAJjZe7jDHi+pX2r9HDI7bqsozy5XD8rlRXPcwSHC4eMuPAY9JmH/mqFnSKtCq1l+h3BZ0H8gvuTvBh5M1J1heEjzM2pGxjhJ+/1uSb2B04GdwJ+b0FEHtfmqa6tUjrvIJgBfCWkz0bkV+NnhuKIdA7LV591cXjTHxYHxc3yv9wq+9G4u1izID5J0bOEi5I8hnSrp8IL8ufjA24XfGJdhEn4Nv7Szg5ukIWV3BpIG4ntE2P/y7x380DUQ+FGxGj7TLzSzT+rqqIlafKXaqoAojlPlJQ2W1GFLKqklXMz1B14qeyITi7DSPIWHwK8v6BkHnI2vHvkoWzTH+bdMV+K3dbtDA9OkDjf3G8xsQe57BX7pcSR+4QXwGPAccCbwpqRF+AFmCL509QBurngiAfuW5qqb6QyX4fu+Vfhbme14GG0CcCC+z/1Noc61+MCaK38H8ybwTfyOoh24tQt0IGkiMDF8Zm+dRkhaEP7fbGZt4f9kvmraKo9YjlPlxwMzJb2Iz9Bb8Cclo4Cj8N81pVgpkS/wCW0YMCvcQ6zFx+BEnJMfmFk+mhjNcf5QfWRID8BDZWV4AVhQUQaAme2RND50+nt4BOaLwFZ8AM01s+VldSUNwUN0MQe3lfjdwjDcw3vhM8OL+J3BwuLNp5m9I+lU9j3uG4+/B5pD+eO+ZB0BQ4ErC3lHhT9w52oLfarDV21bJXKcKv8c/j7oDJyzvvhFZjvO19yKA/9QIvkCMLONkk4BfoHfI4wE/o2vHDPNbHW+oRSOe3Q//+5GN/bhf0T1PFykXNI7AAAAAElFTkSuQmCC", + "text/latex": [ + "$\\displaystyle 2.87505182773168$" + ], + "text/plain": [ + "2.87505182773168" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "erupt_mean" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "df132240-5a4b-46a7-a3bc-8ddc87464167", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAMQAAAAQCAYAAABJCdBSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAABJ0AAASdAHeZh94AAAHn0lEQVR4nO2ae7BXVRXHPyCiDioZivYyjJFCsmwyvIaKSpL5mqioxoHAGWAcZYAEpqKh5bcZp2ulSdFD1EHFGXthGimkkROZNs7kZbRBxCnAKF9gl8G8RAL9sfbhnnvuPr+797n+edfMmf07+7XWd63f3mvttc+ggwcPMkADNEBOQ8ovkm4EzgTGAMcDXcB24H5guZntyplc0ruBbwIXAyOAF8NcMrN/V/oOAmaFZxwwCHgWuB1YYWYHIvNvA95bw/5lMzuphWyTgLnA2cBxwC7gGWCZmT0U+owApgCXAqcD7wL2hX4rgZVVuSTNDG2t6ICZHfZWYMnRcWlMn9j7MyYHSxO7R2SbBqwKr7PN7PZKe7IdeywI4MvAU8AjwCvAMKANuB6YI6nNzP7Rl4BBiNHA48BI4AFgMzAemA9cLGlCZYHdA1wZ+N4LvAFcBPwY+DjwpRpWu4FbIvWvt5Dt28BiYAfwa2AncALwUeB8oDDw1MD/ReBR4AXgROAzuME+JWmqmZXd7EZANazPBS4E1r4VWBroOAd7v8ZkYGlq90K29wDLw7xH13RLtmN1QRxrZnsjTG8AlgBfA65pJWCJfoQbap6Z/aA01834wrsBuDrUTcGVshUYb2Y7Q/1QYDUwXdL9ZnZfhE+nmV2fKBOSZuPGvQuYY2b7Ku2Hl163AFcAD5Z3KklLgCeBz+JKXV20mdlGfFHEeD8Rfq6oES8LCxk6DvU52BuPScXST7sX3mUl7q3uAxbVsEq24+DyqNhiCPTzUJ7aCmCJ0WhgMrAN+GGl2YD/4GCHhbopobypUEqQZx+wNLzOTeHdh1xH4H+SF4gYN/D8X+n3781sTdVtm9lLwE/C6/mJvE/Hve0/gQcbAeg5X5aOc7E3HZNJ/bX7PNzjXoXjjVKOHaseoo4uD+XTif0vCOXDESH2SPoTbsw2YD1QxJR/j8xV1J0raWjEKEeEGPJkXClPAxvMbH9krotwV38LcEDSpcAHgb3Ak2b2RGRMHRV/hDcT+88J5R01skEellwdN8HeH32lYGlsd0ljgXb8DLNB0oUtZGlFPewYXRCSFuHx2HD8kH0ODqg9kcn7Q7mlpv153FhjcGMVu8Mpkb7vC+WQ8Htzpf0kug9UBW2VdJWZ/aFS/7FQ7gU6cOMeIkkbgM+Z2as1chf9htAd265r1Tf0PwqYBuzHY9Y6ysGSq+Mm2PujrxQsjewe9L8K91xLImOTKGbHwTV9F+FudwG+GNYBk/v6o5RoeCh317QX9W8LZRFCXCfp7SWBD6fnAfW4yjwrgUm48ofhGYRbgVHAWkkfrvQfGcrFwEH8kHsM8CHgYeA84Bf1sA5RO/7neMjMfpvQ//M41nUtkhK5WHJ13AR7U32lYmlq928AHwFmmllXhH8q9bJj1EMUaTFJJ+In/XagQ9JlZvZUPwSoo58C04FPApskPYDvSp8A3oHvBCcD1dCgms35K3C1pNeBhXh2bEqpvdgA3gSuMLNt4f2ZcMB7Dpgo6ey6cEDSvDD35iBzChXh0q11HRpgyaUm2BvpKwNLtt0lnYV7hZsyQ9weVGfHOg9RAHvZzH6Fu94RwN2J/IrdaXhNe1HfGfjsx88pXwVeBWaE53l8Qe4J/V9J5F8clM6r1HeGsqNkXIIMbwDFbj8+NqmkucAyYBNwgZm91pcgksbhGHYQT0/2RXVYsnRMM+xNxrSiHlhy7R5CnLvxMHEpDamVHZMO1Wa2XdIm4AxJx5czAjX0XCjH1LQX2apD8W/IVtwYnrLwR4b+O81sa4q8uHLB3XVMrs6accVF1lHVBkkLgO/hu90kM0tdnCmH6VbUF5Z
UHTfB3lhfNdQLS6bdj6Yb714pet1zm6Tb8MP2gmpjX3Zs6SEq9M5Qphj10VBOltSDh6RjgAn4BcyfE+b6IjAUv7RJpbZQVrMX6/FY+LSqXIGKQ2OPhSfpK7gSN+I7StJiCEadjuvsjiTJe1MdllwdN8HeSF8tqA5LjGJ2/y+ux9jTEfo8Ft57hVMpdhxc6jxGUi/3K2lwuJgbCTxe/hxA0mhJH6hezpjZ3/BD1yjg2uqU+A6xyswO5Y4lHRvhfQbwHXwnaq+0jS3dY5TrR+E3l+C3oGW5tgNr8Lh0fmXcZDyW7aSUOZK0NPD+C76j9OUdyzQVPxCubXXD3xBLlo6bYG+orywsOXY3sy4zmxV78Bt0gLtC3c8qcybZsRwyXQJ8S9Jj+IrfhV9vT8TTXi8Bsyvj1+PfrJyCXxCV6Rr8s4Lvy7+DeRY4C8+fbwG+Xun/iKQu3JXtAcbi3550AZeb2b8q/b8ALAypv+1hzOgw5kg8Xv9uBPO1eIbi5pBX7wjyfxrfyWeZ2W4ASTPw74T2A38E5kXc9DYzuzPCB7rDpbqb6f5iydVxMvZ+jMnFkmv3bMqxY9kN/g53NSfg19iL8Svt1/AdZ5yZbUoVIuxgZwJ34kZaiCtmGdBmvT8U/CWe0psGXIen9lYAp0Vy8OAhw2/CnFeGMRNxlzkDuKzmZnUH/g3OcjxGnY/fUq4BJpjZ6lL3Ij9+GJ6CtsgzM4Y/XBydQ9phuimWLB1nYm86JhdLrt2bULIdBw18/j1AA9RN/wfsRI58Ob5+jgAAAABJRU5ErkJggg==", + "text/latex": [ + "$\\displaystyle 3.09562750665942$" + ], + "text/plain": [ + "3.095627506659421" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test['y_factual'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4fd63daa-8d09-42cf-a20d-971b773a4e73", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From ebbff47208cd5ee33c64511b85a959612295fb19 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 10 Sep 2024 15:07:48 +0100 Subject: [PATCH 11/22] Delete notebooks/AB testing.ipynb Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- notebooks/AB testing.ipynb | 461 
------------------------------------- 1 file changed, 461 deletions(-) delete mode 100644 notebooks/AB testing.ipynb diff --git a/notebooks/AB testing.ipynb b/notebooks/AB testing.ipynb deleted file mode 100644 index 53080e1b..00000000 --- a/notebooks/AB testing.ipynb +++ /dev/null @@ -1,461 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# AB Testing with CausalTune" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import sys\n", - "import pandas as pd\n", - "import numpy as np\n", - "import warnings\n", - "\n", - "from sklearn.ensemble import RandomForestRegressor\n", - "from sklearn.metrics import mean_squared_error\n", - "\n", - "import gc\n", - "\n", - "root_path = root_path = os.path.realpath('../..')\n", - "try:\n", - " import causaltune\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"causaltune\"))\n", - "\n", - "from causaltune import CausalTune\n", - "from causaltune.data_utils import CausalityDataset\n", - "from causaltune.datasets import generate_synth_data_with_categories\n", - "\n", - "from flaml import AutoML\n", - "import matplotlib.pyplot as plt\n", - "%pip install seaborn as sns\n", - "import seaborn as sns\n", - "%matplotlib inline\n", - "\n", - "warnings.filterwarnings(\"ignore\")\n", - "\n", - "%pip install plotly\n", - "import plotly.io as pio\n", - "pio.renderers.default = \"png\"" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "*Note*: This notebook uses the the package *wise-pizza* which is not listed as a requirement to run CausalTune. 
It is merely used to showcase what is possible as an AB testing workflow.\n", - "\n", - "Install via\n", - "`pip install wise-pizza`" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install wise_pizza" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import wise_pizza as wp" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## CausalTune for AB Testing \n", - "\n", - "CausalTune can be used for AB Testing in two ways:\n", - "1. Variance Reduction\n", - "2. Segmentation analysis\n", - "\n", - "#### 1. Variance Reduction\n", - "A standard variance reduction technique is to control for natural variation in the experiment's outcome metric. The simplest way to do so is by running a simple regression with a selection of controls. A potentially more powerful and automated approach is to run CausalTune. \n", - "\n", - "#### 2. Segmentation Analysis\n", - "\n", - "We use the heterogeneous treatment effect estimates from CausalTune to feed them into the segmentation analytics tool Wise-Pizza." 
- ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Data Generating Process" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We first create synthetic data from a DGP with perfect randomisation of the treatment as we are replicating an AB test environment" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There is substantial variation within the outcome metric per variant which can be seen from the cdf per variant:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAG2CAYAAACZEEfAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABeWUlEQVR4nO3dd3QUVR/G8e+mE0qQFjpBQDoBaQYEQdBQRLBRRDrYQMHYQAVEVKyIBUVFwPqCImChiaEJgkhTesfQEkokCYG03Xn/WNxlSYGE7G5283zOyeHe2Tub37CGPM7cmWsyDMNARERExEv5uLsAEREREWdS2BERERGvprAjIiIiXk1hR0RERLyawo6IiIh4NYUdERER8WoKOyIiIuLVFHZERETEqynsiIiIiFdT2BERERGv5taws3r1arp160bFihUxmUwsWLDgivusXLmSG2+8kcDAQGrWrMmsWbOcXqeIiIh4LreGneTkZMLDw5k6depVjT906BBdu3alffv2bN26lVGjRjF06FCWLl3q5EpFRETEU5kKykKgJpOJ+fPn06NHj2zHPPvssyxcuJDt27fbtvXu3ZuzZ8+yZMkSF1QpIiIinsbP3QXkxrp16+jYsaPDtsjISEaNGpXtPqmpqaSmptr6FouF+Ph4SpcujclkclapIiIiko8MwyApKYmKFSvi45O7C1MeFXZiY2MJDQ112BYaGkpiYiIXLlygSJEimfaZNGkSEyZMcFWJIiIi4kRHjhyhcuXKudrHo8JOXowZM4aoqChbPyEhgapVq3LkyBFKlCjhxspERERc63RSKgdOneN/G2L4ddfJK44PKx1M4vkUqgYk0Sp5GYGkk44ftU1HCCaFcJ8DBJkyoPQNULkZhN0MRctBpSbgF5ivtScmJlKlShWKFy+e6309KuyUL1+euLg4h21xcXGUKFEiy7M6AIGBgQQGZv4LL1GihMKOiIh4PYvFYP3BM7yyaBc7jifatvsEBtvaFUOCOJmUyp2NK9KkcgjtiuyncsxPmI5ugIxd1kFFL3nTys2h3YtQqSkUKemS4/hPXqageFTYiYiIYNGiRQ7bli1bRkREhJsqEhERyV+pGWZS0i0kXkjn9LlULIZ1vorFAIthYLFc0jYMDAMOnU7G38+HDLOFdLOFbccS2RLzLxlmg9jElEzfo16FEv
iYYEzb0rRKWmoNNad2w+EE2Hkm68JqdoQbB0C9O538N5D/3Bp2zp07x/79+239Q4cOsXXrVkqVKkXVqlUZM2YMx44d44svvgDg4Ycf5oMPPuCZZ55h8ODBLF++nG+//ZaFCxe66xBERERyLTElnVd+3sWFdDPn08z8uiuOsNLBHD5z3qnf9+5KZ3nNeI+AwCA4sRUWXGGHGh2gZgcI7wPBpZxamzO5Nexs3LiR9u3b2/r/za0ZMGAAs2bN4sSJE8TExNher169OgsXLuSJJ57g3XffpXLlykyfPp3IyEiX1y4iIpJb01Yd4LM1hziVlJrptcuDjskEhgEVivkQZE7GlPIvPljwwbj4ZbH9acLgoFGRdj5b8ceMvymDdMOPRj4HaOhziLqmGIJNqZDVSRu/IlC+AVRoDKH1rH+Wqwv+WU8P8UQF5jk7rpKYmEhISAgJCQmasyMiIk53Pi2D6b8dYvKyvZleK1c8kMc71CI4wJdAP1+qlQ6mVNEAygT74/9dX0z77M+QM/sWIT2otDUFAZSoDCafi/2Lf/7X/6/937ODLemQkQqhDcDXH8zpUKsjlKsHRcs4/y/hKgUEBGR7W/m1/P72qDk7IiIinsIwDN7+ZS8frNif6bVX7mpAj8aVKBrod/lOED0B1rxj34SJ2Fr3c/aGXuAfDAHBF4NMPjiZBCTlz3vlAx8fH6pXr05AQEC+vq/CjoiISD5bseckg2b+6bCtWulgnutSl8j65bPe6cJZeL2a47a63Yi9+XXOJiZSrlw5goODvfaBuBaLhePHj3PixAmqVq2ar8epsCMiIpJPlu+OY/CsjQ7bggN8+eWJtlS+Ljjrnf49DN8Pg6MbLtmpNDy8FnPRcpzdu5dy5cpRunRp5xVeQJQtW5bjx4+TkZGBv79/vr2vwo6IiEg+ePnnnUxfc8hh25RejeneuGLWZykMA7Z8BT+OcNze8UW4+QkA0lOst40HB2cTlLzMf5evzGazwo6IiEhBMuGnHcxce9jW/+D+JtzRqGL2O6QkwJSG1j//0+IhuO0l8A/KNNxbL11dzlnHqbAjIiKSRxfSzHR4eyXHE+wP7ts9sRNB/r6ZB5szYMHDsP17MCyOrz0wz/o8G3EKhR0REZE8SEk3U3fcEodtW8fdlnXQmf8w/PW/zNtbPAhd3nRShXlz+PBhqlevzpYtW2jcuLG7y8kXCjsiIiK5lJiSTpOXltn6PZtV5vV7GmW+DHN0I8zoZH3OzX+KloXuH8L17cAvf2+xzg9VqlThxIkTlCmTv8/fCQsLY9SoUYwaNSpf3/dqKOyIiIjkwpaYf7nrw99t/UGtwxjfrX7mgT+MgC1f2vuBITBya4FediEtLY2AgADKl8/m9ngPlU9PJRIREfF+j369ySHoDG5dPXPQST0HL4Y4Bp3ISTAmJl+DzieffELFihWxWBzn/3Tv3p3Bgwdz4MABunfvTmhoKMWKFaN58+b8+uuvDmPDwsKYOHEi/fv3p0SJEjz44IMcPnwYk8nE1q1bAeudUUOGDKF69eoUKVKE2rVr8+677zq8z8CBA+nRowdvvfUWFSpUoHTp0gwfPpz0dOsZrXbt2vHPP//wxBNPYDKZXD7hWmFHRETkCk4lpdLy1V9ZtC3Wtm3WoOaM61bPcaBhwPSOjtue3AsRj+Z7Tffddx9nzpxhxYoVtm3x8fEsWbKEvn37cu7cObp06UJ0dDRbtmyhU6dOdOvWzWHNSYC33nqL8PBwtmzZwtixYzN9H4vFQuXKlfnuu+/YuXMn48aN47nnnuPbb791GLdixQoOHDjAihUr+Pzzz5k1axazZs0CYN68eVSuXJmXXnqJEydOcOLEiXz/+8iJLmOJiIhcxjAMth1LYP3BM7y6aLfDa+GVQ5j3aGt8fS47O/HPOpjZyd7v/Ca0fNBpNV533XV07tyZb775hg4drHdyzZ07lzJlytC+fXt8fHwIDw+3jZ84cSLz58/nxx9/ZMQI+7N9br31Vp
588klb//Dhww7fx9/fnwkTJtj61atXZ926dXz77bf07NnToZ4PPvgAX19f6tSpQ9euXYmOjmbYsGGUKlUKX19fihcv7pZLZAo7IiIiF8UlpvDxqoN8tf4f0syWTK/3j6jGS90b2DdYLLD1a1j6HKQm2rc37OnUoPOfvn37MmzYMD788EMCAwP5+uuv6d27Nz4+Ppw7d44XX3yRhQsXcuLECTIyMrhw4UKmMzvNmjW74veZOnUqM2bMICYmhgsXLpCWlpbpTq369evj62u/E61ChQps27YtX47zWinsiIhIoWW2GBz79wKvL93Nwr+zvrTSqkZpWtcsw6PtaljnmhgGbJsLK16Bfw9l3uGSJyA7W7du3TAMg4ULF9K8eXN+++033nnHuojoU089xbJly3jrrbeoWbMmRYoU4d577yUtLc3hPYoWLZrj95g9ezZPPfUUb7/9NhERERQvXpw333yTP/74w2Hc5U88NplMmeYTuYvCjoiIFAqHTyezOzaJjYfjCfL35ce/jhMTfz7LsdVKB/Ne7yaEVykJMX/AmT9g9Xz46xuIP5j1N+j0GrR8GFw4+TYoKIi7776br7/+mv3791O7dm1uvPFGANauXcvAgQO56667ADh37lymS1RXY+3atbRq1YpHH7XPOzpw4ECu3ycgIACz2Zzr/fKDwo6IiHi1pTtiefHHHZy45CnHWalVrhhv9wynUeWSkJEKi56Cz77I+c1vehRujoJiZfOv4Fzq27cvd9xxBzt27OCBBx6wba9Vqxbz5s2jW7dumEwmxo4dm6czLbVq1eKLL75g6dKlVK9enS+//JI///yT6tWr5+p9wsLCWL16Nb179yYwMDDfn+OTE4UdERHxSv8mp3Hn1DUcib/gsL1ZtetIzbDQumYZrgv2Z0CrMOtTj0/vg+gRcHA1pCZkfsM6d4B/EQgsbr1MVbKqi44kZ7feeiulSpViz5493H///bbtkydPZvDgwbRq1YoyZcrw7LPPkpiYmMM7Ze2hhx5iy5Yt9OrVC5PJRJ8+fXj00UdZvHhxrt7npZde4qGHHqJGjRqkpqZiGEaua8krk+HK71YAJCYmEhISQkJCAiVKlHB3OSIiks9mrT3Eywt3kWFx/PX29dCWtKpR2v6Ml9QkWDMF9i+DE39l/WZBIfDAfKjc1LlFZyMlJYVDhw5RvXp1goIyLxDqbXI63mv5/a0zOyIi4hVW7T3FgBkbMm1/7NaajOp4g+Ot4vMegr9nZ/1GbZ6EpoOgRCXw0ePovIHCjoiIeLwFW44xas5Wh22fDWhG+9rl8Lk05KSnwPQOELfdvq1EZWg+BGrdDqH1XTrBWFxDYUdERDxWhtlCv882sO7gGdu2mQOb075OucyDN38BPz7muO35WOs8HPFqCjsiIuKRth45S4+pax22rXyqHWFlLntuTFqydVHOHfPs224aDp1edUGVUhAo7IiIiEe5fNVxgO6NK/JOz8aOl6zA+vC/74c4buu3AGq0d26RUqAo7IiIiMf4cv0/jF2w3WHbxB4N6HdTtcyDFz0DGz6292vcCr2+hoBgJ1cpBY3CjoiIeIS5m446BJ1pDzSlU4MsFpU0Z8DE0vZ+YAl4+De4Lsz5RUqBpLAjIiIFmsVi0PbNFRz91/pwwJAi/qx6uh0lgwMyDzYMeO2yh/09cxB8/TOPlUJDDxAQEZECKyXdTMtJ0bag4+djYtkTbbMPOp/dBunJ1v4NneHFBAUd0ZkdEREpmFbuOcnAmX/a+rfVC+XT/s2y3+HvOXD04vgKjaHP/5xboHgMndkREZEC56e/jjsEnZ7NKvNJvxyWbNgxH+Y/ZG1XvBEeWqWHAxYQU6dOJSwsjKCgIFq2bMmGDZmfcu1sCjsiIlKgzFp7iMf+t8XWX/7kLbxxb7h9TavLbZ8H3w209/vNd26BctXmzJlDVFQU48ePZ/PmzYSHhxMZGcnJkyddWofCjoiIFBjLd8fx4k87bf2VT7Xj+rLFst9h/T
SYO8ja9i8KwzdAkZLOLVKu2uTJkxk2bBiDBg2iXr16TJs2jeDgYGbMmOHSOjRnR0RECoToXXEM+Xyjrf/n8x0pWzww+x2Wvwyr37T3H/29UNxebhgGF9LNbvneRfx9sz/Ddpm0tDQ2bdrEmDFjbNt8fHzo2LEj69atc1aJWVLYERERt9t5PNEh6Mx+8Kbsg45hwP96w94l9m1PH4SipbMe72UupJupN26pW773zpciCQ64uuhw+vRpzGYzoaGhDttDQ0PZvXu3M8rLlsKOiIi41fqDZ+j9yXoAfEww56EImoeVynpw4nF4pz4YFvu25+PAP8gFlYqnUtgRERG3+WLdYcb9sMPWj36yHdUvX8jzP2nnYXJdx21jjha6oFPE35edL0W67XtfrTJlyuDr60tcXJzD9ri4OMqXz+LJ106ksCMiIm7xx8EzDkFncs/w7IOOxQIfRdj74fdDjw8L5e3lJpPpqi8luVNAQABNmzYlOjqaHj16AGCxWIiOjmbEiBEuraXg/22JiIhXGvaFfY7O7omdCMrurMGpvTC1ub1/Y3+4830nVyf5ISoqigEDBtCsWTNatGjBlClTSE5OZtCgQS6tQ2FHRERcyjAMIiYtJzElA4AP+96YddAxDFj0FPw53b6tcV8FHQ/Sq1cvTp06xbhx44iNjaVx48YsWbIk06RlZ1PYERERl3rrlz3EJqYAcMsNZenSsILjAMOAtVPg1xcdt3d5C1oMc0mNkn9GjBjh8stWl1PYERERl1l34AxTVxwA4K4mlXinV2PHAannYFIlx21l60D/H6C4aye1ivdQ2BEREZfYcCiePp9abzEvHuTHG/c2chxwdBNMv9XeN/nA41vhumquK1K8ksKOiIg43amkVHp+bH9q7qxBzfH3vWTFosuDji5ZST5S2BEREafaEvMvd334u60/Y2Azmla75KGB5045Bp1+C6BGe9cVKF5PYUdERJzCYjF4au5fzNt8zLbtsVtrcmudS+7E+f0D+OV5e7/3/xR0JN8p7IiISL5LSTcTMSmaf8+n27ZNe6ApnRpcnGRsGPDV3XBguX2nXl9BnS4urlQKA4UdERHJV4ZhcOtbK21Bp1LJInz/SCvKhwT9NwB+He8YdB5cCRWbuL5YKRQUdkREJN/si0vitndW2/rdwivyfp9LQozFAq+UB3OqtV+5OQxZViiXfRDXUdgREZFrZhgGUd/+xfwt9vk5DSuFOAad9BR4v6k96ASXgUFLFHTE6RR2RETkmvx15Czdp6512PZp/2bcVu+Sicjb5sL3Q+z9BvfCvZ+5qEIp7HyuPERERCQzs8VgxDebHYJOnfLF2T2xkz3omNPhrdqOQafpIAWdQmL16tV069aNihUrYjKZWLBggVvq0JkdERHJtQtpZtq8sZzT59Js257rUocH29ZwHPjLWDgXa+/f/x3ccLuLqhR3S05OJjw8nMGDB3P33Xe7rQ6FHRERyZVTSak0f+VXW79muWIsfPxmAv0uWbk8LRm+vAuO/GHtt3gIurzh4krF3Tp37kznzp3dXYbCjoiIXL3YhBRumhRt6z98Sw1Gd65jH2AYsHEGLIyybyteETpNcmGVXs4wIP28e763f7BHTihX2BERkauScCHdIei827sx3RtfskJ5RipMaQjn4uzbqrS03nHloymi+Sb9PLxa0T3f+7njEFDUPd/7GijsiIjIFSWlpBNxSdCZcGd9x6ATfxDeu+yhgPfNgvp3uaZAkRwo7IiISI6mrtjPm0v32PqPtqvBgFZh9gF7l8I3Pe39ijfC0GidzXEW/2DrGRZ3fW8PpLAjIiLZGjBjA6v2nrL1721amWc6XTJH549PYPHT9v4to6H9GBdWWAiZTB55KcmdFHZERCRLLyzY5hB01o/pYF/fCmDR07DhE3v/4TVQvqELK5SC7ty5c+zfv9/WP3ToEFu3bqVUqVJUrVrVZXUo7IiISCZR325l3mb70g8HXu2Cr88ld+Fsn2cPOr4B8M
xBCCzu4iqloNu4cSPt27e39aOirHfpDRgwgFmzZrmsDoUdERFx8OW6w7ag42OCfa9cFnTOxsDcQfb+M4cgsJiLqxRP0K5dOwzDcHcZWi5CRETspv92kLE/7AAgwM+H3RM7OwadMwfgvRvt/TFHFXSkwFPYERERAJbtjOPlhbts/Y0vdCTA75JfE4YBH7UGS7q1P2ixLl2JR1DYERERElPSGfbFRlv/r3G3UyLI3z7AMOD9ppBxwdrv/T+o1srFVYrkjdvDztSpUwkLCyMoKIiWLVuyYcOGHMdPmTKF2rVrU6RIEapUqcITTzxBSkqKi6oVEfFOk3/Za2t/MbgFIcH+jgN+eQHiD1jbbZ6COl1cWJ3ItXFr2JkzZw5RUVGMHz+ezZs3Ex4eTmRkJCdPnsxy/DfffMPo0aMZP348u3bt4rPPPmPOnDk899xzLq5cRMR7fLX+H2b9fhiAUR1r0faGso4Dtn8P6z6wthvcAx3GurZAKRCTfF3BWcfp1rAzefJkhg0bxqBBg6hXrx7Tpk0jODiYGTNmZDn+999/p3Xr1tx///2EhYVx++2306dPnyueDRIRkazFnDnPxJ932vqP31or86AN0+3tu6dnfl2cxt/feobt/Hk3LfzpYmlpaQD4+vrm6/u67dbztLQ0Nm3axJgx9idt+vj40LFjR9atW5flPq1ateKrr75iw4YNtGjRgoMHD7Jo0SL69euX7fdJTU0lNTXV1k9MTMy/gxAR8WAWi8Ed7/9GaoYFkwk2Pt8RH5/LVrQ+uhFifre2H/ldS0C4mK+vLyVLlrRd8QgODsbkgauOXw2LxcKpU6cIDg7Gzy9/44nbws7p06cxm82EhoY6bA8NDWX37t1Z7nP//fdz+vRpbr75ZgzDICMjg4cffjjHy1iTJk1iwoQJ+Vq7iIg3GDVnK4kpGQDMGNCc0sUCHQcknoDpHaztajdDaH0XVygA5cuXB8h2ioc38fHxoWrVqvke6DzqoYIrV67k1Vdf5cMPP6Rly5bs37+fkSNHMnHiRMaOzfoa8pgxY2xPbATrmZ0qVaq4qmQRkQLp1UW7+PEv62KSdzWpRPs65TIPWvKsvX3zKNcUJpmYTCYqVKhAuXLlSE9Pd3c5ThUQEICPE84eui3slClTBl9fX+Li4hy2x8XF2VLs5caOHUu/fv0YOnQoAA0bNiQ5OZkHH3yQ559/Psu/oMDAQAIDAzNtFxEprF5ZuJNPfzsEQMWQICb3DM886PQ+2PmDtd35Dah1mwsrlKz4+vrm+1yWwsJtF18DAgJo2rQp0dHRtm0Wi4Xo6GgiIiKy3Of8+fOZAs1/H3xhmakuInItXl20yxZ0ivj78tNjN2e+ZBB/CD5oZu83H+bCCkXyn1svY0VFRTFgwACaNWtGixYtmDJlCsnJyQwaZF1zpX///lSqVIlJkyYB0K1bNyZPnkyTJk1sl7HGjh1Lt27dlHZFRK5gwZZjfLL6IACBfj7smBCZeUKyxQLTbrb3e32lScni8dwadnr16sWpU6cYN24csbGxNG7cmCVLltgmLcfExDicyXnhhRcwmUy88MILHDt2jLJly9KtWzdeeeUVdx2CiIhHSE7NYNScrYB1cc+dL3XKHHQAVr0Oaees7R7ToG431xUp4iQmo5Bd/0lMTCQkJISEhARKlCjh7nJERFxi+m8HbeteRT95CzXKZrF455/TYeGT1nZ4H7hrmgsrFMnZtfz+1rlJEREvd/h0si3oDG9fI+ugE/OHPegAdH7dRdWJOJ/CjoiIl7t9ympb+8E2NTIPSIqFGbfb+6NjICjEBZWJuIbCjoiIF/v57+OkZVgAGN25TuYFPgHmPGBv9/9BQUe8jsKOiIiX2nk8kRHfbAGgR+OKPHxLFmd1Vr0BR/+0tru9C9e3c12BIi6isCMi4qWm/3bQ1n6pR4PMA9IvwNp3re2SVaHpQNcUJuJiCjsiIl7ofFoG87YcA2BKr8aUCLrs8lVGGszsbL3N3DcAhq
10fZEiLqKwIyLiZWITUqg3bikAxQL9uKNRhcyDpneA49ZLXHR9G4qWdmGFIq6lsCMi4mUe+mqTrf3q3Q3x873sn/rdiyD2b2u7zh1wY38XVifiego7IiJeZM2+0/x15CwAk3uGc2d4RccB5+Nhdh9ru2hZ63IQIl5OYUdExIvMXGtd5LPtDWW5+8bKji+aM2BGJ3t/4EK4fBFQES+ksCMi4iVmrT1E9O6TAAy9uXrmAcvGwek91nbvb6BsbRdWJ+I+CjsiIl4g4Xw6L/60E4BKJYvQ9oayjgNO/AXrp1rb5RtCna4urlDEfRR2RES8wHvL99naP45onXnA/Ift7aHRLqhIpOBQ2BER8XBTft3LZ2usc3WejqxN6WKBjgPOx8NJ61kfbn8Z/C57XcTLKeyIiHiwI/HnmfKr/axO35ZVMw/6fqj1z2LlIWKEiyoTKTj83F2AiIjk3SsLdwEQWiKQlU+1p0iAr+OA74fCgYuXrbpN0d1XUigp7IiIeKgHv9jILzvjAJh6/42Zg86iZ2Dbd9Z27S5Qu7OLKxQpGBR2REQ80H3TfufPw/8C0LTadTQLK+U4IGY9bPjY2vbxg/tmubZAkQJEYUdExIOYLQaRU1az/+Q527ZvH4pwHHR4LczqYm0HloDRMbp8JYWaJiiLiHiQ+z9dbws6tcoV4+CrXfD1uSTInN5vDzoA/RYo6EihpzM7IiIe4rXFu/njUDwApYsGsCzqlsyDlj5nbz/wPVRu6qLqRAoundkREfEAU1fsZ9qqAwC0qF6KTWNvyzwoZj3sW2ptt3ocanZ0YYUiBZfCjohIAXc+LYM3l1rXtGpa7brMc3QA1r4HMyKtbR9/uHWsCysUKdh0GUtEpIB7YPoftvbXQ1tmHrD0eVj3gb0/cCH4BbigMhHPoLAjIlJAGYbBvdPWsTnmLAAPtr2eIP8sHhr437N0AKJ2QYmKritSxAMo7IiIFEC7YxPp/O5vGIZ925jOdRwHHd3oGHSeOQTBlz1vR0QUdkRECpqTSSl0mvKbrd+kakm+HNIS06W3kKeegxmdrO3gMvDUXvC57KyPiAAKOyIiBUpqhpkWr0Tb+m/fF849TStnHvjTSLCkW9sPfK+gI5ID3Y0lIlJAWCwG4RN+sfVf6Fo366Cz5DnYPtfabvMkVGzsmgJFPJTO7IiIFBBPz/2blHQLAI+0q8HQNtdnHjR3iD3oVG0FHca5sEIRz6QzOyIiBcDibSf4fvNRADrVL8+znepkHrTlK3vQAbh/jouqE/FsOrMjIuJmR/89zyNfb7b137+/SeZBp/bCD8Pt/efjwD/IBdWJeD6d2RERcaPzaRnc/PoKW3/xyDb4+2bxT/PiZ+zt4X8q6IjkgsKOiIgb9fp4va391ZCW1K1QIvOgQ7/BwYuBqM9sKHuDi6oT8Q4KOyIibjL9t4NsO5YAwNORtbm5VpnMgwwDfnzM2q7WGmp3dmGFIt5BYUdExA1+2HqMlxfuAqBZtesY3r5m1gMXPgn/HrK2u7zpoupEvIvCjoiIi204FM/I2Vtt/U/7N8t64KHVsPEza7tedwit7/ziRLyQwo6IiAudTEqh58frbP11Y27luqJZrFB+4i/4vJu1XaIy3D3dRRWKeB+FHRERF3ovep+tvWRUGyqEFMk8aOcP8HFbe//eGeCXRSASkauisCMi4iLbjyXw1foYAJ7oeAN1ymdx55XFDN/2t/f7LYCqLV1ToIiX0kMFRURc4EKamTveX2PrD21TPeuByy5Z/uGRdRBaz8mViXg/ndkREXGydLOF1q8vt/U/7teUooFZ/L/mmQOw7gNru8E9Cjoi+URhR0TEye76cC3xyWmA9fJVZP3ymQcZBnx9r73fdbKLqhPxfgo7IiJO9OW6w2w/lghAi7BSPN4hm+fpbP0G4g9a24MWQ5GSrilQpBBQ2BERcZJzqRlMWrwbgDa1yjDnoZswmUyZB144Cz88am03vA+qtXJdkSKFgMKOiIiTfLB8P+
fTzBQN8OXT/s2yDjqGAd8PsfcjX3VdgSKFhO7GEhFxgpNJKUxbdQCAxzvUIsjfN/Mgczp81ApO77X2I1+FYuVcWKVI4aAzOyIiTvDJqoO29oBWYVkPWvyMPehUaw0Rw51fmEghpLAjIpLPYs6cZ/oa6+KdYzrXyfqsTkoibP7C2m7yAAxa5MIKRQoXhR0RkXz23PxttvbA1mFZD9oxDywZEFwG7njXNYWJFFIKOyIi+WjAjA2s2X8agPf7NCHQL6u5Ohnw00hru3Zn8NX0SRFnUtgREcknu2MTWbX3FAB+Pia6hVfMeuC8ofZ2474uqEykcFPYERHJB+lmC52m/AZAmWKB7Hulc9YD5/SDHfOt7dajoFqEawoUKcQUdkRErtH5tAxqPb/Y1n+/T5Osn6kTuw12/WhthzaA2ya4qEKRwk1hR0TkGizdEUu9cUtt/UGtw4ioUTrrwStfs/5Zsho8vCbrMSKS7zQrTkQkjywWg+fm2e+8evWuhtzfsmrWg49sgN0/W9vdpkBWZ35ExCkUdkRE8uj1Jbs5c3E180WPt6FexRJZDzRnwGe3XeyY4Pr2rilQRABdxhIRyZP1B8/w8WrrU5I71CmXfdAB2PCxvT30V53VEXExhR0RkVzaG5dE70/W2/rv9WmS/eCMNFj6nLXduC9Ububk6kTkcgo7IiK59OqiXbb2klFtKBqYw4yAL++yt9s/78SqRCQ7CjsiIrmwLy6JlXusDw78pF9T6pTP4fLV8pfhn4t3Xd00HEIquaBCEbmcwo6ISC68cvGsToNKJbitXmj2A83p8Nvb1nbxitDpVRdUJyJZUdgREblKry/ZbTur80LXelk/OPA/K14Fw2Jtj/jTBdWJSHbcHnamTp1KWFgYQUFBtGzZkg0bNuQ4/uzZswwfPpwKFSoQGBjIDTfcwKJFi1xUrYgUVvHJaXy08gAAbWqV4abrs3lwIEDcDlgz2dpuPQoCizm/QBHJllufszNnzhyioqKYNm0aLVu2ZMqUKURGRrJnzx7KlSuXaXxaWhq33XYb5cqVY+7cuVSqVIl//vmHkiVLur54ESlUpvy6F4Agfx9mDWqR/UDDgO8vLvQZUAxuHeuC6kQkJ24NO5MnT2bYsGEMGjQIgGnTprFw4UJmzJjB6NGjM42fMWMG8fHx/P777/j7+wMQFhbmypJFpBDaE5vEF+v+AeCl7g3w9cnh8tXv78HJndb2vTPAV89uFXE3t13GSktLY9OmTXTs2NFejI8PHTt2ZN26dVnu8+OPPxIREcHw4cMJDQ2lQYMGvPrqq5jN5my/T2pqKomJiQ5fIiJXKy3Dwr0f/Q7A9WWKcl/TytkPTkmEZeOs7fKN4IZIF1QoIlfitrBz+vRpzGYzoaGOdzOEhoYSGxub5T4HDx5k7ty5mM1mFi1axNixY3n77bd5+eWXs/0+kyZNIiQkxPZVpUqVfD0OEfFus/+MISk1A4C3e4ZnPynZnAEzO9v7vb92QXUicjXcPkE5NywWC+XKleOTTz6hadOm9OrVi+eff55p06Zlu8+YMWNISEiwfR05csSFFYuIJ1u7/zTjftgBQLvaZWlS9brsBy8bC3Hbre17PoOS2SwIKiIu57aLyWXKlMHX15e4uDiH7XFxcZQvXz7LfSpUqIC/vz++vr62bXXr1iU2Npa0tDQCAgIy7RMYGEhgYGD+Fi8iXu/MuVSemLPV1n/1robZDz65G9Z/aG3X7gIN73VucSKSK247sxMQEEDTpk2Jjo62bbNYLERHRxMREZHlPq1bt2b//v1YLBbbtr1791KhQoUsg46ISF699PNOTialArD8yVuoWLJI1gPPx8P0Dvb+vTNcUJ2I5IZbL2NFRUXx6aef8vnnn7Nr1y4eeeQRkpOTbXdn9e/fnzFjxtjGP/LII8THxzNy5Ej27t3LwoULefXVVxk+fLi7DkFEvNCxsxf4YetxAB6+pQbXl83hOTkLHoW0c9b2gJ/BP5tQJCJuk6fLWG
azmVmzZhEdHc3JkycdzrQALF++/Krep1evXpw6dYpx48YRGxtL48aNWbJkiW3SckxMDD4+9jxWpUoVli5dyhNPPEGjRo2oVKkSI0eO5Nlnn83LYYiIZGnsAuvcm4ohQTwdWTv7gaf3w97F1nbHCVC9jQuqE5HcMhmGYeR2pxEjRjBr1iy6du1KhQoVMt2d8M477+RbgfktMTGRkJAQEhISKFEihwX8RKRQ2huXxO3vrAZg5sDmtK+T+QGnNt/2h50/WNe+itoJOS0fISLX5Fp+f+fpzM7s2bP59ttv6dKlS152FxEpkBJT0m1BJ+L60jkHnVN7rUEHIPJlBR2RAixPc3YCAgKoWbNmftciIuJWT3/3l60ddfsN2Q80p8MXd1rbfkFQp5uTKxORa5GnsPPkk0/y7rvvkocrYCIiBU5Kupk+n6xn6Q7rozBev6chzcNKZb/DhzdB0glru/fX4Ke7QUUKsjxdxlqzZg0rVqxg8eLF1K9f37ZO1X/mzZuXL8WJiDibYRg0nbiM5DTrsjN1yhenV/McHgi4/BU4s9/arnU71OyY/VgRKRDyFHZKlizJXXfdld+1iIi43KTFu21Bp2JIED+OuDn7wedOweo3rO3SNeH+b11QoYhcqzyFnZkzZ+Z3HSIiLnf6XCqfrD4IQJOqJZn/aOucd5hrfQYY/sHw6B+alCziIa5puYhTp06xZ88eAGrXrk3ZsmXzpSgREVcY98N2W3vWwBY5Dz5zAA7/Zm3fHAW+blttR0RyKU8TlJOTkxk8eDAVKlSgbdu2tG3blooVKzJkyBDOnz+f3zWKiOS79QfPsGhbLADPd6lLSLB/zjuset36Z1AItH3KydWJSH7KU9iJiopi1apV/PTTT5w9e5azZ8/yww8/sGrVKp588sn8rlFEJF/tP3mO3p+sB+DGqiUZ1vb6nHfYOBP+nmNt3zpWl69EPEyenqBcpkwZ5s6dS7t27Ry2r1ixgp49e3Lq1Kn8qi/f6QnKIoXbudQMWr+2nIQL6YB1kc8c1746Hw9vVLf3x/0LPm5dVlCkULqW3995+ok9f/68bf2qS5UrV06XsUSkQHs/ep8t6HwztGXOQQfg0/b29tMHFXREPFCefmojIiIYP348KSkptm0XLlxgwoQJRERE5FtxIiL5KSXdzDcbYgAY360erWqWyXmHvb/Av4et7Y4ToGhp5xYoIk6Rp9sJ3n33XSIjI6lcuTLh4eEA/PXXXwQFBbF06dJ8LVBEJL+s3X+apJQMAv18eOCmalfe4dcXrX826gU3j3JmaSLiRHkKOw0aNGDfvn18/fXX7N69G4A+ffrQt29fihQpkq8Fiojklw9XHgCgY71Q/H2vcGJ78Wg4ucPabqMbL0Q8WZ4fFBEcHMywYcPysxYREaeZt/kom/75F4C+LXJYDgKsl6/++Mjarn8XlK3t5OpExJmuOuz8+OOPdO7cGX9/f3788cccx955553XXJiISH45cy6Vpy6uaN6xbrkrz9VZ/LS9fa+eGC/i6a467PTo0YPY2FjKlStHjx49sh1nMpkwm835UZuIyDU7fvYCrV5bDoC/r4nJvRrnvMOGT+2Tkh9dr2fqiHiBqw47Fosly7aISEH27Pd/29qv9GhIiaAcnpRsGLDiVWu7aisoV9fJ1YmIK+Tp1vMvvviC1NTUTNvT0tL44osvrrkoEZFrZbEYdHh7Jb/tOw3AS93r07N5lZx3mlwXLsRb290/cHKFIuIqeQo7gwYNIiEhIdP2pKQkBg0adM1FiYhcq8dmb+HAqWQAypcIon9EWM47/Dkdkk5Y2yFVoHQN5xYoIi6Tp7BjGAamLK5jHz16lJCQkGsuSkTkWhw4dY6Ff1uDS6miASx/6pacd7BYYOElt5c/sT37sSLicXJ163mTJk0wmUyYTCY6dOiAn599d7PZzKFDh+jUqVO+Fykikht9Li7yaTLBphc6Zvk/Zw5+etzeHqwHo4p4m1yFnf/uwtq6dSuRkZEUK2
ZfUyYgIICwsDDuueeefC1QRCQ3pq7Yz8kk65zC57vUvXLQST4NW760tluPhKo3OblCEXG1XIWd8ePHYzabCQsL4/bbb6dChQrOqktEJNdeXbSLT1YfBKBKqSIMbXN9zjvEH4T3mtj7t45zYnUi4i65nrPj6+vLQw895LAIqIiIux2JP28LOgDLnrjCPJ3E445B5873wTfPD5UXkQIsTxOUGzRowMGDB688UETEBdIyLLR5Y4Wtv3nsbQT5++a803cD7e3mw6BJP+cUJyJul6ew8/LLL/PUU0/x888/c+LECRITEx2+RERcKerbrbb2hDvrU6poQM477PwRjvxhbUdOgq5v6UnJIl4sT+dsu3TpAljXwLp08t9/t6RruQgRcZVjZy/w88XbzJ+OrM2AVmE57xC3E769eBandC2IeNS5BYqI2+Up7KxYseLKg0REXODFH3cAUDzIj0fbXeFBgBYLzOlr79/9iRMrE5GCIk9h55ZbrjDxT0TEBfbGJbFsZxwAz0TWvvJt5ssnWu/AAuj1NVS60ckVikhBkOdbD86ePctnn33Grl27AKhfvz6DBw/WE5RFxGVeXmj996dWuWL0u9JyEOs/gjWTre1Wj0PdO5xbnIgUGHmaoLxx40Zq1KjBO++8Q3x8PPHx8UyePJkaNWqwefPm/K5RRCSTTf/Es3rvKQB6NrvCAp+Jx2Hpc/b+bS85sTIRKWhMhmEYud2pTZs21KxZk08//dS2ZERGRgZDhw7l4MGDrF69Ot8LzS+JiYmEhISQkJBAiRIl3F2OiOSB2WLQeMIvJKVmUMTfl50vRWZ/CSv9ArxS3tr2DYAn90BwKdcVKyL54lp+f+fpMtbGjRsdgg6An58fzzzzDM2aNcvLW4qIXJVzqRk0GG9fv+qzgc1ynqvzxzR7u9t7CjoihVCeLmOVKFGCmJiYTNuPHDlC8eLFr7koEZGs7Dye6BB0ujasQKsaZbLf4dReWP6ytd3gHmjcx8kVikhBlKew06tXL4YMGcKcOXM4cuQIR44cYfbs2QwdOpQ+ffSPiYjkvwyzhQEzN9j6D7a9nql9r3A31U8jwZJhbd/xjhOrE5GCLE+Xsd566y1MJhP9+/cnI8P6D4m/vz+PPPIIr732Wr4WKCICMOv3w5y6uJr5tw9F0KL6FS5H/f0dxPxubff6CoJ0p6hIYZWnCcr/OX/+PAcOHACgRo0aBAcH51thzqIJyiKeJ+FCOuETfgGgV7MqvH5vo5x3OLUXpja3tqvdDIMWOrlCEXE2l09Q/k9wcDAlS5a0tUVEnOHNpbtt7ee61r3yDtM72ts9P3dCRSLiSfI0ZycjI4OxY8cSEhJCWFgYYWFhhISE8MILL5Cenp7fNYpIIbbhUDxfrbfeEPH2feGEFPHPeYev7oHUBGt72AoomsMEZhEpFPJ0Zuexxx5j3rx5vPHGG0RERACwbt06XnzxRc6cOcNHH32Ur0WKSOG0OzaRnh+vA6Bs8UDuvrFSzjv8NQf2/2pth7XRchAiAuRxzk5ISAizZ8+mc+fODtsXLVpEnz59SEhIyLcC85vm7Ih4jrDR9rk2vzzRlhtCc3i0xYm/4eM21naRUvDMQbjSWlki4jFcPmcnMDCQsLCwTNurV69OQEBAXt5SRMTGMAyivv3L1v+w7405Bx3DgLmD7f1H1yvoiIhNnubsjBgxgokTJ5KammrblpqayiuvvMKIESPyrTgRKZzeXLqH+VuOAdCocghdGlbIeYff34Mz+6ztQUugeKiTKxQRT5KnMztbtmwhOjqaypUrEx4eDsBff/1FWloaHTp04O6777aNnTdvXv5UKiKFwpw/Y/hwpfWRFiFF/Jn/aOucdzi9D6InWttl60C1CCdXKCKeJk9hp2TJktxzzz0O26pUucKqwyIiV7Dpn3ie/X6brb9l7G34+FzhctTCKLCkWxf5fED/cyUimeUp7MycOTO/6xCRQm79wTP0/mS9rb/sibZXDjobPoVDq63tnl
9AyBXu1hKRQumaHip46tQp9uzZA0Dt2rUpW7ZsvhQlIoXPiz/usLXnPHgTtXKakAxwbDMsfsbartQUanfOebyIFFp5mqCcnJzM4MGDqVChAm3btqVt27ZUrFiRIUOGcP78+fyuUUS83Hcbj7A7NgmAJaPa0PL60jnvYBgw6w4wLNb+gJ+dXKGIeLI8hZ2oqChWrVrFTz/9xNmzZzl79iw//PADq1at4sknn8zvGkXEiyWnZvD03L8BaFK1JHXKX8XzM764E9KTre2hyyFAy9WISPbydBnr+++/Z+7cubRr1862rUuXLhQpUoSePXvqCcoictX+tyHG1p7ev9mVd/jjY/s8ncrNoXJTJ1UmIt4iT2d2zp8/T2ho5udYlCtXTpexROSqbfonnpcX7gLg7hsrUbpYYM47WCz228wBBi5yYnUi4i3yFHYiIiIYP348KSkptm0XLlxgwoQJtrWyRERysv/kOe75yLrulckEIzvUuvJOW76ANOvcHp7cC356YruIXFmeLmNNmTKFTp06ZXqoYFBQEEuXLs3XAkXEOw2cucHWXvR4G6qVLprzDmnJ8NNIa/umR/WUZBG5ankKOw0bNmTfvn18/fXX7N69G4A+ffrQt29fihQpkq8Fioj3Wfj3CY7+ewGAN+5pRN0KV5iUbBjwakV7v81TTqxORLxNrsNOeno6derU4eeff2bYsGHOqElEvNi/yWkM/2YzADXKFqVn86t4+vr3Q+3tds9B0Svcmi4icolcz9nx9/d3mKsjIpIbz3z/t609Y2DzK++w+UvYPtfabvEgtHvWSZWJiLfK0wTl4cOH8/rrr5ORkZHf9YiIF/th6zGW7YwDYGCrsCvP0/lzOvw4wtqufzd0edPJFYqIN8rTnJ0///yT6OhofvnlFxo2bEjRoo7/YGmlcxG5nGEYPHvxrI6fj4kxXerkvEPaeVg82t6/62MnVici3izfVj0XEcnJku2xpKRbl3fY9MJtBPr55rzD3MHW1cwBntip28xFJM9yFXYsFgtvvvkme/fuJS0tjVtvvZUXX3xRd2CJSI6SUzN45GvrpOQuDcsTEuyf8w57f4G9i63tzm9qNXMRuSa5mrPzyiuv8Nxzz1GsWDEqVarEe++9x/Dhw51Vm4h4icnL9traT91eO+fBqedg7iBrO/x+aPmgEysTkcIgV2Hniy++4MMPP2Tp0qUsWLCAn376ia+//hqLxeKs+kTEwyVcSOezNYcA6FS/PNeXLZb9YIsFPrkF0s5Z+x3GuaBCEfF2uQo7MTExdOnSxdbv2LEjJpOJ48ePX1MRU6dOJSwsjKCgIFq2bMmGDRuuvBMwe/ZsTCYTPXr0uKbvLyLO89JPO23t0Z2vMCl5wydwZr+13fFFKFHBeYWJSKGRq7CTkZFBUFCQwzZ/f3/S09PzXMCcOXOIiopi/PjxbN68mfDwcCIjIzl58mSO+x0+fJinnnqKNm3a5Pl7i4hznUpK5fvNRwF4sVs9wsrkcKv52SOw4hVrO7QhtBrpggpFpDDI1QRlwzAYOHAggYH2lYlTUlJ4+OGHHW4/z82t55MnT2bYsGEMGmS9Rj9t2jQWLlzIjBkzGD16dJb7mM1m+vbty4QJE/jtt984e/Zsbg5DRFzk57+tZ33LFg+kX0RY9gMNA+b0hdREa3/Aj+CTp8eAiYhkkquwM2DAgEzbHnjggTx/87S0NDZt2sSYMWNs23x8fOjYsSPr1q3Ldr+XXnqJcuXKMWTIEH777bccv0dqaiqpqam2fmJiYp7rFZGrF3PmPBMuXsK6v0VVfH1M2Q9eNxVO/GVt3/MZBJdyQYUiUljkKuzMnDkzX7/56dOnMZvNhIY6rl4cGhpqW2D0cmvWrOGzzz5j69atV/U9Jk2axIQJE661VBHJhb1xSdz+zmpb/75mlbMfbBiw9l1ru1IzaHivk6sTkcLGo84TJyUl0a9fPz799FPKlClzVfuMGTOGhIQE29eRI0ecXKWI/G9DjL097C
YqXxec/eD5D0HyxTl6vb92cmUiUhjl6QnK+aVMmTL4+voSFxfnsD0uLo7y5ctnGn/gwAEOHz5Mt27dbNv+u+3dz8+PPXv2UKNGDYd9AgMDHeYYiYhz7TyeyMy1hwEY2aEWETVyWKH8yAb4e461XbcbFM/8cy8icq3cemYnICCApk2bEh0dbdtmsViIjo4mIiIi0/g6deqwbds2tm7davu68847ad++PVu3bqVKlSquLF9ELpNutvDwV5ts/YGtwrIfbBgwb5i9f9/nzitMRAo1t57ZAYiKimLAgAE0a9aMFi1aMGXKFJKTk213Z/Xv359KlSoxadIkgoKCaNCggcP+JUuWBMi0XURc7+1f9hITfx5fHxM/DG/NdUVzWM9q00z497C1/cA88LnCWlkiInnk9rDTq1cvTp06xbhx44iNjaVx48YsWbLENmk5JiYGH92CKlLgHYk/z7RVBwDod1M1GlQKyX5w3E74+Qlru14PqNnB+QWKSKFlMgzDcHcRrpSYmEhISAgJCQmUKFHC3eWIeI1u769h27EEShUNYN2YW3Ne1fzzO+HQKgguDY9thiIlXVaniHima/n9rVMmInLN9p88x7ZjCQBMe6BpzkEnZr016ADc+YGCjog4ncKOiFyzZ+ZaHwjYpGpJWlTP4YGA8YdgRqS1XSwU6nTJfqyISD5R2BGRa/La4t1sjjkLwIvd6uc8+L3G9vbdnzitJhGRSynsiEieXTopuXvjioRXKZn94C1f2dt3vAPXt3NqbSIi/1HYEZE8GzBjAwCBfj5M6dU4+4EWM/ww3Nqu0QGaDXZ+cSIiFynsiEiezNt8lIOnkwGY2KMBJlMOC33+MtbevuMdJ1cmIuJIYUdE8mTG2kMA3FqnHD2b5fD08thtsH6qtV2vO1xXzQXViYjYKeyISK59uf4fth9LBOD5rnVzHrzwSXv77k+dWJWISNYUdkQkVwzDYMYa61mdG0KLUaNssewH7/oJjvxhbd83C/y0KK+IuJ7Cjojkyoo9Jzl0ca7OjIHNcx68Y771z+DSUP8uJ1cmIpI1hR0RuWrrD55h8KyNAHQLr0jl64KzH/zHx7D9e2tbz9QRETdS2BGRq5KUkk7vT9bb+sPb18h+cMwfsPgZazsoBK5v7+TqRESyp7AjIldlzLxttvbsB2+iTvkcFuJb8Yq9/fBa8MlhrSwRESdT2BGRKzqZmMLCbScAePK2G7jp+tLZD1431b7Q57AVUDKH29JFRFxAYUdEcrTtaAItXo3GMCCsdDCPdaiV/eDz8bD0OWu7TG2odKNrihQRyYHCjohka8n2WLp9sMbWH96+Zs47fH2v9U+TLwxe4sTKRESunp+7CxCRgik2IYWHv9pk63/U90Y6N6yQ/Q7zHoRjF8ffMRmCSzm5QhGRq6OwIyKZ/Jucxk2TogHw8zGx8YWOlAwOyH6HvUvh7zn2ftOBzi1QRCQXdBlLRBykZVho+vIyW3/WoBY5Bx2LGZaMtrb9i8LoGCdXKCKSOwo7IuLgszWHsBjW9sQeDbi5Vpmcd5jdF+IPWtvDoq3P1RERKUAUdkTEZnPMv7y+ZDcAdzSqQL+brrBC+aHfYO9ia/um4VDuCouCioi4gcKOiACQkm5m6OfWpSCKBfrxyl0Nc97BYoGfHrf3b3/ZidWJiOSdwo6IADDx553EJ6cB8P0jrQgp4p/zDn/PsV++euR38NE/JyJSMOlfJxEhPjmNr/+wTizu3rgitcsXz3mHcyftDw8M7wOh9Z1coYhI3unWc5FCzmIxuHGi/e6r1+5ulPMOhgFv3QAYEBgCka86t0ARkWukMzsihZhhGNzxvv0JyW/c04giAVdYtHP9R8DF27X08EAR8QAKOyKF2MSfd7HzRCIAdzWpRM/mV1i0M+EYLB1jbTe4Bxre6+QKRUSuncKOSCGVbrbw5frDtv6b917h8hXA90Pt7e4f5n9RIiJOoDk7IoWQxWJQ6/nFtv5vz7THz/cK/+/z+/sQ87
u1HTkJ/IOcWKGISP7RmR2RQmj6moO29p3hFalSKjjnHRKPwy8vWNtl60LEo06sTkQkfynsiBQyC7Yc49VF1qck3xlekff6NLnyTpdevnpgrpMqExFxDoUdkULknzPJjJqz1daf2L3BlXc6thn+WWtt9/4GQio7pzgRESdR2BEpRP57cCDAqqfbERJ8hackpyTAp+2t7crNoU5XJ1YnIuIcCjsihcSFNDOfrLbO1XnythuoVrpozjtkpMFrVe39O95xYnUiIs6jsCNSSIz7YbutPaB1WM6DDQO+uc/e7zAeyl9hYVARkQJKt56LeLmUdDN9p//Bpn/+BWDcHfUoEXSFy1d/zYaDK63t1iOhTZRzixQRcSKFHREvduZcKre8uZJzqRkAlCseyOCbq+e8U+o5WPCwtV2pKdz2kpOrFBFxLl3GEvFij8/eYgs6PRpXZN2YDjnvEH8QJlWy9+/5zInViYi4hs7siHipMfO2sXb/GQBe6l6f/hFhOe9gMcPnd9r73d6FUlc4CyQi4gEUdkS80PvR+/jfButt5uFVStK3ZbWcd0g7D29Uh4wUa/+ez7TIp4h4DYUdES/z1tI9fLBiPwCVShZhwaOtMJlM2e/w7QDYucDej5ykoCMiXkVhR8SL/LIj1hZ0AJY/dUv2QSclET67HU7tsm9r/7zWvRIRr6OwI+IlDp9O5sEvN9n6m17oSKCfb9aDY7fDtNaO257cC8VDnVihiIh76G4sES+QcD6ddm+ttPV/eaItpYsFZj34yJ+OQadxXxh7WkFHRLyWzuyIeDiLxSD8pV9s/dfvacgNocWzHnx4LczqYu/f/jK0eszJFYqIuJfCjoiHe+nnnbb2neEV6dW8atYDDQO+G2jvP74FSl3v3OJERAoAXcYS8WCHTycz6/fDADSoVIL3+jTJfvAPIyD5pLXdZ7aCjogUGgo7Ih7sm4vP0gH47qFW2Q+Mfgm2fmVt3xwFtTs7uTIRkYJDYUfEQyWlpPP5xbM6ozvXoUhANndemdNh0yxr268I3DrWJfWJiBQUCjsiHuqdZftIzbDg52NiYKuw7AeufhPOW5eN4On94KMfexEpXPSvnogHOns+jRlrDwHwUvcGBPlnc1Yn8Tiset3abjcGAou5qEIRkYJDYUfEAz0/fzsAxQL96NOiStaD/j0Mk+ta24EloO0zrilORKSAUdgR8TCb/oln4bYTAAxqHZb9chALLln2ofsHunwlIoWW/vUT8SDpZgu9P1lv60fddkPWA//+Fv5Za213eg3qdXdBdSIiBZPCjogHefq7v0g3GwB8OaRF1md1jvwJ84ZZ2/7B0OJBF1YoIlLwKOyIeIhjZy/w89/Wy1dtapWhTa2ymQdtnAmfdbT3n9oLPtlMXhYRKSQUdkQ8QEq6mQ5vryTDYlCpZBG+GNwi8yCLGRZfMgl5yK8QmM0aWSIihYjWxhLxAFHfbiUl3QLAY7fWzHz5yjBgRicwp1n7IzZBmZourlJEpGDSmR2RAi4l3czSHXEA3FqnHL1bZLHQ54JH4egGazv8fgUdEZFLKOyIFGCGYdD9g7WYLQY+JvikX9PMgw6uhL++sbbD2sBdH7m0RhGRgk5hR6QAm7/lGHvikgBoXbMMfr6X/cimJcMXPazt0AYw8GfXFigi4gEUdkQKqO3HEoj69i8A6pQvnvWk5M/vBKy3otPtPdcVJyLiQRR2RAqo95fvs7XnPBiReVLygRVwbKO13f55qJzFJS4RESkYYWfq1KmEhYURFBREy5Yt2bBhQ7ZjP/30U9q0acN1113HddddR8eOHXMcL+KJlu2Ms01KntKrMSHB/o4DkmLhyx7WdpWWcIvWvRIRyY7bw86cOXOIiopi/PjxbN68mfDwcCIjIzl58mSW41euXEmfPn1YsWIF69ato0qVKtx+++0cO3bMxZWLOEdSSjrDvrCesSlbPJAeTSo5DjAM+OKS5R/u/tSF1YmIeB6TYRiGOwto2bIlzZs354MPPg
DAYrFQpUoVHnvsMUaPHn3F/c1mM9dddx0ffPAB/fv3v+L4xMREQkJCSEhIoESJEtdcv0h+SsuwcMMLi239bx+KoEX1Uo6DPmkHx7dY23d/Co16uq5AERE3uZbf3249s5OWlsamTZvo2NH+eHsfHx86duzIunXrruo9zp8/T3p6OqVKlcry9dTUVBITEx2+RAqik0kpDkHn6cjamYPO5i/tQeeGTtDwPhdWKCLimdwadk6fPo3ZbCY0NNRhe2hoKLGxsVf1Hs8++ywVK1Z0CEyXmjRpEiEhIbavKlWqXHPdIs7w2uLdtnabWmV4tF0NxwF/zYEfR1jbwaWhz2zIaiFQERFx4PY5O9fitddeY/bs2cyfP5+goKAsx4wZM4aEhATb15EjR1xcpciV7Y5NZN5m67yzXs2q8OWQlo53X83pB/MvWb28/48KOiIiV8mta2OVKVMGX19f4uLiHLbHxcVRvnz5HPd96623eO211/j1119p1KhRtuMCAwMJDAzMl3pFnGHJ9hM8/NVmW//ZznUcB8zpB7t+tPef2g/FsljxXEREsuTWMzsBAQE0bdqU6Oho2zaLxUJ0dDQRERHZ7vfGG28wceJElixZQrNmzVxRqohTpGVYGPvDDlv/f8NuolTRAPuAb/vbg46PP4w9o6AjIpJLbl/1PCoqigEDBtCsWTNatGjBlClTSE5OZtCgQQD079+fSpUqMWnSJABef/11xo0bxzfffENYWJhtbk+xYsUoVqyY245DJLfOnEul6cu/AhDg68OqZ9pRIaSIfcDXPWHfUmvbxx/GntKlKxGRPHB72OnVqxenTp1i3LhxxMbG0rhxY5YsWWKbtBwTE4OPj/0E1EcffURaWhr33nuvw/uMHz+eF1980ZWli+TZiYQLRExabuu/3KOBPeicj4c3qtsHFysPo/5W0BERySO3P2fH1fScHXE3i8XghhcWk2Gx/ug9dmtNnry9tvXFC2fhy7vg+MU5PBUaw4MrFXREpNC7lt/fbj+zI1KYXEgz033qGlvQeblHAx64qZr1xWOb4dP29sFNHoDuU91QpYiId1HYEXGh+z7+nb1x5wAY1qa6NeikJMLbtSH9vH1go15axVxEJJ8o7Ii4QGxCCjdNst912Lt5FZ7vWg/OHoEpDRwHD1kGVVq4uEIREe+lsCPiZKfPpToEndqhxXm5RwOIPwjvNbEPbDoIuk4GH49+1qeISIGjsCPiROlmC80u3l4O8FL3+vSPCINNs+CnkfaB930O9Xu4ujwRkUJBYUfESbYfS+CO99fY+s92qkP/m6rBq5UhLck+sOcXUK+7GyoUESkcFHZE8llahoV+n/3BH4fibdsm9wzn7kbl4N1wx6Dz+FYoVT3zm4iISL5R2BHJR+8s28u70fsctk17oCmdQhPh5UuWebi+PfRf4NriREQKKYUdkWtkGAbzNh/jye/+ctjeoFIJvn+kFYFH1sLUbpe8cC/c+5mLqxQRKbwUdkSugcVi0GjCL5xLzXDYvuH5DpQr4gMft4JTu+0vDFwIYTe7uEoRkcJNYUckDwzDYNnOOB78cpPD9pkDm9O+TjlISYD3W0PCEfuL981S0BERcQOFHZFcunwRT4D2tcsyc9DFBwEe+g0+v8P+YqPecNc0rW8lIuImCjsiubDtaALdPljjsO3xDrV4omMta2fPEvhfL/uL7cZAu9EurFBERC6nsCNyFcwWg67v/cbuWPtt46M71+HhW2rYB8X84Rh0Bi2BahEurFJERLKisCNyBRlmC3e8v8Yh6Lx5byPua1bFPujcSZhxu72voCMiUmAo7IjkICXdTMfJqzj67wUA6lUowY8jWuPne8n6Vb9NhugJ9v4j6yC0nosrFRGR7CjsiORg7ILttqDToFIJfhpxM6b/JhqnnYfXqoDlktvOe32loCMiUsAo7Ihk438bYvhu01EAOjcoz0cPNLW/eGQDfHab4w5Ru6FEBRdWKCIiV0NhRyQLj/9vCz/+dRyA64
L9ebd3E/uLu36COQ/Y+416w90fu7hCERG5Wgo7Ipd57H9b+Oli0PH3NbHm2VsJ8Ls4R+f4FsegM/gXqNrSDVWKiMjVUtgRucTdH65lc8xZAPx8TPw9PpIiAb7WF9NT4JN29sFP7YdiZTO9h4iIFCwKOyJAWoaFJi/9QnKa2bZt+4RIgvwvBh2LGb68y75D728UdEREPITCjhR6fx6O575p62z9m2uW4cshLex3XSWegMl17Dv0+AjqdHVxlSIiklcKO1KoHYk/7xB0Hmp7PWO61LUPsJgdg074/dD4fhdWKCIi10phRwq1p777y9Z+v08TuoVXdBww/yF7u+3TcOsLLqpMRETyi8KOFFrPzv2bPw7FA/DNsJa0qlHGccCf02Hbd9Z2+UYKOiIiHkphRwqd2IQUbpoUbetXKVUkc9BZ/gqsfsPa9guCYctdWKGIiOQnnysPEfEeZ86lOgQdgCUj2zoO2rHAHnR8AyBqF/j6u6ZAERHJdzqzI4XGudQMur2/xtbvd1M1JvZo4Dgo+Qx8N8DeHx0D/kVcVKGIiDiDwo4UCjFnztP2zRW2/tv3hXNP08qOg9IvwLTW9v7jWxV0RES8gC5jidfbF5fkEHSG3lw9c9AB+KYnJJ2wtu+dCaWqu6hCERFxJp3ZEa+283giXd77zdZ/tlMdHmlXw3FQRhrM7gOHVlv7rR6HBne7sEoREXEmhR3xWnGJKQ5B57kudXiw7WVB5+Ru63pXGRes/Rq3wu0TXVekiIg4ncKOeCWzxXC462rmwOa0r1POcdDZI/DhJSuWtxsDtzzrogpFRMRVNGdHvE6G2UKD8UsxDGv/6cjamYPOn5/BlEvuxOryFrQbDf+thyUiIl5DZ3bEq8QlptDyVfsZnSdvu4Hh7WvaB1jM1iUg/nsyMsC9M6DBPS6sUkREXElhR7xGwoV0h6Bzc80yPNahln3Aqb0w7WYwp9q3Pboeyl2y8KeIiHgdhR3xCmkZFnp9bF+9vG/LqrxyV0NrxzDg2/6w60f7DiWrwcO/QVCIiysVERFXU9gRj5eWYaHPp+vZHZsEwMf9mhJZv7z1xZj11ufnpCTYd+j7PdTq6IZKRUTEHRR2xKMdPp1M+7dX2iYjD7m5OpEhR+Hn12HjDMfBZevCwIVQtLTrCxUREbdR2BGPs3rvKeZvOcb8LccctvetGMvYjffDxix2GrgIwlpn8YKIiHg7hR3xGDPWHOLT3w5yIiEl02uv+31Cr/iVjhsb3gfNh0LVm1xToIiIFEgKO1JgpaSbOXDqHM/N28ZfRxMyvR7ps4F7fVfT0Wez/fE4NW+Dtk8p4IiIiI3CjrhdwoV0lu2MY+6mI5gwsfGfeNLNRpZji5DCBL/Puc93lT3gNB8Kde+EsDbgo+dkioiII4UdcZkMs4XZfx7h57+PczIplYOnkini78uFdHOO+4WZYrnedJwJfp9TxecUlLoewgZA/R4Q1hZ89Z+xiIhkT78lxKn+TU7jy/X/8PPfx9kbdy7T65cGnXoVilHVfIQeid9wneUMNU3HuI5z+JgunuXp8hY07gsBwa4qX0REvIDCjjjF/pNJTPl1Hz//fSLL10d2qEXDSiFUKRVMyWB/yljO4Dulnn3Af1ejat0Od30MwaWcX7SIiHglhR3Jdwu2HGPUnK0O28Irh9CnRVU6N6xASBF/+wsWC7zfBP49fMloE3SfCuG9wcfXFSWLiIgXU9iRfBOfnEavj9ex76T9clWPxhUZ3bku5UOCMu+wfhosedZxW5snocM4J1cqIiKFicKO5IuPVx1g0uLdDtt+e6Y9VUplM7/mq3tg/6/2fs3b4IG5TqxQREQKK4UduSZJKekM/XwjfxyKt20b0b4mUbfdgI+PKfMOB5bDytfgyB/2bfd/CzdEuqBaEREpjBR25Jq8umi3LeiULR7Il0NaUKd8icwD/1kHMzs5bitZFR7fqnk5IiLiVAo7kid7YpO4+8O1JKdZbx3v2qgCU++/MfPA1C
T4dgAciHbcfvsrEDEc+5MBRUREnENhR3Jt7f7T9J1uvwx1XbA/7/ZqnHngikmw6jXHbW2fhvbPK+SIiIjLKOxIrkz/7SAvL9xl6z/SrgbPRNbGdGl4MWfA1OYQf9C+7YbO0PtrXbISERGXU9iRq2KxGDR75Vfik9Ns25aMauM4Pyd2Gyx/GfYucdz5iR0QUtlFlYqIiDhS2JErmvLrXqb8us9h2+qn21O19MXbyi+chbdqgTnNccdSNWD4H+Drj4iIiLso7Ei2MswWbn59BbGJKbZtvZpV4fV7G4FhwO6FsHg0JMQ47nhDZ+vcnMpNXVyxiIhIZgo74uDw6WSOJ1xg8i972fjPv7btgX4+LH2wPmFJm2DOG7Drp8w7dxhnfQKyiIhIAaKw483M6XB8CyTFWufR+AVaLzUlHIWzMVAslHNmX+Yn1mbymVb8aymS5dt0DdjMVJ+3YEY236f7VKjbDYJCnHcsIiIieaSw48FOn0tl4+F/iYlP5mRiKkH+vlgsZsynD2Ac34o5MRYLJgxMmPHnnBHIdqMOlUwVSacVSUYR/jZqZPne4aYDpOHLR/7vEuYT5/hiWBsIKAa3TYCytV1wpCIiInmnsOMh0jIsxCaksP14AlNX7GfH8cQcRvsAWTzg76J9RuY7oyoEw5AGPtx3gy8hQT5gagAmHzBNt/6JCa6rBiUq6Rk5IiLiURR2Cpgj8ec5dS6VAyfPkZiSwXcbj7A7NinHfeqbDhFMKqVNiVQ0ncYHA18fE6ZydfCp0R5fP39MJhM+Just5L4+PoSVCSbA14dAfx+ah5WieJDumBIREe9UIMLO1KlTefPNN4mNjSU8PJz333+fFi1aZDv+u+++Y+zYsRw+fJhatWrx+uuv06VLFxdWnH8Mw+D3A2eY+PPOK4YagBDOAdDBZwtD/BZR1xSDT/WbrWdfKjaBih2h+i0QXMrZpYuIiHgEt4edOXPmEBUVxbRp02jZsiVTpkwhMjKSPXv2UK5cuUzjf//9d/r06cOkSZO44447+Oabb+jRowebN2+mQYMGbjiCq5dutvD574dZtO0Em2POUjzQj6TUjCzHli0eSFB6Ao0z/iLQSKGv7680Mh3E12RYF9Cs0hIavwuVm0NgMRcfiYiIiOcwGYZhuLOAli1b0rx5cz744AMALBYLVapU4bHHHmP06NGZxvfq1Yvk5GR+/vln27abbrqJxo0bM23atCt+v8TEREJCQkhISKBEiSxW574Gq/eeYt/Jc6RlWNj0TzwlgwNISTfz6644igb4cSY5Lcf9u1dM5PkaByn35xtZD4gYYb292y8wX+sWEREp6K7l97dbz+ykpaWxadMmxowZY9vm4+NDx44dWbduXZb7rFu3jqioKIdtkZGRLFiwIMvxqamppKam2voJCQmA9S8tP/1zJpkHPlqT7evnL+sPah3GLTVCqPJdV0IsZwgyZcAJ4ARkquzemVCtNQQEw/lUIPXyESIiIl7tv9/beTlH49awc/r0acxmM6GhoQ7bQ0ND2b17d5b7xMbGZjk+NjY2y/GTJk1iwoQJmbZXqVIlj1Xnj5em5GLwa/c6qwwRERGPkpSUREhI7p7r5vY5O842ZswYhzNBFouF+Ph4Spcu7bhSdwGQmJhIlSpVOHLkSL5fYisIvP34wPuPUcfn+bz9GHV8ni+7YzQMg6SkJCpWrJjr93Rr2ClTpgy+vr7ExTk+tC4uLo7y5ctnuU/58uVzNT4wMJDAQMc5LiVLlsx70S5QokQJr/2PGLz/+MD7j1HH5/m8/Rh1fJ4vq2PM7Rmd//jkR0F5FRAQQNOmTYmOjrZts1gsREdHExERkeU+ERERDuMBli1blu14ERERKdzcfhkrKiqKAQMG0KxZM1q0aMGUKVNITk5m0KBBAPTv359KlSoxadIkAEaOHMktt9zC22+/TdeuXZk9ezYbN27kk08+cedhiIiISAHl9rDTq1cvTp06xbhx44iNjaVx48YsWbLENgk5Ji
YGHx/7CahWrVrxzTff8MILL/Dcc89Rq1YtFixYUOCfsXM1AgMDGT9+fKbLbt7C248PvP8YdXyez9uPUcfn+ZxxjG5/zo6IiIiIM7l1zo6IiIiIsynsiIiIiFdT2BERERGvprAjIiIiXk1hx8WmTp1KWFgYQUFBtGzZkg0bNuQ4/rvvvqNOnToEBQXRsGFDFi1a5KJK8yY3xzdr1ixMJpPDV1BQkAurzZ3Vq1fTrVs3KlasiMlkynY9tkutXLmSG2+8kcDAQGrWrMmsWbOcXmde5fb4Vq5cmenzM5lM2S7d4m6TJk2iefPmFC9enHLlytGjRw/27Nlzxf086WcwL8foST+HH330EY0aNbI9bC4iIoLFixfnuI8nfX65PT5P+uyy8tprr2EymRg1alSO4/LjM1TYcaE5c+YQFRXF+PHj2bx5M+Hh4URGRnLy5Mksx//+++/06dOHIUOGsGXLFnr06EGPHj3Yvn27iyu/Ork9PrA+IfPEiRO2r3/++ceFFedOcnIy4eHhTJ069arGHzp0iK5du9K+fXu2bt3KqFGjGDp0KEuXLnVypXmT2+P7z549exw+w3LlyjmpwmuzatUqhg8fzvr161m2bBnp6encfvvtJCcnZ7uPp/0M5uUYwXN+DitXrsxrr73Gpk2b2LhxI7feeivdu3dnx44dWY73tM8vt8cHnvPZXe7PP//k448/plGjRjmOy7fP0BCXadGihTF8+HBb32w2GxUrVjQmTZqU5fiePXsaXbt2ddjWsmVL46GHHnJqnXmV2+ObOXOmERIS4qLq8hdgzJ8/P8cxzzzzjFG/fn2Hbb169TIiIyOdWFn+uJrjW7FihQEY//77r0tqym8nT540AGPVqlXZjvG0n8HLXc0xevLPoWEYxnXXXWdMnz49y9c8/fMzjJyPz1M/u6SkJKNWrVrGsmXLjFtuucUYOXJktmPz6zPUmR0XSUtLY9OmTXTs2NG2zcfHh44dO7Ju3bos91m3bp3DeIDIyMhsx7tTXo4P4Ny5c1SrVo0qVapc8f9gPI0nfX7XonHjxlSoUIHbbruNtWvXurucq5aQkABAqVKlsh3j6Z/h1RwjeObPodlsZvbs2SQnJ2e7XJAnf35Xc3zgmZ/d8OHD6dq1a6bPJiv59Rkq7LjI6dOnMZvNtidD/yc0NDTbOQ6xsbG5Gu9OeTm+2rVrM2PGDH744Qe++uorLBYLrVq14ujRo64o2emy+/wSExO5cOGCm6rKPxUqVGDatGl8//33fP/991SpUoV27dqxefNmd5d2RRaLhVGjRtG6descn77uST+Dl7vaY/S0n8Nt27ZRrFgxAgMDefjhh5k/fz716tXLcqwnfn65OT5P++wAZs+ezebNm21LQF1Jfn2Gbl8uQgqviIgIh/9jadWqFXXr1uXjjz9m4sSJbqxMrkbt2rWpXbu2rd+qVSsOHDjAO++8w5dffunGyq5s+PDhbN++nTVr1ri7FKe52mP0tJ/D2rVrs3XrVhISEpg7dy4DBgxg1apV2QYCT5Ob4/O0z+7IkSOMHDmSZcuWuXwitcKOi5QpUwZfX1/i4uIctsfFxVG+fPks9ylfvnyuxrtTXo7vcv7+/jRp0oT9+/c7o0SXy+7zK1GiBEWKFHFTVc7VokWLAh8gRowYwc8//8zq1aupXLlyjmM96WfwUrk5xssV9J/DgIAAatasCUDTpk35888/effdd/n4448zjfXEzy83x3e5gv7Zbdq0iZMnT3LjjTfatpnNZlavXs0HH3xAamoqvr6+Dvvk12eoy1guEhAQQNOmTYmOjrZts1gsREdHZ3s9NiIiwmE8wLJly3K8fusueTm+y5nNZrZt20aFChWcVaZLedLnl1+2bt1aYD8/wzAYMWIE8+fPZ/ny5VSvXv2K+3jaZ5iXY7ycp/0cWiwWUlNTs3zN0z6/rOR0fJcr6J9dhw4d2LZtG1u3brV9NWvWjL59+7J169ZMQQfy8TPM/TxqyavZs2cbgY
GBxqxZs4ydO3caDz74oFGyZEkjNjbWMAzD6NevnzF69Gjb+LVr1xp+fn7GW2+9ZezatcsYP3684e/vb2zbts1dh5Cj3B7fhAkTjKVLlxoHDhwwNm3aZPTu3dsICgoyduzY4a5DyFFSUpKxZcsWY8uWLQZgTJ482diyZYvxzz//GIZhGKNHjzb69etnG3/w4EEjODjYePrpp41du3YZU6dONXx9fY0lS5a46xBylNvje+edd4wFCxYY+/btM7Zt22aMHDnS8PHxMX799Vd3HUKOHnnkESMkJMRYuXKlceLECdvX+fPnbWM8/WcwL8foST+Ho0ePNlatWmUcOnTI+Pvvv43Ro0cbJpPJ+OWXXwzD8PzPL7fH50mfXXYuvxvLWZ+hwo6Lvf/++0bVqlWNgIAAo0WLFsb69ettr91yyy3GgAEDHMZ/++23xg033GAEBAQY9evXNxYuXOjiinMnN8c3atQo29jQ0FCjS5cuxubNm91Q9dX571bry7/+O6YBAwYYt9xyS6Z9GjdubAQEBBjXX3+9MXPmTJfXfbVye3yvv/66UaNGDSMoKMgoVaqU0a5dO2P58uXuKf4qZHVsgMNn4uk/g3k5Rk/6ORw8eLBRrVo1IyAgwChbtqzRoUMHWxAwDM///HJ7fJ702WXn8rDjrM/QZBiGkbtzQSIiIiKeQ3N2RERExKsp7IiIiIhXU9gRERERr6awIyIiIl5NYUdERES8msKOiIiIeDWFHREREfFqCjsiIiLi1RR2RMSjGYZBx44diYyMzPTahx9+SMmSJTl69KgbKhORgkJhR0Q8mslkYubMmfzxxx8OK0MfOnSIZ555hvfffz/XK3+LiHfRchEi4hU+//xzRowYwd9//01YWBgdOnSgZMmSzJs3z92liYibKeyIiNfo0aMHCQkJ3H333UycOJEdO3ZQtmxZd5clIm6msCMiXuPkyZPUr1+f+Ph4vv/+e3r06OHukkSkANCcHRHxGuXKleOhhx6ibt26CjoiYqOwIyJexc/PDz8/P3eXISIFiMKOiIiIeDWFHREREfFqCjsiIiLi1XQ3loiIiHg1ndkRERERr6awIyIiIl5NYUdERES8msKOiIiIeDWFHREREfFqCjsiIiLi1RR2RERExKsp7IiIiIhXU9gRERERr6awIyIiIl5NYUdERES8msKOiIiIeLX/A0a+YMgtm9FxAAAAAElFTkSuQmCC", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "TRUE_EFFECT = 0.1\n", - "cd = generate_synth_data_with_categories(n_samples=8000, n_x=3, true_effect=TRUE_EFFECT)\n", - "cd.preprocess_dataset()\n", - "sns.ecdfplot(data=cd.data, x=cd.outcomes[0], hue=cd.treatment)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. ATE estimation: Running CausalTune\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# CausalTune configuration\n", - "num_samples = 5\n", - "components_time_budget = 10\n", - "train_size = 0.7\n", - "\n", - "target = cd.outcomes[0]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now if outcome_model=\"auto\" in the CausalTune constructor, we search over a simultaneous search space for the EconML estimators and for FLAML wrappers for common regressors. The old behavior is now achieved by outcome_model=\"nested\" (the default for now)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "ct_ab = CausalTune(\n", - " num_samples=num_samples,\n", - " components_time_budget=components_time_budget,\n", - " metric=\"energy_distance\",\n", - " verbose=3,\n", - " components_verbose=3,\n", - " train_size=train_size,\n", - " outcome_model=\"auto\"\n", - ") \n", - "ct_ab.fit(data=cd, outcome=target)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The point estimates compare as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Difference in means estimate (naive ATE): 0.121874\n", - "CausalTune ATE estimate:: 0.086094\n", - "True ATE: 0.1\n" - ] - } - ], - "source": [ - "print(f'Difference in means estimate (naive ATE): 
{ct_ab.scorer.naive_ate(ct_ab.test_df[cd.treatment], ct_ab.test_df[target])[0]:5f}')\n", - "print(f'CausalTune ATE estimate:: {ct_ab.effect(ct_ab.test_df).mean():5f}')\n", - "print(f'True ATE: {TRUE_EFFECT}')" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Explainable variation\n", - "\n", - "As a first performance check of this approach we test how much of the variation in the outcome metric remains unexplained with our outcome model prediction approach. \n", - "\n", - "For this, we use AutoML to predict outcomes as is done under the hood of CausalTune.\n", - "The lower the unexplained variation, the more promising it is to use CausalTune for AB Testing." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "automl = AutoML()\n", - "automl.fit(ct_ab.train_df[ct_ab.train_df.columns.drop([target])], ct_ab.train_df[target], task='regression', time_budget=30)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Variation unexplained: 0.15%\n" - ] - } - ], - "source": [ - "# Fraction of variation unexplained\n", - "mse = mean_squared_error(automl.predict(ct_ab.test_df[ct_ab.test_df.columns.drop([target])]), ct_ab.test_df[target])\n", - "var_y = ct_ab.test_df[target].var()\n", - "fvu = mse / var_y\n", - "print(f'Variation unexplained: {100*fvu:.2f}%')" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Bootstrapping with simple component models for inference\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "# bootstrap configuration\n", - "\n", - "n_samples = 30\n", - "n_sample_size = cd.data.shape[0]\n", - "\n", - "components_time_budget = 5\n", - "train_size = .7\n", - "num_samples= 10\n", - "\n", - "ct_ate = []\n", - "scores = []\n", - 
"naive_ate = []" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "for _ in range(n_samples):\n", - " cd_bt = generate_synth_data_with_categories(n_samples=5000, n_x=3, true_effect=TRUE_EFFECT)\n", - " cd_bt.preprocess_dataset()\n", - " outcome_regressor = RandomForestRegressor()\n", - " \n", - " ct = CausalTune(\n", - " num_samples=num_samples,\n", - " components_time_budget=components_time_budget,\n", - " metric=\"energy_distance\",\n", - " train_size=train_size,\n", - " propensity_model='dummy',\n", - " outcome_model=outcome_regressor\n", - " ) \n", - "\n", - " ct.fit(data=cd, outcome=target)\n", - "\n", - " ct_ate.append(ct.effect(ct.test_df).mean())\n", - " scores.append(ct.best_score)\n", - " naive_ate.append(ct.scorer.naive_ate(cd_bt.data[cd_bt.treatment], cd_bt.data[target])[0])\n", - " del ct, cd_bt, outcome_regressor\n", - " gc.collect()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fig, ax = plt.subplots()\n", - "\n", - "ax.boxplot([ct_ate, naive_ate])\n", - "ax.set_xticklabels(['$\\hat{\\mu}_{CausalTune}$', '$\\hat{\\mu}_{DiffInMeans}$'])\n", - "plt.axhline(y = TRUE_EFFECT, color = 'b', linestyle = '--')\n", - "plt.show()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Segmentation with Wise Pizza" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The underlying estimators of CausalTune provide heterogeneous treatment effect estimates. Apart from simply predicting treatment effects for customers with certain characteristics, one can also perform an automatic segmentation of customers by treatment impact via [wise-pizza](https://github.com/transferwise/wise-pizza/tree/main) as we demonstrate here.\n", - "\n", - "In the synthetic dataset at hand, there are heterogeneous treatment effects by category, e.g. $.5*$TRUE_EFFECT if $X_1=1$ or $-.5*$TRUE_EFFECT if $X_1=2$\n", - "\n", - "The plot below displays an automated selection of relevant segments by CATE." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "segments = list(set(cd.data.columns) - set([cd.treatment]) - set(cd.outcomes) - set(['random']) - set(['X_continuous']))\n", - "\n", - "df_effects = ct_ab.test_df[segments + [cd.treatment]]\n", - "df_effects['CATE'] = ct_ab.effect(ct_ab.test_df)\n", - "df_eff_by_seg = df_effects.groupby(by=segments, as_index=False).agg({'CATE':'sum', 'variant': len}).rename(columns={'variant': 'size'})" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:min_segments parameter is deprecated, please use max_segments instead.\n", - "WARNING:root:min_segments parameter is deprecated, please use max_segments instead.\n" - ] - }, - { - "data": { - "image/png": "" - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "max_depth = 3\n", - "min_segments = 3\n", - "\n", - "sf = wp.explain_levels(\n", - " df=df_eff_by_seg,\n", - " dims=segments,\n", - " total_name='CATE',\n", - " size_name='size',\n", - " max_depth=max_depth,\n", - " min_segments=min_segments,\n", - ")\n", - "sf.plot(plot_is_static=False)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.14" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From 30242e79d3174b6f96eb71e674b4709efedf9a7f Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 10 Sep 2024 15:07:59 +0100 Subject: [PATCH 12/22] Add files 
via upload Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- notebooks/AB testing.ipynb | 449 +++++++++++++++++++++++++++++++++++++ 1 file changed, 449 insertions(+) create mode 100644 notebooks/AB testing.ipynb diff --git a/notebooks/AB testing.ipynb b/notebooks/AB testing.ipynb new file mode 100644 index 00000000..c9f422d9 --- /dev/null +++ b/notebooks/AB testing.ipynb @@ -0,0 +1,449 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AB Testing with CausalTune" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import pandas as pd\n", + "import numpy as np\n", + "import warnings\n", + "\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "import gc\n", + "\n", + "root_path = root_path = os.path.realpath('../..')\n", + "try:\n", + " import causaltune\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"causaltune\"))\n", + "\n", + "from causaltune import CausalTune\n", + "from causaltune.data_utils import CausalityDataset\n", + "from causaltune.datasets import generate_synth_data_with_categories\n", + "\n", + "from flaml import AutoML\n", + "import matplotlib.pyplot as plt\n", + "%pip install seaborn as sns\n", + "import seaborn as sns\n", + "%matplotlib inline\n", + "\n", + "warnings.filterwarnings(\"ignore\")\n", + "\n", + "%pip install plotly\n", + "import plotly.io as pio\n", + "pio.renderers.default = \"png\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "*Note*: This notebook uses the package *wise-pizza* which is not listed as a requirement to run CausalTune. 
It is merely used to showcase what is possible as an AB testing workflow.\n", + "\n", + "Install via\n", + "`pip install wise-pizza`" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install wise_pizza" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import wise_pizza as wp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## CausalTune for AB Testing \n", + "\n", + "CausalTune can be used for AB Testing in two ways:\n", + "1. Variance Reduction\n", + "2. Segmentation analysis\n", + "\n", + "#### 1. Variance Reduction\n", + "A standard variance reduction technique is to control for natural variation in the experiment's outcome metric. The simplest way to do so is by running a simple regression with a selection of controls. A potentially more powerful and automated approach is to run CausalTune. \n", + "\n", + "#### 2. Segmentation Analysis\n", + "\n", + "We use the heterogeneous treatment effect estimates from CausalTune to feed them into the segmentation analytics tool Wise-Pizza." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Data Generating Process" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We first create synthetic data from a DGP with perfect randomisation of the treatment as we are replicating an AB test environment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There is substantial variation within the outcome metric per variant which can be seen from the cdf per variant:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "TRUE_EFFECT = 0.1\n", + "cd = generate_synth_data_with_categories(n_samples=8000, n_x=3, true_effect=TRUE_EFFECT)\n", + "cd.preprocess_dataset()\n", + "sns.ecdfplot(data=cd.data, x=cd.outcomes[0], hue=cd.treatment)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. ATE estimation: Running CausalTune\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# CausalTune configuration\n", + "num_samples = 5\n", + "components_time_budget = 10\n", + "train_size = 0.7\n", + "\n", + "target = cd.outcomes[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now if `outcome_model=\"auto\"` in the CausalTune constructor, we search over a simultaneous search space for the EconML estimators and for FLAML wrappers for common regressors. The old behavior is now achieved by `outcome_model=\"nested\"` (Refitting AutoML for each estimator).\n", + "\n", + "You can also preprocess the data in the CausalityDataset using one of the popular category encoders: OneHot, WoE, Label, Target." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "ct_ab = CausalTune(\n", + " num_samples=num_samples,\n", + " components_time_budget=components_time_budget,\n", + " metric=\"energy_distance\",\n", + " verbose=3,\n", + " components_verbose=3,\n", + " train_size=train_size,\n", + " outcome_model=\"auto\"\n", + ") \n", + "ct_ab.fit(data=cd, outcome=target)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The point estimates compare as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Difference in means estimate (naive ATE): 0.121874\n", + "CausalTune ATE estimate:: 0.086094\n", + "True ATE: 0.1\n" + ] + } + ], + "source": [ + "print(f'Difference in means estimate (naive ATE): {ct_ab.scorer.naive_ate(ct_ab.test_df[cd.treatment], ct_ab.test_df[target])[0]:5f}')\n", + "print(f'CausalTune ATE estimate:: {ct_ab.effect(ct_ab.test_df).mean():5f}')\n", + "print(f'True ATE: {TRUE_EFFECT}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Explainable variation\n", + "\n", + "As a first performance check of this approach we test how much of the variation in the outcome metric remains unexplained with our outcome model prediction approach. \n", + "\n", + "For this, we use AutoML to predict outcomes as is done under the hood of CausalTune.\n", + "The lower the unexplained variation, the more promising it is to use CausalTune for AB Testing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "automl = AutoML()\n", + "automl.fit(ct_ab.train_df[ct_ab.train_df.columns.drop([target])], ct_ab.train_df[target], task='regression', time_budget=30)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Variation unexplained: 0.15%\n" + ] + } + ], + "source": [ + "# Fraction of variation unexplained\n", + "mse = mean_squared_error(automl.predict(ct_ab.test_df[ct_ab.test_df.columns.drop([target])]), ct_ab.test_df[target])\n", + "var_y = ct_ab.test_df[target].var()\n", + "fvu = mse / var_y\n", + "print(f'Variation unexplained: {100*fvu:.2f}%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bootstrapping with simple component models for inference\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# bootstrap configuration\n", + "\n", + "n_samples = 30\n", + "n_sample_size = cd.data.shape[0]\n", + "\n", + "components_time_budget = 5\n", + "train_size = .7\n", + "num_samples= 10\n", + "\n", + "ct_ate = []\n", + "scores = []\n", + "naive_ate = []" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "for _ in range(n_samples):\n", + " cd_bt = generate_synth_data_with_categories(n_samples=5000, n_x=3, true_effect=TRUE_EFFECT)\n", + " cd_bt.preprocess_dataset()\n", + " outcome_regressor = RandomForestRegressor()\n", + " \n", + " ct = CausalTune(\n", + " num_samples=num_samples,\n", + " components_time_budget=components_time_budget,\n", + " metric=\"energy_distance\",\n", + " train_size=train_size,\n", + " propensity_model='dummy',\n", + " outcome_model=outcome_regressor\n", + " ) \n", + "\n", + " ct.fit(data=cd, outcome=target)\n", + "\n", + " ct_ate.append(ct.effect(ct.test_df).mean())\n", + " 
scores.append(ct.best_score)\n", + " naive_ate.append(ct.scorer.naive_ate(cd_bt.data[cd_bt.treatment], cd_bt.data[target])[0])\n", + " del ct, cd_bt, outcome_regressor\n", + " gc.collect()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots()\n", + "\n", + "ax.boxplot([ct_ate, naive_ate])\n", + "ax.set_xticklabels(['$\\hat{\\mu}_{CausalTune}$', '$\\hat{\\mu}_{DiffInMeans}$'])\n", + "plt.axhline(y = TRUE_EFFECT, color = 'b', linestyle = '--')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Segmentation with Wise Pizza" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The underlying estimators of CausalTune provide heterogeneous treatment effect estimates. Apart from simply predicting treatment effects for customers with certain characteristics, one can also perform an automatic segmentation of customers by treatment impact via [wise-pizza](https://github.com/transferwise/wise-pizza/tree/main) as we demonstrate here.\n", + "\n", + "In the synthetic dataset at hand, there are heterogeneous treatment effects by category, e.g. $.5*$TRUE_EFFECT if $X_1=1$ or $-.5*$TRUE_EFFECT if $X_1=2$\n", + "\n", + "The plot below displays an automated selection of relevant segments by CATE." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "segments = list(set(cd.data.columns) - set([cd.treatment]) - set(cd.outcomes) - set(['random']) - set(['X_continuous']))\n", + "\n", + "df_effects = ct_ab.test_df[segments + [cd.treatment]]\n", + "df_effects['CATE'] = ct_ab.effect(ct_ab.test_df)\n", + "df_eff_by_seg = df_effects.groupby(by=segments, as_index=False).agg({'CATE':'sum', 'variant': len}).rename(columns={'variant': 'size'})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:min_segments parameter is deprecated, please use max_segments instead.\n", + "WARNING:root:min_segments parameter is deprecated, please use max_segments instead.\n" + ] + }, + { + "data": { + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "max_depth = 3\n", + "min_segments = 3\n", + "\n", + "sf = wp.explain_levels(\n", + " df=df_eff_by_seg,\n", + " dims=segments,\n", + " total_name='CATE',\n", + " size_name='size',\n", + " max_depth=max_depth,\n", + " min_segments=min_segments,\n", + ")\n", + "sf.plot(plot_is_static=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 2a6385bd1eb1d6cd720110cca63cfcd707bafeea Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 10 Sep 2024 16:04:04 +0100 Subject: [PATCH 13/22] Delete 
notebooks/CausalityDataset setup.ipynb Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- notebooks/CausalityDataset setup.ipynb | 657 ------------------------- 1 file changed, 657 deletions(-) delete mode 100644 notebooks/CausalityDataset setup.ipynb diff --git a/notebooks/CausalityDataset setup.ipynb b/notebooks/CausalityDataset setup.ipynb deleted file mode 100644 index 4ca160f3..00000000 --- a/notebooks/CausalityDataset setup.ipynb +++ /dev/null @@ -1,657 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "f3a2f126", - "metadata": { - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "# Setting up the data and causal model: CausalityDataset\n", - "\n", - "This notebook demonstrates how to use and configure `CausalityDataset` using an arbitrary `pd.DataFrame`.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "d43137b0", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. 
To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "import os, sys\n", - "import warnings\n", - "warnings.filterwarnings('ignore') # suppress sklearn deprecation warnings for now..\n", - "\n", - "import pandas as pd\n", - "import numpy as np\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "# the below checks for whether we run dowhy, causaltune, and FLAML from source\n", - "root_path = root_path = os.path.realpath('../..')\n", - "try:\n", - " import causaltune\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"causaltune\"))\n", - "\n", - "try:\n", - " import dowhy\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"dowhy\"))\n", - "\n", - "try:\n", - " import flaml\n", - "except ModuleNotFoundError:\n", - " sys.path.append(os.path.join(root_path, \"FLAML\"))\n", - " \n", - " \n", - " \n", - "from causaltune import CausalTune\n", - "from causaltune.datasets import synth_ihdp, iv_dgp_econml, generate_non_random_dataset\n", - "from causaltune.data_utils import CausalityDataset\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "e072c202", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# this makes the notebook expand to full width of the browser window\n", - "from IPython.core.display import display, HTML\n", - "display(HTML(\"\"))" - ] - }, - { - "cell_type": "markdown", - "id": "c2a0429f", - "metadata": {}, - "source": [ - "### Random assignment \n", - "We first illustrate the model setup with a subset of data from the Infant Health and Development Program (IHDP)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "0efc918c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
treatmenty_factualx1x2x3
015.599916-0.528603-0.3434551.128554
106.875856-1.736945-1.8020020.383828
202.996273-0.807451-0.202946-0.360898
301.3662060.3900830.596582-1.850350
401.963538-1.045229-0.6027100.011465
\n", - "
" - ], - "text/plain": [ - " treatment y_factual x1 x2 x3\n", - "0 1 5.599916 -0.528603 -0.343455 1.128554\n", - "1 0 6.875856 -1.736945 -1.802002 0.383828\n", - "2 0 2.996273 -0.807451 -0.202946 -0.360898\n", - "3 0 1.366206 0.390083 0.596582 -1.850350\n", - "4 0 1.963538 -1.045229 -0.602710 0.011465" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "df = synth_ihdp(return_df=True).iloc[:,:5]\n", - "display(df.head())" - ] - }, - { - "cell_type": "markdown", - "id": "c5bce66b", - "metadata": {}, - "source": [ - "Generally, at least three arguments have to be supplied to `CausalityDataset`:\n", - "- `data`: input dataframe\n", - "- `treatment`: name of treatment column\n", - "- `outcomes`: list of names of outcome columns; provide as list even if there's just one outcome of interest\n", - "\n", - "In addition, if the propensities to treat are known, then provide the corresponding column name(s) via `propensity_modifiers`." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "bb50909e", - "metadata": {}, - "outputs": [], - "source": [ - "cd = CausalityDataset(data=df, treatment='treatment', outcomes=['y_factual'])" - ] - }, - { - "cell_type": "markdown", - "id": "73b6395a", - "metadata": {}, - "source": [ - "The next step is to use `cd.preprocess_dataset()` to deal with missing values, remove outliers etc." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "8803d695", - "metadata": {}, - "outputs": [], - "source": [ - "cd.preprocess_dataset()" - ] - }, - { - "cell_type": "markdown", - "id": "dafa93e0", - "metadata": {}, - "source": [ - "The causal model is built by assuming that all remaining features are `effect_modifiers`" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "6695f65f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['x1', 'x2', 'x3']\n" - ] - } - ], - "source": [ - "print(cd.effect_modifiers)" - ] - }, - { - "cell_type": "markdown", - "id": "50447729", - "metadata": {}, - "source": [ - "Subsequently, use the preprocessed `CausalityDataset` object for training as follow: `CausalTune.fit(cd, outcome='y_factual')`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eb9ebea5", - "metadata": {}, - "outputs": [], - "source": [ - "ct = CausalTune(components_time_budget=5,) \n", - "ct.fit(data=cd, outcome='y_factual')" - ] - }, - { - "cell_type": "markdown", - "id": "e8cf75fb", - "metadata": {}, - "source": [ - "The causal graph that CausalTune uses is " - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "6b9a1ad6", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%matplotlib inline\n", - "ct.causal_model.view_model()" - ] - }, - { - "cell_type": "markdown", - "id": "f0ec03d0", - "metadata": {}, - "source": [ - "*Note that the variable `random` can be ignored and has no real meaning for the causal model.*" - ] - }, - { - "cell_type": "markdown", - "id": "80318c33", - "metadata": {}, - "source": [ - "#### Adding common causes\n", - "\n", - "If we had reason to assume that for instance `x1` and `x2` are `common causes` instead of `effect modifiers`, this can be made explicit:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "id": "6babd054", - "metadata": {}, - "outputs": [], - "source": [ - "cd = CausalityDataset(data=df, treatment='treatment', outcomes=['y_factual'], common_causes=['x1', 'x2'])" - ] - }, - { - "cell_type": "markdown", - "id": "256f2054", - "metadata": {}, - "source": [ - "The causal graph becomes" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "510157f0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting a Propensity-Weighted scoring estimator to be used in scoring tasks\n", - "Initial configs: [{'estimator': {'estimator_name': 'backdoor.causaltune.models.NaiveDummy'}}, {'estimator': {'estimator_name': 'backdoor.causaltune.models.Dummy'}}, {'estimator': {'estimator_name': 'backdoor.econml.metalearners.SLearner'}}, {'estimator': {'estimator_name': 'backdoor.econml.metalearners.DomainAdaptationLearner'}}, {'estimator': {'estimator_name': 'backdoor.econml.dr.ForestDRLearner', 'min_propensity': 1e-06, 'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'max_samples': 0.45, 'min_balancedness_tol': 0.45, 'honest': True, 'subforest_size': 4}}, {'estimator': {'estimator_name': 'backdoor.econml.dml.CausalForestDML', 'drate': True, 
'n_estimators': 100, 'criterion': 'mse', 'min_samples_split': 10, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'max_samples': 0.45, 'min_balancedness_tol': 0.45, 'honest': True, 'fit_intercept': True, 'subforest_size': 4}}, {'estimator': {'estimator_name': 'backdoor.causaltune.models.TransformedOutcome'}}]\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "cd.preprocess_dataset()\n", - "ct = CausalTune(components_time_budget=5,) \n", - "ct.fit(data=cd, outcome='y_factual')\n", - "ct.causal_model.view_model()" - ] - }, - { - "cell_type": "markdown", - "id": "ca35fcef", - "metadata": {}, - "source": [ - "For how to proceed further with CausalTune, see for instance [here](https://github.com/py-why/causaltune/blob/main/notebooks/Random%20assignment%2C%20binary%20CATE%20example.ipynb)" - ] - }, - { - "cell_type": "markdown", - "id": "c1be7581", - "metadata": {}, - "source": [ - "### Instrumental variable identification\n", - "\n", - "In other problems of causal inference, one may seek to follow an instrumental variable approach ([Example notebook](https://github.com/py-why/causaltune/blob/main/notebooks/Comparing%20IV%20Estimators.ipynb)). " - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "2a35636e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " x1 x2 x3 x4 y treatment Z\n", - "0 -2.167807 -0.081599 0.354765 -0.470893 0.950792 0 1\n", - "1 0.206365 1.144597 -1.338532 -0.237026 18.188874 1 1\n", - "2 -0.497604 1.264037 1.282048 1.036047 6.519928 0 0\n", - "3 1.092089 0.331639 -0.623374 0.321355 9.221536 0 0\n", - "4 -0.126635 -1.717113 0.645309 -1.320294 11.088779 1 1\n" - ] - } - ], - "source": [ - "#load data\n", - "df = iv_dgp_econml(p=4).data\n", - "del df['random']\n", - "print(df.head(5))" - ] - }, - { - "cell_type": "markdown", - "id": "a012cdff", - "metadata": {}, - "source": [ - "Suppose we want to use $Z$ as an instrument." 
- ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "c9be746a", - "metadata": {}, - "outputs": [], - "source": [ - "cd = CausalityDataset(\n", - " data=df, \n", - " treatment='treatment',\n", - " outcomes=['y'],\n", - " instruments=['Z']\n", - " )\n", - "cd.preprocess_dataset()" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "0bfd06a6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Outcomes: ['y']\n", - "Treatment: treatment\n", - "Instruments: ['Z']\n", - "Effect modifiers: ['x1', 'x2', 'x3', 'x4']\n" - ] - } - ], - "source": [ - "print('Outcomes:', cd.outcomes)\n", - "print('Treatment:', cd.treatment)\n", - "print('Instruments:', cd.instruments)\n", - "print('Effect modifiers:', cd.effect_modifiers)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0e738f3e", - "metadata": {}, - "outputs": [], - "source": [ - "ct = CausalTune(\n", - " components_time_budget=5,\n", - " estimator_list=['iv.econml.iv.dml.DMLIV']\n", - " ) \n", - "ct.fit(data=cd, outcome='y')" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "83f847f9", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ct.causal_model.view_model()" - ] - }, - { - "cell_type": "markdown", - "id": "ecb28b61", - "metadata": {}, - "source": [ - "### Propensity modifiers\n", - "\n", - "If there are well-known propensity modifiers, it is also possible to make those explicit. This can, e.g., be used to pass them directly into the model instead of fitting a propensity weight model (for more details, see [here](https://github.com/py-why/causaltune/blob/main/notebooks/Propensity%20Model%20Selection.ipynb))." - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "b1407bbb", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " T Y X1 X2 X3 X4 X5 propensity\n", - "0 1 0.651561 1.266634 -1.493090 -0.139367 -1.234455 0.115191 0.314804\n", - "1 1 1.499142 0.977774 0.426410 0.709403 -0.371737 -1.062126 0.656799\n", - "2 0 -1.504549 0.037244 0.522880 -0.896096 0.838664 -0.006262 0.705601\n", - "3 1 -2.231536 -1.008786 0.058282 0.322617 0.213959 0.256430 0.368792\n", - "4 1 1.108775 1.296887 -0.063358 -1.825230 0.541003 0.221827 0.774054\n" - ] - } - ], - "source": [ - "#load data\n", - "df = generate_non_random_dataset().data\n", - "del df['random']\n", - "print(df.head(5))" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "1b906467", - "metadata": {}, - "outputs": [], - "source": [ - "cd = CausalityDataset(\n", - " data=df, \n", - " treatment='T',\n", - " outcomes=['Y'],\n", - " propensity_modifiers=['propensity']\n", - " )\n", - "cd.preprocess_dataset()" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "71394906", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Outcomes: ['Y']\n", - "Treatment: T\n", - "Propensity Modifiers: ['propensity']\n", - "Effect modifiers: ['X1', 'X2', 'X3', 'X4', 'X5']\n" - ] - } - ], - "source": [ - "print('Outcomes:', 
cd.outcomes)\n", - "print('Treatment:', cd.treatment)\n", - "print('Propensity Modifiers:', cd.propensity_modifiers)\n", - "print('Effect modifiers:', cd.effect_modifiers)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "359fd218", - "metadata": {}, - "outputs": [], - "source": [ - "ct = CausalTune(\n", - " components_time_budget=5,\n", - ") \n", - "ct.fit(data=cd, outcome='Y')" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "08e0ee9c", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ct.causal_model.view_model()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cef89ea2", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.19" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 6c8be18448d6003652e58fd5eea2c214f703f524 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 10 Sep 2024 16:08:08 +0100 Subject: [PATCH 14/22] Add files via upload Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- notebooks/CausalityDataset setup.ipynb | 1863 ++++++++++++++++++++++++ 1 file changed, 1863 insertions(+) create mode 100644 notebooks/CausalityDataset setup.ipynb diff --git a/notebooks/CausalityDataset setup.ipynb b/notebooks/CausalityDataset setup.ipynb new file mode 100644 index 00000000..e328bf46 --- /dev/null +++ b/notebooks/CausalityDataset setup.ipynb @@ -0,0 +1,1863 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f3a2f126", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Setting up the data and causal model: CausalityDataset\n", + "\n", + "This notebook demonstrates how to use and configure `CausalityDataset` using an arbitrary `pd.DataFrame`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "d43137b0", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import os, sys\n", + "import warnings\n", + 
"warnings.filterwarnings('ignore') # suppress sklearn deprecation warnings for now..\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# the below checks for whether we run dowhy, causaltune, and FLAML from source\n", + "root_path = root_path = os.path.realpath('../..')\n", + "try:\n", + " import causaltune\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"causaltune\"))\n", + "\n", + "try:\n", + " import dowhy\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"dowhy\"))\n", + "\n", + "try:\n", + " import flaml\n", + "except ModuleNotFoundError:\n", + " sys.path.append(os.path.join(root_path, \"FLAML\"))\n", + " \n", + " \n", + " \n", + "from causaltune import CausalTune\n", + "from causaltune.datasets import synth_ihdp, iv_dgp_econml, generate_non_random_dataset\n", + "from causaltune.data_utils import CausalityDataset\n", + "from causaltune.dataset_processor import CausalityDatasetProcessor" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e072c202", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# this makes the notebook expand to full width of the browser window\n", + "from IPython.core.display import display, HTML\n", + "display(HTML(\"\"))" + ] + }, + { + "cell_type": "markdown", + "id": "c2a0429f", + "metadata": {}, + "source": [ + "### Random assignment \n", + "We first illustrate the model setup with a subset of data from the Infant Health and Development Program (IHDP)." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "0efc918c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
treatmenty_factualx1x2x3
015.599916-0.528603-0.3434551.128554
106.875856-1.736945-1.8020020.383828
202.996273-0.807451-0.202946-0.360898
301.3662060.3900830.596582-1.850350
401.963538-1.045229-0.6027100.011465
\n", + "
" + ], + "text/plain": [ + " treatment y_factual x1 x2 x3\n", + "0 1 5.599916 -0.528603 -0.343455 1.128554\n", + "1 0 6.875856 -1.736945 -1.802002 0.383828\n", + "2 0 2.996273 -0.807451 -0.202946 -0.360898\n", + "3 0 1.366206 0.390083 0.596582 -1.850350\n", + "4 0 1.963538 -1.045229 -0.602710 0.011465" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = synth_ihdp(return_df=True).iloc[:,:5]\n", + "display(df.head())" + ] + }, + { + "cell_type": "markdown", + "id": "c5bce66b", + "metadata": {}, + "source": [ + "Generally, at least three arguments have to be supplied to `CausalityDataset`:\n", + "- `data`: input dataframe\n", + "- `treatment`: name of treatment column\n", + "- `outcomes`: list of names of outcome columns; provide as list even if there's just one outcome of interest\n", + "\n", + "In addition, if the propensities to treat are known, then provide the corresponding column name(s) via `propensity_modifiers`." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "bb50909e", + "metadata": {}, + "outputs": [], + "source": [ + "cd = CausalityDataset(data=df, treatment='treatment', outcomes=['y_factual'])" + ] + }, + { + "cell_type": "markdown", + "id": "73b6395a", + "metadata": {}, + "source": [ + "The next step is to use `cd.preprocess_dataset()` to deal with missing values, remove outliers etc." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8803d695", + "metadata": {}, + "outputs": [], + "source": [ + "cd.preprocess_dataset()" + ] + }, + { + "cell_type": "markdown", + "id": "dafa93e0", + "metadata": {}, + "source": [ + "The causal model is built by assuming that all remaining features are `effect_modifiers`" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6695f65f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['x1', 'x2', 'x3']\n" + ] + } + ], + "source": [ + "print(cd.effect_modifiers)" + ] + }, + { + "cell_type": "markdown", + "id": "50447729", + "metadata": {}, + "source": [ + "Subsequently, use the preprocessed `CausalityDataset` object for training as follows: `CausalTune.fit(cd, outcome='y_factual')`." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "eb9ebea5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting a Propensity-Weighted scoring estimator to be used in scoring tasks\n", + "Propensity Model Fitted Successfully\n" + ] + } + ], + "source": [ + "ct = CausalTune(components_time_budget=5,) \n", + "ct.fit(data=cd, outcome='y_factual')" + ] + }, + { + "cell_type": "markdown", + "id": "e8cf75fb", + "metadata": {}, + "source": [ + "The causal graph that CausalTune uses is:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6b9a1ad6", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "ct.causal_model.view_model()" + ] + }, + { + "cell_type": "markdown", + "id": "f0ec03d0", + "metadata": {}, + "source": [ + "*Note that the variable `random` can be ignored and has no real meaning for the causal model.*" + ] + }, + { + "cell_type": "markdown", + "id": "80318c33", + "metadata": {}, + "source": [ + "#### Adding common causes\n", + "\n", + "If we had reason to assume that for instance `x1` and `x2` are `common causes` instead of `effect 
modifiers`, this can be made explicit:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6babd054", + "metadata": {}, + "outputs": [], + "source": [ + "cd = CausalityDataset(data=df, treatment='treatment', outcomes=['y_factual'], common_causes=['x1', 'x2'])" + ] + }, + { + "cell_type": "markdown", + "id": "256f2054", + "metadata": {}, + "source": [ + "The causal graph becomes" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "510157f0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting a Propensity-Weighted scoring estimator to be used in scoring tasks\n", + "Propensity Model Fitted Successfully\n" + ] + } + ], + "source": [ + "cd.preprocess_dataset()\n", + "ct = CausalTune(components_time_budget=5,) \n", + "ct.fit(data=cd, outcome='y_factual')\n", + "ct.causal_model.view_model()" + ] + }, + { + "cell_type": "markdown", + "id": "ca35fcef", + "metadata": {}, + "source": [ + "For how to proceed further with CausalTune, see for instance [here](https://github.com/py-why/causaltune/blob/main/notebooks/Random%20assignment%2C%20binary%20CATE%20example.ipynb)" + ] + }, + { + "cell_type": "markdown", + "id": "c1be7581", + "metadata": {}, + "source": [ + "### Instrumental variable identification\n", + "\n", + "In other problems of causal inference, one may seek to follow an instrumental variable approach ([Example notebook](https://github.com/py-why/causaltune/blob/main/notebooks/Comparing%20IV%20Estimators.ipynb)). 
" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "2a35636e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " x1 x2 x3 x4 y treatment Z\n", + "0 -0.662658 1.124321 -1.699940 -0.379268 5.236122 0 0\n", + "1 -0.788565 1.336684 -0.539586 -0.785838 12.039615 1 1\n", + "2 -0.344655 -0.204201 -1.267158 0.898114 23.469351 1 1\n", + "3 0.125284 -0.557028 0.403744 0.579168 5.300115 0 0\n", + "4 0.356507 0.330607 0.430286 1.201554 12.855370 0 0\n" + ] + } + ], + "source": [ + "#load data\n", + "df = iv_dgp_econml(p=4).data\n", + "del df['random']\n", + "print(df.head(5))" + ] + }, + { + "cell_type": "markdown", + "id": "a012cdff", + "metadata": {}, + "source": [ + "Suppose we want to use $Z$ as an instrument." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c9be746a", + "metadata": {}, + "outputs": [], + "source": [ + "cd = CausalityDataset(\n", + " data=df, \n", + " treatment='treatment',\n", + " outcomes=['y'],\n", + " instruments=['Z']\n", + " )\n", + "cd.preprocess_dataset()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "0bfd06a6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Outcomes: ['y']\n", + "Treatment: treatment\n", + "Instruments: ['Z']\n", + "Effect modifiers: ['x1', 'x2', 'x3', 'x4']\n" + ] + } + ], + "source": [ + "print('Outcomes:', cd.outcomes)\n", + "print('Treatment:', cd.treatment)\n", + "print('Instruments:', cd.instruments)\n", + "print('Effect modifiers:', cd.effect_modifiers)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "0e738f3e", + "metadata": {}, + "outputs": [], + "source": [ + "ct = CausalTune(\n", + " components_time_budget=5,\n", + " estimator_list=['iv.econml.iv.dml.DMLIV']\n", + " ) \n", + "ct.fit(data=cd, outcome='y')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "83f847f9", + "metadata": {}, + "outputs": [], + "source": [ 
+ "ct.causal_model.view_model()" + ] + }, + { + "cell_type": "markdown", + "id": "ecb28b61", + "metadata": {}, + "source": [ + "### Propensity modifiers\n", + "\n", + "If there are well-known propensity modifiers, it is also possible to make those explicit. This can, e.g., be used to pass them directly into the model instead of fitting a propensity weight model (for more details, see [here](https://github.com/py-why/causaltune/blob/main/notebooks/Propensity%20Model%20Selection.ipynb))." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "b1407bbb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " T Y X1 X2 X3 X4 X5 propensity\n", + "0 0 1.650705 0.521524 -1.393497 0.010672 -0.828778 1.019257 0.245100\n", + "1 0 -0.888552 -0.782541 -1.384920 -0.233656 0.150249 -0.495169 0.205945\n", + "2 0 -0.516344 -0.154831 -0.098985 2.335176 -1.888928 -0.594854 0.235870\n", + "3 1 0.601679 0.109516 0.092910 0.525252 -1.172202 -0.177947 0.439021\n", + "4 0 0.569122 -0.365630 -0.343061 -0.420554 -0.995160 1.548502 0.335151\n" + ] + } + ], + "source": [ + "#load data\n", + "df = generate_non_random_dataset().data\n", + "del df['random']\n", + "print(df.head(5))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "1b906467", + "metadata": {}, + "outputs": [], + "source": [ + "cd = CausalityDataset(\n", + " data=df, \n", + " treatment='T',\n", + " outcomes=['Y'],\n", + " propensity_modifiers=['propensity']\n", + " )\n", + "cd.preprocess_dataset()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "71394906", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Outcomes: ['Y']\n", + "Treatment: T\n", + "Propensity Modifiers: ['propensity']\n", + "Effect modifiers: ['X1', 'X2', 'X3', 'X4', 'X5']\n" + ] + } + ], + "source": [ + "print('Outcomes:', cd.outcomes)\n", + "print('Treatment:', cd.treatment)\n", + "print('Propensity Modifiers:', 
cd.propensity_modifiers)\n", + "print('Effect modifiers:', cd.effect_modifiers)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "359fd218", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting a Propensity-Weighted scoring estimator to be used in scoring tasks\n", + "Propensity Model Fitted Successfully\n" + ] + } + ], + "source": [ + "ct = CausalTune(\n", + " components_time_budget=5,\n", + ") \n", + "ct.fit(data=cd, outcome='Y')" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "08e0ee9c", + "metadata": {}, + "outputs": [], + "source": [ + "ct.causal_model.view_model()" + ] + }, + { + "cell_type": "markdown", + "id": "818762bf-a3e7-426b-87e7-3cbcaa5d1ef8", + "metadata": {}, + "source": [ + "### Pre-processing of the test dataset based on the training set\n", + "You can also preprocess the data in the CausalityDataset using one of the popular category encoders: OneHot, WoE, Label, Target." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "fd26bb39-e55f-4f76-b225-838ddb16675b", + "metadata": {}, + "outputs": [], + "source": [ + "unique_values_1 = ['A', 'B', 'C', 'D', 'E']\n", + "unique_values_2 = ['F', 'G', 'H', 'I', 'J', 'K']\n", + "unique_values_3 = ['L', 'M', 'N', 'O', 'P', 'Q', 'R']" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "9354687b-6d4a-448a-813d-c8f21c761b8c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
treatmenty_factualx1x2x3
015.599916-0.528603-0.3434551.128554
106.875856-1.736945-1.8020020.383828
202.996273-0.807451-0.202946-0.360898
301.3662060.3900830.596582-1.850350
401.963538-1.045229-0.6027100.011465
\n", + "
" + ], + "text/plain": [ + " treatment y_factual x1 x2 x3\n", + "0 1 5.599916 -0.528603 -0.343455 1.128554\n", + "1 0 6.875856 -1.736945 -1.802002 0.383828\n", + "2 0 2.996273 -0.807451 -0.202946 -0.360898\n", + "3 0 1.366206 0.390083 0.596582 -1.850350\n", + "4 0 1.963538 -1.045229 -0.602710 0.011465" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df_train = synth_ihdp(return_df=True).iloc[:,:5]\n", + "display(df_train.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "f22414f8-0624-4e04-9a3d-4dfe31e4f8f0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
treatmenty_factualx1x2x3
015.599916-0.528603-0.3434551.128554
106.875856-1.736945-1.8020020.383828
202.996273-0.807451-0.202946-0.360898
301.3662060.3900830.596582-1.850350
401.963538-1.045229-0.6027100.011465
\n", + "
" + ], + "text/plain": [ + " treatment y_factual x1 x2 x3\n", + "0 1 5.599916 -0.528603 -0.343455 1.128554\n", + "1 0 6.875856 -1.736945 -1.802002 0.383828\n", + "2 0 2.996273 -0.807451 -0.202946 -0.360898\n", + "3 0 1.366206 0.390083 0.596582 -1.850350\n", + "4 0 1.963538 -1.045229 -0.602710 0.011465" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df_test = synth_ihdp(return_df=True).iloc[:,:5]\n", + "display(df_test.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "70477eb1-9a14-4927-85ef-fb6888d432c7", + "metadata": {}, + "outputs": [], + "source": [ + "# Adding the category columns with random values\n", + "df_train['category_col1'] = np.random.choice(unique_values_1, len(df_train))\n", + "df_train['category_col2'] = np.random.choice(unique_values_2, len(df_train))\n", + "df_train['category_col3'] = np.random.choice(unique_values_3, len(df_train))\n", + "\n", + "df_test['category_col1'] = np.random.choice(unique_values_1, len(df_test))\n", + "df_test['category_col2'] = np.random.choice(unique_values_2, len(df_test))\n", + "df_test['category_col3'] = np.random.choice(unique_values_3, len(df_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "ba538a59-a875-4766-a41a-f9099f3add16", + "metadata": {}, + "outputs": [], + "source": [ + "cd_train = CausalityDataset(\n", + " data=df_train,\n", + " treatment='treatment',\n", + " outcomes=['y_factual'],\n", + " effect_modifiers=['x1', 'x2', 'x3', 'category_col1', 'category_col2', 'category_col3']\n", + ")\n", + "\n", + "cd_test = CausalityDataset(\n", + " data=df_test,\n", + " treatment='treatment',\n", + " outcomes=['y_factual'],\n", + " effect_modifiers=['x1', 'x2', 'x3', 'category_col1', 'category_col2', 'category_col3']\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "71fb2260-ca6e-4a7c-b220-8de92af82917", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
treatmenty_factualx1x2x3category_col1category_col2category_col3random
015.599916-0.528603-0.3434551.128554EKR1
106.875856-1.736945-1.8020020.383828AFM1
202.996273-0.807451-0.202946-0.360898DHO0
301.3662060.3900830.596582-1.850350DKR0
401.963538-1.045229-0.6027100.011465CKQ0
\n", + "
" + ], + "text/plain": [ + " treatment y_factual x1 x2 x3 category_col1 \\\n", + "0 1 5.599916 -0.528603 -0.343455 1.128554 E \n", + "1 0 6.875856 -1.736945 -1.802002 0.383828 A \n", + "2 0 2.996273 -0.807451 -0.202946 -0.360898 D \n", + "3 0 1.366206 0.390083 0.596582 -1.850350 D \n", + "4 0 1.963538 -1.045229 -0.602710 0.011465 C \n", + "\n", + " category_col2 category_col3 random \n", + "0 K R 1 \n", + "1 F M 1 \n", + "2 H O 0 \n", + "3 K R 0 \n", + "4 K Q 0 " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cd_train.data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "78d7813b-cc59-4b49-a92a-fb41bae4bd9d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
treatmenty_factualx1x2x3category_col1category_col2category_col3random
015.599916-0.528603-0.3434551.128554BHM1
106.875856-1.736945-1.8020020.383828CIM0
202.996273-0.807451-0.202946-0.360898BKR0
301.3662060.3900830.596582-1.850350CHP1
401.963538-1.045229-0.6027100.011465AHO1
\n", + "
" + ], + "text/plain": [ + " treatment y_factual x1 x2 x3 category_col1 \\\n", + "0 1 5.599916 -0.528603 -0.343455 1.128554 B \n", + "1 0 6.875856 -1.736945 -1.802002 0.383828 C \n", + "2 0 2.996273 -0.807451 -0.202946 -0.360898 B \n", + "3 0 1.366206 0.390083 0.596582 -1.850350 C \n", + "4 0 1.963538 -1.045229 -0.602710 0.011465 A \n", + "\n", + " category_col2 category_col3 random \n", + "0 H M 1 \n", + "1 I M 0 \n", + "2 K R 0 \n", + "3 H P 1 \n", + "4 H O 1 " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cd_test.data.head()" + ] + }, + { + "cell_type": "markdown", + "id": "2c4767e4-7aa2-47f5-ad9d-d830e77d78b0", + "metadata": {}, + "source": [ + "You can select one of the categorical encoders: `\"onehot\", \"label\", \"target\", \"woe\"`" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "7c0b8f72-6efb-4812-80f7-164557e9eea6", + "metadata": {}, + "outputs": [], + "source": [ + "dataset_processor = CausalityDatasetProcessor()\n", + "dataset_processor.fit(\n", + " cd=cd_train,\n", + " encoder_type=\"label\"\n", + ")\n", + "cd_train = dataset_processor.transform(cd_train)\n", + "cd_test = dataset_processor.transform(cd_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "7b9ee8de-ecfd-475c-9369-4bc9abd81454", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
treatmenty_factualx1x2x3randomcategory_col1category_col2category_col3
015.599916-0.528603-0.3434551.1285541.0111
106.875856-1.736945-1.8020020.3838281.0222
202.996273-0.807451-0.202946-0.3608980.0333
301.3662060.3900830.596582-1.8503500.0311
401.963538-1.045228-0.6027100.0114650.0414
\n", + "
" + ], + "text/plain": [ + " treatment y_factual x1 x2 x3 random category_col1 \\\n", + "0 1 5.599916 -0.528603 -0.343455 1.128554 1.0 1 \n", + "1 0 6.875856 -1.736945 -1.802002 0.383828 1.0 2 \n", + "2 0 2.996273 -0.807451 -0.202946 -0.360898 0.0 3 \n", + "3 0 1.366206 0.390083 0.596582 -1.850350 0.0 3 \n", + "4 0 1.963538 -1.045228 -0.602710 0.011465 0.0 4 \n", + "\n", + " category_col2 category_col3 \n", + "0 1 1 \n", + "1 2 2 \n", + "2 3 3 \n", + "3 1 1 \n", + "4 1 4 " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cd_train.data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "82ba4136-d406-4b3d-9b13-4bc765ab925d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
treatmenty_factualx1x2x3randomcategory_col1category_col2category_col3
015.599916-0.528603-0.3434551.1285541.0532
106.875856-1.736945-1.8020020.3838280.0442
202.996273-0.807451-0.202946-0.3608980.0511
301.3662060.3900830.596582-1.8503501.0436
401.963538-1.045228-0.6027100.0114651.0233
\n", + "
" + ], + "text/plain": [ + " treatment y_factual x1 x2 x3 random category_col1 \\\n", + "0 1 5.599916 -0.528603 -0.343455 1.128554 1.0 5 \n", + "1 0 6.875856 -1.736945 -1.802002 0.383828 0.0 4 \n", + "2 0 2.996273 -0.807451 -0.202946 -0.360898 0.0 5 \n", + "3 0 1.366206 0.390083 0.596582 -1.850350 1.0 4 \n", + "4 0 1.963538 -1.045228 -0.602710 0.011465 1.0 2 \n", + "\n", + " category_col2 category_col3 \n", + "0 3 2 \n", + "1 4 2 \n", + "2 1 1 \n", + "3 3 6 \n", + "4 3 3 " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cd_test.data.head()" + ] + }, + { + "cell_type": "markdown", + "id": "4ef24918-f676-4da8-b77a-ebf1347c7be9", + "metadata": {}, + "source": [ + "### Example of model training on transformed data\n", + "Now if `outcome_model=\"auto\"` in the CausalTune constructor, we search over a simultaneous search space for the EconML estimators and for FLAML wrappers for common regressors. The old behavior is now achieved by `outcome_model=\"nested\"` (Refitting AutoML for each estimator)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "7016148c-d48e-4d1d-a951-160b29b6a37b", + "metadata": {}, + "outputs": [], + "source": [ + "# training configs\n", + "\n", + "# set evaluation metric\n", + "metric = \"energy_distance\"\n", + "\n", + "# it's best to specify either time_budget or components_time_budget, \n", + "# and let the other one be inferred; time in seconds\n", + "components_time_budget = 10\n", + "\n", + "# specify training set size\n", + "train_size = 0.7" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "3f6b2cfd-a26e-4c96-8504-7380459d1a3d", + "metadata": {}, + "outputs": [], + "source": [ + "ct = CausalTune(\n", + " estimator_list=[\n", + " \"DomainAdaptationLearner\",\n", + " \"CausalForestDML\",\n", + " \"ForestDRLearner\",\n", + " ],\n", + " metric=metric,\n", + " verbose=1,\n", + " components_time_budget=components_time_budget,\n", + " train_size=train_size,\n", + " outcome_model=\"auto\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "cbdc9c92-33c8-41c4-ab15-8e15703b5a56", + "metadata": {}, + "outputs": [], + "source": [ + "# run causaltune\n", + "ct.fit(data=cd_train, outcome=cd_train.outcomes[0])\n", + "\n", + "print('---------------------')\n", + "# return best estimator\n", + "print(f\"Best estimator: {ct.best_estimator}\")\n", + "# config of best estimator:\n", + "print(f\"Best config: {ct.best_config}\")\n", + "# best score:\n", + "print(f\"Best score: {ct.best_score}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "56d79fe7-bed7-4ccb-96bd-5b0843b149fb", + "metadata": {}, + "outputs": [], + "source": [ + "predictions = ct.predict(cd_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "27bab1dd-abd2-41a4-b30c-62b4722d0872", + "metadata": {}, + "outputs": [], + "source": [ + "predictions" + ] + }, + { + "cell_type": "markdown", + "id": "ef1b4809-cc89-4318-af18-671ba2c70dd5", + "metadata": {}, + "source": [ + 
"### Using pre-processing in the model object\n", + "- You can also use `preprocess = True` in the `CausalTune` fit method to do preprocessing automatically\n", + "- You should specify `encoder_type`\n", + "- You should also specify `encoder_outcome` (binary target column) for the `\"woe\", \"target\"` encoders, no need for `\"onehot\", \"label\"`" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "7d251250-c64a-43b5-b804-1e3e672acb38", + "metadata": {}, + "outputs": [], + "source": [ + "unique_values_1 = ['A', 'B', 'C', 'D', 'E']\n", + "unique_values_2 = ['F', 'G', 'H', 'I', 'J', 'K']\n", + "unique_values_3 = ['L', 'M', 'N', 'O', 'P', 'Q', 'R']\n", + "\n", + "df_train = synth_ihdp(return_df=True).iloc[:,:5]\n", + "df_test = synth_ihdp(return_df=True).iloc[:,:5]\n", + "\n", + "df_train['category_col1'] = np.random.choice(unique_values_1, len(df_train))\n", + "df_train['category_col2'] = np.random.choice(unique_values_2, len(df_train))\n", + "df_train['category_col3'] = np.random.choice(unique_values_3, len(df_train))\n", + "\n", + "df_test['category_col1'] = np.random.choice(unique_values_1, len(df_test))\n", + "df_test['category_col2'] = np.random.choice(unique_values_2, len(df_test))\n", + "df_test['category_col3'] = np.random.choice(unique_values_3, len(df_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "fef7b317-08aa-45bb-8eb1-f8c8eacae428", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
treatmenty_factualx1x2x3category_col1category_col2category_col3
015.599916-0.528603-0.3434551.128554AJN
106.875856-1.736945-1.8020020.383828BJP
202.996273-0.807451-0.202946-0.360898AJP
301.3662060.3900830.596582-1.850350EFM
401.963538-1.045229-0.6027100.011465DGQ
\n", + "
" + ], + "text/plain": [ + " treatment y_factual x1 x2 x3 category_col1 \\\n", + "0 1 5.599916 -0.528603 -0.343455 1.128554 A \n", + "1 0 6.875856 -1.736945 -1.802002 0.383828 B \n", + "2 0 2.996273 -0.807451 -0.202946 -0.360898 A \n", + "3 0 1.366206 0.390083 0.596582 -1.850350 E \n", + "4 0 1.963538 -1.045229 -0.602710 0.011465 D \n", + "\n", + " category_col2 category_col3 \n", + "0 J N \n", + "1 J P \n", + "2 J P \n", + "3 F M \n", + "4 G Q " + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "372e47a5-1da8-4273-80d1-ad8392720b4d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
treatmenty_factualx1x2x3category_col1category_col2category_col3
015.599916-0.528603-0.3434551.128554CKN
106.875856-1.736945-1.8020020.383828DIN
202.996273-0.807451-0.202946-0.360898AGP
301.3662060.3900830.596582-1.850350DJM
401.963538-1.045229-0.6027100.011465EHP
\n", + "
" + ], + "text/plain": [ + " treatment y_factual x1 x2 x3 category_col1 \\\n", + "0 1 5.599916 -0.528603 -0.343455 1.128554 C \n", + "1 0 6.875856 -1.736945 -1.802002 0.383828 D \n", + "2 0 2.996273 -0.807451 -0.202946 -0.360898 A \n", + "3 0 1.366206 0.390083 0.596582 -1.850350 D \n", + "4 0 1.963538 -1.045229 -0.602710 0.011465 E \n", + "\n", + " category_col2 category_col3 \n", + "0 K N \n", + "1 I N \n", + "2 G P \n", + "3 J M \n", + "4 H P " + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_test.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "d0f3ae9f-1f5d-44b8-8c50-953a4ce1e7ef", + "metadata": {}, + "outputs": [], + "source": [ + "cd_train = CausalityDataset(\n", + " data=df_train,\n", + " treatment='treatment',\n", + " outcomes=['y_factual'],\n", + " effect_modifiers=['x1', 'x2', 'x3', 'category_col1', 'category_col2', 'category_col3']\n", + ")\n", + "\n", + "cd_test = CausalityDataset(\n", + " data=df_test,\n", + " treatment='treatment',\n", + " outcomes=['y_factual'],\n", + " effect_modifiers=['x1', 'x2', 'x3', 'category_col1', 'category_col2', 'category_col3']\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "6c1da720-1ef6-4bba-8bb5-047c9bff01d4", + "metadata": {}, + "outputs": [], + "source": [ + "ct = CausalTune(\n", + " estimator_list=[\n", + " \"DomainAdaptationLearner\",\n", + " \"CausalForestDML\",\n", + " \"ForestDRLearner\",\n", + " ],\n", + " metric=metric,\n", + " verbose=1,\n", + " components_time_budget=components_time_budget,\n", + " train_size=train_size,\n", + " outcome_model=\"auto\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "82699ee7-aafb-45c9-994f-9a0296dfe30a", + "metadata": {}, + "outputs": [], + "source": [ + "# run causaltune\n", + "ct.fit(data=cd_train, outcome=cd_train.outcomes[0], preprocess=True, encoder_type = \"label\")\n", + "\n", + "print('---------------------')\n", 
+ "# return best estimator\n", + "print(f\"Best estimator: {ct.best_estimator}\")\n", + "# config of best estimator:\n", + "print(f\"Best config: {ct.best_config}\")\n", + "# best score:\n", + "print(f\"Best score: {ct.best_score}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "92cbd4ad-ab7d-45dd-a13c-c7c99dacd63b", + "metadata": {}, + "outputs": [], + "source": [ + "predictions = ct.predict(cd_train, preprocess=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "d16777d6-f468-469d-813f-da1f90455d37", + "metadata": {}, + "outputs": [], + "source": [ + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffaa475c-2cd6-46c8-ab23-f64f0f5f1506", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 1210ff8e07bb64c0df6e24ef59e51e7ec3aa5b28 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:38:40 +0100 Subject: [PATCH 15/22] Update dataset_processor.py Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- causaltune/dataset_processor.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/causaltune/dataset_processor.py b/causaltune/dataset_processor.py index 11ef5313..7941abff 100644 --- a/causaltune/dataset_processor.py +++ b/causaltune/dataset_processor.py @@ -8,6 +8,16 @@ class CausalityDatasetProcessor(BaseEstimator, TransformerMixin): + """ + A custom dataset processor for CausalTune experiments, designed to preprocess datasets by encoding 
categorical features, + normalizing numerical features, and handling missing values for causal inference tasks. + + Attributes: + encoder_type (str): The type of encoder used for categorical feature encoding (e.g., 'onehot', 'label', 'target', 'woe'). + outcome (str): The target variable or outcome used for encoding and modeling. + encoder: The encoder object used during feature transformations. + """ + def __init__(self): self.encoder_type = None self.outcome = None From 3ec70002d633056fda32344ae199d962b594d08c Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:41:27 +0100 Subject: [PATCH 16/22] Update dataset_processor.py Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- causaltune/dataset_processor.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/causaltune/dataset_processor.py b/causaltune/dataset_processor.py index 7941abff..44d7ee71 100644 --- a/causaltune/dataset_processor.py +++ b/causaltune/dataset_processor.py @@ -9,16 +9,19 @@ class CausalityDatasetProcessor(BaseEstimator, TransformerMixin): """ - A custom dataset processor for CausalTune experiments, designed to preprocess datasets by encoding categorical features, - normalizing numerical features, and handling missing values for causal inference tasks. - + A processor for CausalityDataset, designed to preprocess data for causal inference tasks by encoding, normalizing, + and handling missing values. + Attributes: - encoder_type (str): The type of encoder used for categorical feature encoding (e.g., 'onehot', 'label', 'target', 'woe'). - outcome (str): The target variable or outcome used for encoding and modeling. - encoder: The encoder object used during feature transformations. + encoder_type (str): Type of encoder used for categorical feature encoding ('onehot', 'label', 'target', 'woe'). + outcome (str): The target variable used for encoding. 
+ encoder: Encoder object used during feature transformations. """ def __init__(self): + """ + Initializes CausalityDatasetProcessor with default attributes for encoder_type, outcome, and encoder. + """ self.encoder_type = None self.outcome = None self.encoder = None From aa014e54d45da605afc5a38833e404df06678af9 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:44:02 +0100 Subject: [PATCH 17/22] Update dataset_processor.py Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- causaltune/dataset_processor.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/causaltune/dataset_processor.py b/causaltune/dataset_processor.py index 44d7ee71..a880adf2 100644 --- a/causaltune/dataset_processor.py +++ b/causaltune/dataset_processor.py @@ -11,13 +11,11 @@ class CausalityDatasetProcessor(BaseEstimator, TransformerMixin): """ A processor for CausalityDataset, designed to preprocess data for causal inference tasks by encoding, normalizing, and handling missing values. - Attributes: encoder_type (str): Type of encoder used for categorical feature encoding ('onehot', 'label', 'target', 'woe'). outcome (str): The target variable used for encoding. encoder: Encoder object used during feature transformations. """ - def __init__(self): """ Initializes CausalityDatasetProcessor with default attributes for encoder_type, outcome, and encoder. @@ -32,6 +30,15 @@ def fit( encoder_type: Optional[str] = "onehot", outcome: str = None, ): + """ + Fits the processor by preprocessing the input CausalityDataset. + Args: + cd (CausalityDataset): The dataset for causal analysis. + encoder_type (str, optional): Encoder to use for categorical features. Default is 'onehot'. + outcome (str, optional): The target variable for encoding (needed for 'target' or 'woe'). Default is None. + Returns: + CausalityDatasetProcessor: The fitted processor instance. 
+ """ cd = copy.deepcopy(cd) self.preprocess_dataset( cd, encoder_type=encoder_type, outcome=outcome, fit_phase=True From b3dd13208fdc20f2bc664ecfda32db28db5a640b Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:46:16 +0100 Subject: [PATCH 18/22] Update dataset_processor.py Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- causaltune/dataset_processor.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/causaltune/dataset_processor.py b/causaltune/dataset_processor.py index a880adf2..fb8deee4 100644 --- a/causaltune/dataset_processor.py +++ b/causaltune/dataset_processor.py @@ -46,6 +46,15 @@ def fit( return self def transform(self, cd: CausalityDataset): + """ + Transforms the CausalityDataset using the fitted encoder. + Args: + cd (CausalityDataset): Dataset to transform. + Returns: + CausalityDataset: Transformed dataset. + Raises: + ValueError: If processor has not been trained yet. + """ if self.encoder: cd = self.preprocess_dataset( cd, From b8b3a89a5f3a63f8b908ad92f95596e2b9c9e604 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:46:59 +0100 Subject: [PATCH 19/22] Update dataset_processor.py Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- causaltune/dataset_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causaltune/dataset_processor.py b/causaltune/dataset_processor.py index fb8deee4..de06ee31 100644 --- a/causaltune/dataset_processor.py +++ b/causaltune/dataset_processor.py @@ -10,7 +10,7 @@ class CausalityDatasetProcessor(BaseEstimator, TransformerMixin): """ A processor for CausalityDataset, designed to preprocess data for causal inference tasks by encoding, normalizing, - and handling missing values. 
+ and handling missing values Attributes: encoder_type (str): Type of encoder used for categorical feature encoding ('onehot', 'label', 'target', 'woe'). outcome (str): The target variable used for encoding. From 390b0766b26909cfcb2a93ae8cc506c9372d5c04 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:47:05 +0100 Subject: [PATCH 20/22] Update dataset_processor.py Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- causaltune/dataset_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causaltune/dataset_processor.py b/causaltune/dataset_processor.py index de06ee31..fb8deee4 100644 --- a/causaltune/dataset_processor.py +++ b/causaltune/dataset_processor.py @@ -10,7 +10,7 @@ class CausalityDatasetProcessor(BaseEstimator, TransformerMixin): """ A processor for CausalityDataset, designed to preprocess data for causal inference tasks by encoding, normalizing, - and handling missing values + and handling missing values. Attributes: encoder_type (str): Type of encoder used for categorical feature encoding ('onehot', 'label', 'target', 'woe'). outcome (str): The target variable used for encoding. 
From 582d686d95b5d407876b094681e46303ced9dea3 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:47:11 +0100 Subject: [PATCH 21/22] Update dataset_processor.py Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- causaltune/dataset_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causaltune/dataset_processor.py b/causaltune/dataset_processor.py index fb8deee4..de06ee31 100644 --- a/causaltune/dataset_processor.py +++ b/causaltune/dataset_processor.py @@ -10,7 +10,7 @@ class CausalityDatasetProcessor(BaseEstimator, TransformerMixin): """ A processor for CausalityDataset, designed to preprocess data for causal inference tasks by encoding, normalizing, - and handling missing values. + and handling missing values Attributes: encoder_type (str): Type of encoder used for categorical feature encoding ('onehot', 'label', 'target', 'woe'). outcome (str): The target variable used for encoding. From f533461dfacc9f6418a81a8073c1a2631e4dcff1 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:47:17 +0100 Subject: [PATCH 22/22] Update dataset_processor.py Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- causaltune/dataset_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causaltune/dataset_processor.py b/causaltune/dataset_processor.py index de06ee31..fb8deee4 100644 --- a/causaltune/dataset_processor.py +++ b/causaltune/dataset_processor.py @@ -10,7 +10,7 @@ class CausalityDatasetProcessor(BaseEstimator, TransformerMixin): """ A processor for CausalityDataset, designed to preprocess data for causal inference tasks by encoding, normalizing, - and handling missing values + and handling missing values. 
Attributes: encoder_type (str): Type of encoder used for categorical feature encoding ('onehot', 'label', 'target', 'woe'). outcome (str): The target variable used for encoding.