diff --git a/causaltune/dataset_processor.py b/causaltune/dataset_processor.py
index fb8deee4..eba02c09 100644
--- a/causaltune/dataset_processor.py
+++ b/causaltune/dataset_processor.py
@@ -16,6 +16,7 @@ class CausalityDatasetProcessor(BaseEstimator, TransformerMixin):
outcome (str): The target variable used for encoding.
encoder: Encoder object used during feature transformations.
"""
+
def __init__(self):
"""
Initializes CausalityDatasetProcessor with default attributes for encoder_type, outcome, and encoder.
diff --git a/causaltune/datasets.py b/causaltune/datasets.py
index f4f1406d..880940e6 100644
--- a/causaltune/datasets.py
+++ b/causaltune/datasets.py
@@ -1,5 +1,7 @@
import pandas as pd
import numpy as np
+import pickle
+import os
from scipy import special
# from scipy.stats import betabinom
@@ -12,10 +14,8 @@
def linear_multi_dataset(
- n_points=10000,
- impact=None,
- include_propensity=False,
- include_control=False) -> CausalityDataset:
+ n_points=10000, impact=None, include_propensity=False, include_control=False
+) -> CausalityDataset:
if impact is None:
impact = {0: 0.0, 1: 2.0, 2: 1.0}
df = pd.DataFrame(
@@ -80,8 +80,9 @@ def nhefs() -> CausalityDataset:
df = df.loc[~missing]
df = df[covariates + ["qsmk"] + ["wt82_71"]]
- df.rename(columns={c: "x" + str(i + 1)
- for i, c in enumerate(covariates)}, inplace=True)
+ df.rename(
+ columns={c: "x" + str(i + 1) for i, c in enumerate(covariates)}, inplace=True
+ )
return CausalityDataset(df, treatment="qsmk", outcomes=["wt82_71"])
@@ -172,8 +173,7 @@ def amazon_reviews(rating="pos") -> CausalityDataset:
gdown.download(url, "amazon_" + rating + ".csv", fuzzy=True)
df = pd.read_csv("amazon_" + rating + ".csv")
df.drop(df.columns[[2, 3, 4]], axis=1, inplace=True)
- df.columns = ["treatment", "y_factual"] + \
- ["x" + str(i) for i in range(1, 301)]
+ df.columns = ["treatment", "y_factual"] + ["x" + str(i) for i in range(1, 301)]
return CausalityDataset(df, "treatment", ["y_factual"])
else:
print(
@@ -226,14 +226,10 @@ def synth_ihdp(return_df=False) -> CausalityDataset:
data.columns = col
# drop the columns we don't care about
ignore_patterns = ["y_cfactual", "mu"]
- ignore_cols = [c for c in data.columns if any(
- [s in c for s in ignore_patterns])]
+ ignore_cols = [c for c in data.columns if any([s in c for s in ignore_patterns])]
data = data.drop(columns=ignore_cols)
- return CausalityDataset(
- data,
- "treatment",
- ["y_factual"]) if not return_df else data
+ return CausalityDataset(data, "treatment", ["y_factual"]) if not return_df else data
def synth_acic(condition=1) -> CausalityDataset:
@@ -347,6 +343,7 @@ def generate_synthetic_data(
noisy_outcomes: bool = False,
effect_size: Union[int, None] = None,
add_instrument: bool = False,
+ known_propensity: bool = False,
) -> CausalityDataset:
"""Generates synthetic dataset with conditional treatment effect (CATE) and optional instrumental variable.
Supports RCT (unconfounded) and observational (confounded) data.
@@ -385,11 +382,15 @@ def generate_synthetic_data(
p = np.clip(p, 0.1, 0.9)
C = p > np.random.rand(n_samples)
# print(min(p), max(p))
-
else:
p = 0.5 * np.ones(n_samples)
C = np.random.binomial(n=1, p=0.5, size=n_samples)
+ if known_propensity:
+ known_p = np.random.beta(2, 5, size=n_samples)
+ else:
+ known_p = p
+
if add_instrument:
Z = np.random.binomial(n=1, p=0.5, size=n_samples)
C0 = np.random.binomial(n=1, p=0.006, size=n_samples)
@@ -416,18 +417,11 @@ def mu(X):
Y = tau * T + Y_base
features = [f"X{i+1}" for i in range(n_covariates)]
- df = pd.DataFrame(np.array([*X.T,
- T,
- Y,
- tau,
- p,
- Y_base]).T,
- columns=features + ["treatment",
- "outcome",
- "true_effect",
- "propensity",
- "base_outcome"],
- )
+ df = pd.DataFrame(
+ np.array([*X.T, T, Y, tau, known_p, Y_base]).T,
+ columns=features
+ + ["treatment", "outcome", "true_effect", "propensity", "base_outcome"],
+ )
data = CausalityDataset(
data=df,
treatment="treatment",
@@ -450,6 +444,7 @@ def generate_linear_synthetic_data(
noisy_outcomes: bool = False,
effect_size: Union[int, None] = None,
add_instrument: bool = False,
+ known_propensity: bool = False,
) -> CausalityDataset:
"""Generates synthetic dataset with linear treatment effect (CATE) and optional instrumental variable.
Supports RCT (unconfounded) and observational (confounded) data.
@@ -494,6 +489,11 @@ def generate_linear_synthetic_data(
p = 0.5 * np.ones(n_samples)
C = np.random.binomial(n=1, p=0.5, size=n_samples)
+ if known_propensity:
+ known_p = np.random.beta(2, 5, size=n_samples)
+ else:
+ known_p = p
+
if add_instrument:
Z = np.random.binomial(n=1, p=0.5, size=n_samples)
C0 = np.random.binomial(n=1, p=0.006, size=n_samples)
@@ -520,18 +520,11 @@ def mu(X):
Y = tau * T + Y_base
features = [f"X{i+1}" for i in range(n_covariates)]
- df = pd.DataFrame(np.array([*X.T,
- T,
- Y,
- tau,
- p,
- Y_base]).T,
- columns=features + ["treatment",
- "outcome",
- "true_effect",
- "propensity",
- "base_outcome"],
- )
+ df = pd.DataFrame(
+ np.array([*X.T, T, Y, tau, known_p, Y_base]).T,
+ columns=features
+ + ["treatment", "outcome", "true_effect", "propensity", "base_outcome"],
+ )
data = CausalityDataset(
data=df,
treatment="treatment",
@@ -641,16 +634,8 @@ def generate_non_random_dataset(num_samples=1000):
)
treatment = np.random.binomial(1, propensity)
outcome = (
- 0.2
- * treatment
- + 0.5
- * x1
- - 0.2
- * x2
- + np.random.normal(
- 0,
- 1,
- num_samples))
+ 0.2 * treatment + 0.5 * x1 - 0.2 * x2 + np.random.normal(0, 1, num_samples)
+ )
dataset = {
"T": treatment,
@@ -729,3 +714,41 @@ def mlrate_experiment_synth_dgp(
cd = CausalityDataset(data=df, outcomes=["Y"], treatment="T")
return cd
+
+
+def save_dataset(dataset: CausalityDataset, filename: str):
+    """Pickle ``dataset`` into ``filename`` and report the destination on stdout.
+
+    Args:
+        dataset (CausalityDataset): Object to serialize.
+        filename (str): Path of the output file. It is opened in binary write
+            mode, so an existing file at that path is overwritten.
+    """
+    with open(filename, "wb") as out_file:
+        # Pickler(file).dump(obj) is the object-oriented spelling of pickle.dump.
+        pickle.Pickler(out_file).dump(dataset)
+    print(f"Dataset saved to {filename}")
+
+
+def load_dataset(filename: str) -> CausalityDataset:
+    """
+    Load a CausalityDataset object from a file using pickle.
+
+    Warning:
+        Unpickling can execute arbitrary code. Only load files that come from
+        a trusted source, e.g. files written by ``save_dataset``.
+
+    Args:
+        filename (str): The name of the file to load the dataset from.
+
+    Returns:
+        CausalityDataset: The loaded dataset.
+
+    Raises:
+        FileNotFoundError: If ``filename`` does not exist.
+        ValueError: If the unpickled object is not a CausalityDataset.
+    """
+    # EAFP: open the file directly instead of calling os.path.exists() first.
+    # This removes the window between the check and the open, and saves a
+    # redundant filesystem lookup, while keeping the same error type/message.
+    try:
+        f = open(filename, "rb")
+    except FileNotFoundError as err:
+        raise FileNotFoundError(f"File {filename} not found.") from err
+
+    with f:
+        # SECURITY: pickle.load trusts its input completely (see Warning).
+        dataset = pickle.load(f)
+
+    if not isinstance(dataset, CausalityDataset):
+        raise ValueError(
+            f"The file {filename} does not contain a valid CausalityDataset object."
+        )
+
+    print(f"Dataset loaded from {filename}")
+    return dataset
diff --git a/causaltune/erupt.py b/causaltune/erupt.py
index d7b8fd30..dbca725b 100644
--- a/causaltune/erupt.py
+++ b/causaltune/erupt.py
@@ -4,6 +4,8 @@
import pandas as pd
import numpy as np
+from dowhy.causal_estimator import CausalEstimate
+
# implementation of https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3111957
# we assume treatment takes integer values from 0 to n
@@ -28,12 +30,32 @@ def __init__(
self,
treatment_name: str,
propensity_model,
- X_names: None = Optional[List[str]],
+ X_names: Optional[List[str]] = None,
clip: float = 0.05,
remove_tiny: bool = True,
+ time_budget: Optional[float] = 30.0, # Add default time budget
):
+ """
+ Initialize ERUPT with thompson sampling capability.
+
+ Args:
+ treatment_name (str): Name of treatment column
+ propensity_model: Model for estimating propensity scores
+ X_names (Optional[List[str]]): Names of feature columns
+ clip (float): Clipping threshold for propensity scores
+ remove_tiny (bool): Whether to remove tiny weights
+ time_budget (Optional[float]): Time budget for AutoML propensity fitting
+ """
self.treatment_name = treatment_name
self.propensity_model = copy.deepcopy(propensity_model)
+
+ # If propensity model is AutoML, ensure it has time_budget
+ if (
+ hasattr(self.propensity_model, "time_budget")
+ and self.propensity_model.time_budget is None
+ ):
+ self.propensity_model.time_budget = time_budget
+
self.X_names = X_names
self.clip = clip
self.remove_tiny = remove_tiny
@@ -48,7 +70,7 @@ def score(
) -> pd.Series:
# TODO: make it accept both array and callable as policy
w = self.weights(df, policy)
- return (w * outcome).mean()
+ return np.round((w * outcome).mean(), decimals=12)
def weights(
self, df: pd.DataFrame, policy: Union[Callable, np.ndarray, pd.Series]
@@ -58,117 +80,405 @@ def weights(
[x >= 0 for x in W.unique()]
), "Treatment values must be non-negative integers"
+ # Handle policy input
if callable(policy):
policy = policy(df).astype(int)
if isinstance(policy, pd.Series):
policy = policy.values
policy = np.array(policy)
-
d = pd.Series(index=df.index, data=policy)
assert all(
[x >= 0 for x in d.unique()]
), "Policy values must be non-negative integers"
+ # Get propensity scores with better handling of edge cases
if isinstance(self.propensity_model, DummyPropensity):
p = self.propensity_model.predict_proba()
else:
- p = self.propensity_model.predict_proba(df[self.X_names])
- # normalize to hopefully avoid NaNs
- p = np.maximum(p, 1e-4)
+ try:
+ p = self.propensity_model.predict_proba(df[self.X_names])
+ except Exception:
+ # Fallback to safe defaults if prediction fails
+ p = np.full((len(df), 2), 0.5)
- weight = np.zeros(len(df))
+ # Clip propensity scores to avoid division by zero or extreme weights
+ min_clip = max(1e-6, self.clip) # Ensure minimum clip is not too small
+ p = np.clip(p, min_clip, 1 - min_clip)
- for i in W.unique():
- weight[W == i] = 1 / p[:, i][W == i]
+ # Initialize weights
+ weight = np.zeros(len(df))
+ try:
+ # Calculate weights with safer operations
+ for i in W.unique():
+ mask = W == i
+ p_i = p[:, i][mask]
+ # Add small constant to denominator to prevent division by zero
+ weight[mask] = 1 / (p_i + 1e-10)
+ except Exception:
+ # If something goes wrong, return safe weights
+ weight = np.ones(len(df))
+
+ # Zero out weights where policy disagrees with actual treatment
weight[d != W] = 0.0
+ # Handle extreme weights
if self.remove_tiny:
weight[weight > 1 / self.clip] = 0.0
else:
weight[weight > 1 / self.clip] = 1 / self.clip
- # and just for paranoia's sake let's normalize, though it shouldn't
- # matter for big samples
- weight *= len(df) / sum(weight)
+ # Normalize weights
+ sum_weight = weight.sum()
+ if sum_weight > 0:
+ weight *= len(df) / sum_weight
+ else:
+ # If all weights are zero, use uniform weights
+ weight = np.ones(len(df)) / len(df)
- assert not np.isnan(weight.sum()), "NaNs in ERUPT weights"
+ # Final check for NaNs
+ if np.any(np.isnan(weight)):
+ # Replace any remaining NaNs with uniform weights
+ weight = np.ones(len(df)) / len(df)
return pd.Series(index=df.index, data=weight)
- # NEW:
-
def probabilistic_erupt_score(
self,
df: pd.DataFrame,
outcome: pd.Series,
- treatment_effects: pd.Series,
- treatment_std_devs: pd.Series,
- iterations: int = 1000
+ estimate: CausalEstimate,
+ n_samples: int = 1000,
+ clip: Optional[float] = None,
) -> float:
"""
- Calculate the Probabilistic ERUPT (Expected Response Under Proposed
- Treatments) score.
-
- This method uses Monte Carlo simulation to estimate the expected
- outcome under a probabilistic treatment policy, accounting for
- uncertainty in treatment effects. It balances potential improvements
- against estimation uncertainty and treatment rates.
+ Calculate ERUPT score using Thompson sampling to create a probabilistic policy.
Args:
- df (pd.DataFrame): The input dataframe containing treatment
- information.
- outcome (pd.Series): The observed outcomes for each unit.
- treatment_effects (pd.Series): Estimated treatment effects for
- each unit.
- treatment_std_devs (pd.Series): Standard deviations of treatment
- effects.
- iterations (int): Number of Monte Carlo iterations (default: 1000).
+ df (pd.DataFrame): Input dataframe
+ outcome (pd.Series): Observed outcomes
+ estimate (CausalEstimate): Causal estimate containing the estimator
+ n_samples (int): Number of Thompson sampling iterations
+ clip (float): Optional lower bound applied to the effect std estimates
Returns:
- float: The Probabilistic ERUPT score, representing the relative
- improvement over the baseline outcome, adjusted for uncertainty.
+ float: Mean of the non-zero per-sample ERUPT scores; 0.0 when the
+ propensity scores cannot be obtained or no sample has a non-zero score.
"""
- # Calculate the baseline outcome (mean outcome for untreated units)
- baseline_outcome = outcome[df[self.treatment_name] == 0].mean()
-
- policy_values = []
- treatment_decisions = []
-
- # Perform Monte Carlo simulation
- for _ in range(iterations):
- # Sample treatment effects from normal distributions
- sampled_effects = pd.Series(
- np.random.normal(treatment_effects, treatment_std_devs),
- index=treatment_effects.index
- )
-
- # Define policy: treat if sampled effect is positive
- # Note: A more conservative policy could use: sampled_effects > 2 *
- # treatment_std_devs
- policy = (sampled_effects > 0).astype(int)
-
- # Calculate expected outcome under this policy
- expected_outcome = (
- baseline_outcome
- + (policy * sampled_effects).mean()
- )
-
- policy_values.append(expected_outcome)
- treatment_decisions.append(policy.mean())
-
- # Calculate mean and standard error of policy values
- mean_value = np.mean(policy_values)
- se_value = np.std(policy_values) / np.sqrt(iterations)
-
- # Placeholder for potential treatment rate penalty
- treatment_penalty = 0
-
- # Calculate score: mean value minus 2 standard errors, adjusted for
- # treatment penalty
- score = (mean_value - 2 * se_value) * (1 - treatment_penalty)
-
- # Calculate relative improvement over baseline
- improvement = (score - baseline_outcome) / baseline_outcome
-
- return improvement
+ # Per-unit effect estimates from the wrapped estimator; a result with a
+ # single trailing column, shape (n, 1), is flattened to shape (n,).
+ est = estimate.estimator
+ cate_estimate = est.effect(df)
+ if len(cate_estimate.shape) > 1 and cate_estimate.shape[1] == 1:
+ cate_estimate = cate_estimate.reshape(-1)
+
+ # Get standard errors using established methods if available
+ # The estimator is recognised by the substring "Econml" in its type name.
+ # Anything else, and any exception raised below, falls back to a constant
+ # proxy: half the empirical std of the CATE estimates, for every unit.
+ try:
+ if "Econml" in str(type(est)):
+ effect_stds = est.effect_stderr(df)
+ else:
+ # Use empirical std as proxy for uncertainty
+ effect_stds = np.std(cate_estimate) * np.ones_like(cate_estimate) * 0.5
+
+ effect_stds = np.squeeze(effect_stds)
+ # `clip`, when truthy, is a lower bound on the std estimates (no upper bound).
+ if clip:
+ effect_stds = np.clip(effect_stds, clip, None)
+
+ except Exception:
+ # If standard error estimation fails, use empirical std
+ effect_stds = np.std(cate_estimate) * np.ones_like(cate_estimate) * 0.5
+ if clip:
+ effect_stds = np.clip(effect_stds, clip, None)
+
+ # Ensure propensity scores are available
+ # NOTE(review): a 0.0 returned here cannot be told apart from a genuine
+ # zero score by the caller - confirm that this is intended.
+ if not hasattr(self, "propensity_model"):
+ return 0.0
+
+ # Cache propensity predictions to avoid recomputing
+ # Any exception raised while predicting propensities results in a 0.0
+ # score instead of propagating.
+ try:
+ if isinstance(self.propensity_model, DummyPropensity):
+ p = self.propensity_model.predict_proba()
+ else:
+ p = self.propensity_model.predict_proba(df[self.X_names])
+ # Floor the probabilities at 1e-4 before they are inverted below.
+ p = np.maximum(p, 1e-4)
+ except Exception:
+ return 0.0
+
+ # Perform Thompson sampling using matrix operations
+ n_units = len(df)
+ scores = np.zeros(n_samples)
+
+ # Pre-calculate base weights
+ # For rows whose observed treatment is i, the weight is 1 / p[row, i].
+ W = df[self.treatment_name].astype(int)
+ base_weights = np.zeros(len(df))
+ for i in W.unique():
+ base_weights[W == i] = 1 / p[:, i][W == i]
+
+ # Sample n_samples sets of effects
+ # Result has shape (n_units, n_samples): one column per sampled policy.
+ samples = np.random.normal(
+ loc=cate_estimate.reshape(-1, 1),
+ scale=effect_stds.reshape(-1, 1),
+ size=(n_units, n_samples),
+ )
+
+ # Convert sampled effects to binary policies
+ # Rule: treatment 1 where the sampled effect is positive, otherwise 0.
+ sampled_policies = (samples > 0).astype(int)
+
+ # Calculate scores efficiently
+ for i in range(n_samples):
+ policy = sampled_policies[:, i]
+ weights = base_weights.copy()
+ # Keep only units whose observed treatment matches the sampled policy.
+ weights[policy != W] = 0.0
+
+ # Weights above 1 / self.clip are zeroed (remove_tiny) or capped. This
+ # uses the instance attribute self.clip, not this method's `clip` argument.
+ if self.remove_tiny:
+ weights[weights > 1 / self.clip] = 0.0
+ else:
+ weights[weights > 1 / self.clip] = 1 / self.clip
+
+ # Rescale so the weights sum to len(df); skipped when all weights are zero.
+ if weights.sum() > 0:
+ weights *= len(df) / weights.sum()
+ scores[i] = (weights * outcome.values).mean()
+
+ # Return mean non-zero score
+ # NOTE(review): a sample whose score is exactly 0 is treated as invalid
+ # and left out of the mean - confirm a true zero score cannot occur.
+ valid_scores = scores[scores != 0]
+ if len(valid_scores) > 0:
+ return np.mean(valid_scores)
+ return 0.0
+
+    def thompson_weights(
+        self,
+        df: pd.DataFrame,
+        cate_estimate: np.ndarray,
+        effect_stds: np.ndarray,
+        n_samples: int = 1,
+    ) -> pd.Series:
+        """Compute ERUPT weights for one Thompson-sampled treatment policy.
+
+        One effect is drawn per unit from a normal distribution centred on
+        ``cate_estimate`` with standard deviation ``effect_stds``. Units whose
+        draw is positive get policy value 1, all others 0, and that policy is
+        handed to ``self.weights``.
+
+        Args:
+            df (pd.DataFrame): Data forwarded to ``self.weights``.
+            cate_estimate (np.ndarray): Means of the sampling distribution.
+            effect_stds (np.ndarray): Standard deviations of the sampling
+                distribution.
+            n_samples (int): Accepted but not used by this method.
+
+        Returns:
+            pd.Series: The result of ``self.weights`` for the sampled policy.
+        """
+        drawn_effects = np.random.normal(loc=cate_estimate, scale=effect_stds)
+        treat_if_positive = (drawn_effects > 0).astype(int)
+
+        def sampled_policy(_frame):
+            # The policy is fixed by the draw above and ignores its argument.
+            return treat_if_positive
+
+        return self.weights(df, sampled_policy)
+
+ # def probabilistic_erupt_score(
+ # self,
+ # df: pd.DataFrame,
+ # outcome: pd.Series,
+ # estimate: CausalEstimate,
+ # cate_estimate: np.ndarray,
+ # sd_threshold: float = 1e-2,
+ # iterations: int = 1000
+ # ) -> float:
+ # """
+ # Calculate the Probabilistic ERUPT score using Thompson sampling to select
+ # optimal treatments under uncertainty.
+
+ # This implementation utilizes Thompson sampling by selecting treatments that
+ # maximize expected outcomes based on sampled treatment effects. For each iteration,
+ # effects are sampled from posterior distributions and treatments are assigned
+ # to maximize the expected outcome.
+
+ # Args:
+ # df (pd.DataFrame): Input dataframe with treatment data
+ # outcome (pd.Series): Observed outcomes for each unit
+ # estimate (CausalEstimate): Causal estimate to evaluate
+ # cate_estimate (np.ndarray): Array with CATE estimates
+ # sd_threshold (float): Minimum standard deviation to consider meaningful variation
+ # iterations (int): Number of Thompson sampling iterations
+
+ # Returns:
+ # float: Probabilistic ERUPT score or 0 if variance estimation not available
+ # """
+ # est = estimate.estimator
+
+ # # Check if estimator supports inference
+ # if not hasattr(est, 'inference') or not hasattr(est, 'effect_stderr'):
+ # return 0
+
+ # try:
+ # # Get standard errors
+ # effect_stds = est.effect_stderr(df)
+
+ # # Check if we got valid standard errors
+ # if effect_stds is None:
+ # return 0
+
+ # # Check for meaningful heterogeneity in treatment effects
+ # cate_std = np.std(cate_estimate)
+ # if cate_std < sd_threshold:
+ # return 0
+
+ # unique_treatments = df[self.treatment_name].unique()
+ # treatment_scores = {treatment: [] for treatment in unique_treatments}
+
+ # # Normalize standard errors relative to effect size variation
+ # effect_stds = np.maximum(effect_stds, cate_std * 0.1) # Prevent overconfidence
+
+ # # Calculate baseline outcome for reference
+ # baseline_outcome = outcome[df[self.treatment_name] == 0].mean()
+
+ # # Perform Thompson sampling iterations
+ # for _ in range(iterations):
+ # # Sample effects while maintaining relative relationships
+ # sampled_effects = np.random.normal(cate_estimate, effect_stds)
+
+ # # Apply treatment policy based on sampled effects
+ # policy = (sampled_effects > np.median(sampled_effects)).astype(int)
+
+ # # Calculate weights for this policy
+ # weights = self.weights(df, policy)
+
+ # # Skip if weights sum to zero
+ # if weights.sum() == 0:
+ # continue
+
+ # # Calculate mean outcome under this policy
+ # weighted_outcome = (weights * outcome).sum() / weights.sum()
+ # treatment_scores[1].append(weighted_outcome) # Store under treatment=1
+
+ # # If no valid iterations, return 0
+ # if not any(scores for scores in treatment_scores.values()):
+ # return 0
+
+ # # Calculate improvement over baseline
+ # average_treatment_outcome = np.mean(treatment_scores[1])
+ # relative_improvement = (average_treatment_outcome - baseline_outcome) / abs(baseline_outcome)
+
+ # return relative_improvement
+
+ # except (AttributeError, ValueError) as e:
+ # return 0
+
+ # def probabilistic_erupt_score(
+ # self,
+ # df: pd.DataFrame,
+ # outcome: pd.Series,
+ # estimate: CausalEstimate,
+ # cate_estimate: np.ndarray,
+ # sd_threshold: float = 1e-2,
+ # iterations: int = 1000,
+ # ) -> float:
+ # """[Previous docstring remains the same]"""
+ # est = estimate.estimator
+
+ # print(
+ # f"\nDebugging Probabilistic ERUPT for estimator: {est.__class__.__name__}"
+ # )
+ # print("CATE estimate summary:")
+ # print(f"Mean: {np.mean(cate_estimate):.4f}")
+ # print(f"Std: {np.std(cate_estimate):.4f}")
+ # print(f"Min: {np.min(cate_estimate):.4f}")
+ # print(f"Max: {np.max(cate_estimate):.4f}")
+
+ # try:
+ # # Different approaches to get standard errors based on estimator type
+ # effect_stds = None
+
+ # # For DML and DR learners
+ # if hasattr(est, "effect_stderr"):
+ # try:
+ # effect_stds = est.effect_stderr(df)
+ # if effect_stds is not None:
+ # # Ensure correct shape
+ # effect_stds = np.squeeze(effect_stds)
+ # print("Got std errors from effect_stderr")
+ # except Exception as e:
+ # print(f"effect_stderr failed: {str(e)}")
+
+ # # For metalearners
+ # if effect_stds is None and hasattr(est, "effect_inference"):
+ # try:
+ # inference_result = est.effect_inference(df)
+ # if hasattr(inference_result, "stderr"):
+ # effect_stds = inference_result.stderr
+ # effect_stds = np.squeeze(effect_stds)
+ # print("Got std errors from effect_inference")
+ # except Exception as e:
+ # print(f"effect_inference failed: {str(e)}")
+
+ # # If we still don't have valid standard errors, try inference method
+ # if effect_stds is None and hasattr(est, "inference"):
+ # try:
+ # inference_result = est.inference()
+ # if hasattr(inference_result, "stderr"):
+ # effect_stds = inference_result.stderr
+ # effect_stds = np.squeeze(effect_stds)
+ # print("Got std errors from inference")
+ # except Exception as e:
+ # print(f"inference failed: {str(e)}")
+
+ # # Final check if we got valid standard errors
+ # if effect_stds is None:
+ # print("Could not obtain valid standard errors")
+ # return 0
+
+ # # Check shapes match
+ # if effect_stds.shape != cate_estimate.shape:
+ # print(
+ # f"Shape mismatch: effect_stds {effect_stds.shape} vs cate_estimate {cate_estimate.shape}"
+ # )
+ # effect_stds = np.broadcast_to(effect_stds, cate_estimate.shape)
+
+ # print("\nStandard errors summary:")
+ # print(f"Mean: {np.mean(effect_stds):.4f}")
+ # print(f"Std: {np.std(effect_stds):.4f}")
+ # print(f"Min: {np.min(effect_stds):.4f}")
+ # print(f"Max: {np.max(effect_stds):.4f}")
+
+ # # Check for meaningful heterogeneity
+ # cate_std = np.std(cate_estimate)
+ # if cate_std < sd_threshold:
+ # print(
+ # f"CATE std {cate_std:.4f} below threshold {sd_threshold} - returning 0"
+ # )
+ # return 0
+
+ # unique_treatments = df[self.treatment_name].unique()
+ # print(f"\nUnique treatments: {unique_treatments}")
+ # treatment_scores = {treatment: [] for treatment in unique_treatments}
+
+ # # Normalize standard errors relative to effect size variation
+ # effect_stds = np.maximum(effect_stds, cate_std * 0.1)
+
+ # # Calculate baseline
+ # baseline_outcome = outcome[df[self.treatment_name] == 0].mean()
+ # print(f"Baseline outcome: {baseline_outcome:.4f}")
+
+ # print("\nStarting Thompson sampling iterations...")
+
+ # # Perform Thompson sampling iterations
+ # for _ in range(iterations):
+ # # Sample effects from posterior distributions for each treatment
+ # sampled_effects = {
+ # treatment: np.random.normal(cate_estimate, effect_stds)
+ # for treatment in unique_treatments
+ # }
+
+ # # Select treatment with highest sampled effect
+ # chosen_treatment = max(
+ # sampled_effects, key=lambda k: np.mean(sampled_effects[k])
+ # )
+
+ # # Calculate weights for the chosen treatment policy
+ # weights = self.weights(
+ # df, lambda x: np.array([chosen_treatment] * len(x))
+ # )
+
+ # # # Calculate mean outcome under this policy
+ # if weights.sum() > 0:
+ # mean_outcome = (weights * outcome).sum() / weights.sum()
+ # treatment_scores[chosen_treatment].append(mean_outcome)
+
+ # # Calculate final score
+ # if not any(scores for scores in treatment_scores.values()):
+ # print("No valid treatment scores")
+ # return 0
+
+ # average_outcomes = np.mean(
+ # [np.mean(scores) for scores in treatment_scores.values() if scores]
+ # )
+
+ # relative_improvement = (average_outcomes - baseline_outcome) / abs(
+ # baseline_outcome
+ # )
+ # print(f"Final relative improvement: {relative_improvement:.4f}")
+
+ # return relative_improvement
+
+ # except Exception as e:
+ # print(f"Exception occurred: {str(e)}")
+ # return 0
diff --git a/causaltune/optimiser.py b/causaltune/optimiser.py
index 803a5982..34497e43 100644
--- a/causaltune/optimiser.py
+++ b/causaltune/optimiser.py
@@ -179,9 +179,9 @@ def __init__(
resources_per_trial if resources_per_trial is not None else {"cpu": 0.5}
)
self._settings["try_init_configs"] = try_init_configs
- self._settings[
- "include_experimental_estimators"
- ] = include_experimental_estimators
+ self._settings["include_experimental_estimators"] = (
+ include_experimental_estimators
+ )
# params for FLAML on component models:
self._settings["component_models"] = {}
@@ -515,6 +515,7 @@ def fit(
"energy_distance",
"psw_energy_distance",
"frobenius_norm",
+ "psw_frobenius_norm",
"codec",
"policy_risk",
]
@@ -564,19 +565,24 @@ def _tune_with_config(self, config: dict) -> dict:
est_name = estimates["estimator_name"]
current_score = estimates[self.metric]
+ estimates["optimization_score"] = current_score
+
# Initialize best_score if this is the first estimator for this name
if est_name not in self._best_estimators:
self._best_estimators[est_name] = (
- np.inf
- if self.metric
- in [
- "energy_distance",
- "psw_energy_distance",
- "frobenius_norm",
- "codec",
- "policy_risk",
- ]
- else -np.inf,
+ (
+ np.inf
+ if self.metric
+ in [
+ "energy_distance",
+ "psw_energy_distance",
+ "frobenius_norm",
+ "psw_frobenius_norm",
+ "codec",
+ "policy_risk",
+ ]
+ else -np.inf
+ ),
None,
)
@@ -587,6 +593,7 @@ def _tune_with_config(self, config: dict) -> dict:
"energy_distance",
"psw_energy_distance",
"frobenius_norm",
+ "psw_frobenius_norm",
"codec",
"policy_risk",
]:
@@ -609,9 +616,11 @@ def _tune_with_config(self, config: dict) -> dict:
):
self._best_estimators[est_name] = (
current_score,
- estimates["estimator"]
- if self._settings["store_all"]
- else estimates.pop("estimator"),
+ (
+ estimates["estimator"]
+ if self._settings["store_all"]
+ else estimates.pop("estimator")
+ ),
)
return estimates
diff --git a/causaltune/scoring.py b/causaltune/scoring.py
index ac6e6ab3..5f20c99f 100644
--- a/causaltune/scoring.py
+++ b/causaltune/scoring.py
@@ -18,14 +18,18 @@
import dcor
-# Imports for CODEC
from scipy.spatial import distance
from sklearn.neighbors import NearestNeighbors
+from scipy.stats import kendalltau
+
+from sklearn.preprocessing import StandardScaler
+
class DummyEstimator:
- def __init__(self, cate_estimate: np.ndarray,
- effect_intervals: Optional[np.ndarray] = None):
+ def __init__(
+ self, cate_estimate: np.ndarray, effect_intervals: Optional[np.ndarray] = None
+ ):
self.cate_estimate = cate_estimate
self.effect_intervals = effect_intervals
@@ -33,12 +37,9 @@ def const_marginal_effect(self, X):
return self.cate_estimate
-def supported_metrics(
- problem: str,
- multivalue: bool,
- scores_only: bool) -> List[str]:
+def supported_metrics(problem: str, multivalue: bool, scores_only: bool) -> List[str]:
if problem == "iv":
- metrics = ["energy_distance"]
+ metrics = ["energy_distance", "frobenius_norm", "codec"]
if not scores_only:
metrics.append("ate")
return metrics
@@ -59,7 +60,8 @@ def supported_metrics(
"energy_distance",
"psw_energy_distance",
"frobenius_norm", # NEW
- "codec" # NEW
+ "codec", # NEW
+ "bite", # NEW
]
if not scores_only:
metrics.append("ate")
@@ -109,13 +111,12 @@ def __init__(
},
).estimator
- if not hasattr(
- self.psw_estimator,
- 'estimator') or not hasattr(
- self.psw_estimator.estimator,
- 'propensity_model'):
+ if not hasattr(self.psw_estimator, "estimator") or not hasattr(
+ self.psw_estimator.estimator, "propensity_model"
+ ):
raise ValueError(
- "Propensity model fitting failed. Please check the setup.")
+ "Propensity model fitting failed. Please check the setup."
+ )
else:
print("Propensity Model Fitted Successfully")
@@ -171,8 +172,7 @@ def resolve_metric(self, metric: str) -> str:
"""
- metrics = supported_metrics(
- self.problem, self.multivalue, scores_only=True)
+ metrics = supported_metrics(self.problem, self.multivalue, scores_only=True)
if metric not in metrics:
logging.warning(
@@ -200,16 +200,12 @@ def resolve_reported_metrics(
List[str]: list of valid metrics.
"""
- metrics = supported_metrics(
- self.problem,
- self.multivalue,
- scores_only=False)
+ metrics = supported_metrics(self.problem, self.multivalue, scores_only=False)
if metrics_to_report is None:
return metrics
else:
- metrics_to_report = sorted(
- list(set(metrics_to_report + [scoring_metric])))
+ metrics_to_report = sorted(list(set(metrics_to_report + [scoring_metric])))
for m in metrics_to_report.copy():
if m not in metrics:
logging.warning(
@@ -276,92 +272,149 @@ def frobenius_norm_score(
estimate: CausalEstimate,
df: pd.DataFrame,
sd_threshold: float = 1e-2,
+ epsilon: float = 1e-5,
+ alpha: float = 0.5,
) -> float:
"""
- Calculate Frobenius norm-based score between treated and controls,
- using propensity score weighting.
+ Calculate adaptive Frobenius norm-based score between treated and controls.
+ Automatically determines whether to use propensity score weighting based on:
+ 1. Problem type (IV vs backdoor)
+ 2. Data characteristics (presence of propensity modifiers/instruments)
+ 3. Estimator properties
Args:
estimate (CausalEstimate): causal estimate to evaluate
df (pandas.DataFrame): input dataframe
- sd_threshold (float): threshold for standard deviation of CATE
- estimates
+ sd_threshold (float): threshold for standard deviation of CATE estimates
+ epsilon (float): small regularization constant
+ alpha (float): weight between Frobenius norm and variance component
Returns:
- float: Frobenius norm-based score, or np.inf if calculation is
- not possible
+ float: Frobenius norm-based score, with propensity weighting if applicable
"""
- # Attempt to get CATE estimates, handling potential AttributeErrors
+ # Get CATE estimates
try:
cate_estimates = estimate.estimator.effect(df)
except AttributeError:
try:
cate_estimates = estimate.estimator.effect_tt(df)
except AttributeError:
- return np.inf # Return inf if neither method is available
+ return np.inf
- # Check if CATE estimates are consistently constant (below threshold)
if np.std(cate_estimates) <= sd_threshold:
- return np.inf # Return inf for constant CATE estimates
+ return np.inf
- # Prepare data for treated and control groups
- Y0X, treatment_name, split_test_by = self._Y0_X_potential_outcomes(
- estimate, df)
- Y0X_1 = Y0X[Y0X[split_test_by] == 1] # Treated group
- Y0X_0 = Y0X[Y0X[split_test_by] == 0] # Control group
+ # Get data splits and check validity
+ Y0X, treatment_name, split_test_by = self._Y0_X_potential_outcomes(estimate, df)
+ Y0X_1 = Y0X[Y0X[split_test_by] == 1]
+ Y0X_0 = Y0X[Y0X[split_test_by] == 0]
- # Check if either group is empty
if len(Y0X_1) == 0 or len(Y0X_0) == 0:
- return np.inf # Return inf if either group is empty
+ return np.inf
+
+ # Determine if propensity weighting should be used
+ use_propensity = self._should_use_propensity(estimate)
- # Select columns for analysis
+ # Normalize features
select_cols = estimate.estimator._effect_modifier_names + ["yhat"]
+ scaler = StandardScaler()
+ Y0X_1_normalized = scaler.fit_transform(Y0X_1[select_cols])
+ Y0X_0_normalized = scaler.transform(Y0X_0[select_cols])
- # Calculate propensity scores for treated group
- propensitymodel = self.psw_estimator.estimator.propensity_model
- YX_1_all_psw = propensitymodel.predict_proba(
- Y0X_1[
- self.causal_model.get_effect_modifiers()
- + self.causal_model.get_common_causes()
- ]
+ # Calculate pairwise differences
+ differences_xy = (
+ Y0X_1_normalized[:, np.newaxis, :] - Y0X_0_normalized[np.newaxis, :, :]
)
- treatment_series = Y0X_1[treatment_name]
- YX_1_psw = np.zeros(YX_1_all_psw.shape[0])
- for i in treatment_series.unique():
- YX_1_psw[treatment_series == i] = (
- YX_1_all_psw[:, i][treatment_series == i]
- )
- # Calculate propensity scores for control group
- propensitymodel = self.psw_estimator.estimator.propensity_model
- YX_0_psw = propensitymodel.predict_proba(
- Y0X_0[
- self.causal_model.get_effect_modifiers()
- + self.causal_model.get_common_causes()
- ]
- )[:, 0]
+ if use_propensity:
+ try:
+ # Calculate and apply propensity weights
+ propensitymodel = self.psw_estimator.estimator.propensity_model
+ YX_1_all_psw = propensitymodel.predict_proba(
+ Y0X_1[
+ self.causal_model.get_effect_modifiers()
+ + self.causal_model.get_common_causes()
+ ]
+ )
+ treatment_series = Y0X_1[treatment_name]
+ YX_1_psw = np.zeros(YX_1_all_psw.shape[0])
+ for i in treatment_series.unique():
+ YX_1_psw[treatment_series == i] = YX_1_all_psw[:, i][
+ treatment_series == i
+ ]
- # Ensure both datasets have the same number of rows
- min_rows = min(len(Y0X_1), len(Y0X_0))
- Y0X_1 = Y0X_1.iloc[:min_rows]
- Y0X_0 = Y0X_0.iloc[:min_rows]
- YX_1_psw = YX_1_psw[:min_rows]
- YX_0_psw = YX_0_psw[:min_rows]
+ YX_0_psw = propensitymodel.predict_proba(
+ Y0X_0[
+ self.causal_model.get_effect_modifiers()
+ + self.causal_model.get_common_causes()
+ ]
+ )[:, 0]
- # Calculate the difference matrix with propensity score weights
- D = (Y0X_1[select_cols].values - Y0X_0[select_cols].values) * \
- np.sqrt(YX_1_psw * YX_0_psw).reshape(-1, 1)
+ # Trim propensity scores
+ YX_1_psw = np.clip(YX_1_psw, 0.01, 0.99)
+ YX_0_psw = np.clip(YX_0_psw, 0.01, 0.99)
- # Compute Frobenius norm of the weighted difference matrix
- frobenius_norm = np.linalg.norm(D, ord='fro')
+ # Calculate joint weights and apply them
+ xy_psw = psw_joint_weights(YX_1_psw, YX_0_psw)
+ xy_mean_weights = np.mean(xy_psw)
+ weighted_differences_xy = np.reciprocal(xy_mean_weights) * np.multiply(
+ xy_psw[:, :, np.newaxis], differences_xy
+ )
+ except (AttributeError, KeyError):
+ # Fallback to unweighted if propensity weighting fails
+ weighted_differences_xy = differences_xy
+ else:
+ weighted_differences_xy = differences_xy
+
+ # Compute Frobenius norm
+ frobenius_norm = np.sqrt(np.sum(weighted_differences_xy**2))
+
+ # Normalize
+ n_1, n_0 = len(Y0X_1), len(Y0X_0)
+ p = differences_xy.shape[-1]
+ normalized_score = frobenius_norm / np.sqrt(n_1 * n_0 * p)
+
+ # Add regularization and variance component
+ cate_variance = np.var(cate_estimates)
+ inverse_variance_component = 1 / (cate_variance + epsilon)
+
+ composite_score = (
+ alpha * normalized_score + (1 - alpha) * inverse_variance_component
+ )
+
+ return composite_score if np.isfinite(composite_score) else np.inf
+
+ def _should_use_propensity(self, estimate: CausalEstimate) -> bool:
+ """
+ Determine if propensity score weighting should be used based on:
+ 1. Problem type
+ 2. Data characteristics
+ 3. Estimator properties
- # Normalize the Frobenius norm by sqrt(n * p) where n is number of
- # samples and p is number of features
- n, p = D.shape
- normalized_score = frobenius_norm / np.sqrt(n * p)
+ Args:
+ estimate (CausalEstimate): causal estimate being evaluated
- # Return the normalized score if it's finite, otherwise return infinity
- return normalized_score if np.isfinite(normalized_score) else np.inf
+ Returns:
+ bool: True if propensity weighting should be used
+ """
+ # Don't use propensity for IV problems
+ if self.problem == "iv":
+ return False
+
+ # Check if we have a backdoor problem with propensity modifiers
+ if self.problem == "backdoor":
+ data = self.causal_model
+ has_propensity = (
+ hasattr(data, "get_propensity_modifiers")
+ and len(data.get_propensity_modifiers()) > 0
+ )
+ has_confounders = len(data.get_common_causes()) > 0
+
+ # Use propensity if we have modifiers or confounders
+ return has_propensity or has_confounders
+
+ # Default to no propensity weighting
+ return False
def psw_energy_distance(
self,
@@ -407,9 +460,7 @@ def psw_energy_distance(
YX_1_psw = np.zeros(YX_1_all_psw.shape[0])
for i in treatment_series.unique():
- YX_1_psw[treatment_series == i] = (
- YX_1_all_psw[:, i][treatment_series == i]
- )
+ YX_1_psw[treatment_series == i] = YX_1_all_psw[:, i][treatment_series == i]
propensitymodel = self.psw_estimator.estimator.propensity_model
YX_0_psw = propensitymodel.predict_proba(
@@ -452,25 +503,23 @@ def psw_energy_distance(
),
)
distance_yy = np.reciprocal(yy_mean_weights) * np.multiply(
- yy_psw, dcor.distances.pairwise_distances(
- Y0X_1[select_cols], exponent=exponent), )
+ yy_psw,
+ dcor.distances.pairwise_distances(Y0X_1[select_cols], exponent=exponent),
+ )
distance_xx = np.reciprocal(xx_mean_weights) * np.multiply(
- xx_psw, dcor.distances.pairwise_distances(
- Y0X_0[select_cols], exponent=exponent), )
+ xx_psw,
+ dcor.distances.pairwise_distances(Y0X_0[select_cols], exponent=exponent),
+ )
psw_energy_distance = (
- 2
- * np.mean(distance_xy)
- - np.mean(distance_xx)
- - np.mean(distance_yy))
+ 2 * np.mean(distance_xy) - np.mean(distance_xx) - np.mean(distance_yy)
+ )
return psw_energy_distance
- # NEW:
@staticmethod
def default_policy(cate: np.ndarray) -> np.ndarray:
"""Default policy that assigns treatment if CATE > 0."""
return (cate > 0).astype(int)
- # NEW:
def policy_risk_score(
self,
estimate: CausalEstimate,
@@ -479,81 +528,73 @@ def policy_risk_score(
outcome_name: str,
policy: Optional[Callable[[np.ndarray], np.ndarray]] = None,
rct_indices: Optional[pd.Index] = None,
- sd_threshold: float = 1e-2,
- clip: float = 0.05
+ sd_threshold: float = 1e-4,
+ clip: float = 0.05,
) -> float:
- # Use default_policy if no custom policy is provided
- if policy is None:
- policy = self.default_policy
-
- # If no specific RCT indices are provided, use all indices
- if rct_indices is None:
- rct_indices = df.index
-
- # Ensure cate_estimate is a 1D array for consistent processing
+ # Ensure cate_estimate is a 1D array
cate_estimate = np.squeeze(cate_estimate)
- # Return 0 if CATE estimates are consistently constant (below
- # threshold)
+ # Handle constant or near-constant CATE estimates
if np.std(cate_estimate) <= sd_threshold:
- return 0 # This indicates no heterogeneity in treatment effects
+ return np.inf # Return infinity for constant estimates
+
+ # Use default_policy if no policy is provided
+ if policy is None:
+ policy = self.default_policy
- # Apply the policy to get treatment assignments based on CATE estimates
+ # Apply the policy to get treatment assignments
policy_treatment = policy(cate_estimate)
- # Validate that the propensity model is properly fitted
- if not hasattr(
- self.psw_estimator,
- 'estimator') or not hasattr(
- self.psw_estimator.estimator,
- 'propensity_model'):
- raise ValueError(
- "Propensity model fitting failed. Please check the setup.")
- else:
- # Calculate propensity scores using the pre-fitted propensity model
- propensity_scores = (
- self.psw_estimator.estimator.propensity_model.predict_proba(
- df[['random'] + self.psw_estimator._effect_modifier_names]
- )
- )
- if propensity_scores.ndim == 2:
- # Use second column if 2D array
- propensity_scores = propensity_scores[:, 1]
+ # Calculate propensity scores
+ if not hasattr(self.psw_estimator, "estimator") or not hasattr(
+ self.psw_estimator.estimator, "propensity_model"
+ ):
+ raise ValueError("Propensity model fitting failed. Please check the setup.")
- # Clip propensity scores to avoid extreme weights
- propensity_scores = np.clip(propensity_scores, clip, 1 - clip)
+ propensity_scores = self.psw_estimator.estimator.propensity_model.predict_proba(
+ df[
+ self.causal_model.get_effect_modifiers()
+ + self.causal_model.get_common_causes()
+ ]
+ )
+ if propensity_scores.ndim == 2:
+ propensity_scores = propensity_scores[:, 1]
+ propensity_scores = np.clip(propensity_scores, clip, 1 - clip)
treatment_name = self.psw_estimator._treatment_name
- # Calculate inverse probability weights
- weights = np.where(df[treatment_name] == 1,
- 1 / propensity_scores,
- 1 / (1 - propensity_scores))
+ # Calculate weights
+ weights = np.where(
+ df[treatment_name] == 1, 1 / propensity_scores, 1 / (1 - propensity_scores)
+ )
- # Prepare RCT subset for analysis
- rct_df = df.loc[rct_indices].copy()
- rct_df['weight'] = weights[rct_indices]
- rct_df['policy_treatment'] = policy_treatment[rct_indices]
+ # Prepare RCT subset
+ rct_df = df.loc[rct_indices].copy() if rct_indices is not None else df.copy()
+ rct_df["weight"] = weights
+ rct_df["policy_treatment"] = policy_treatment
- # Compute policy value using inverse probability weighting
+ # Compute policy value
value_policy = (
- (
- (rct_df[outcome_name] * (rct_df[treatment_name] == 1)
- * (rct_df['policy_treatment'] == 1)
- * rct_df['weight']).sum()
- / rct_df['weight'].sum()
- * (rct_df['policy_treatment'] == 1).mean()
- ) + (
- (rct_df[outcome_name] * (rct_df[treatment_name] == 0)
- * (rct_df['policy_treatment'] == 0)
- * rct_df['weight']).sum()
- / rct_df['weight'].sum()
- * (rct_df['policy_treatment'] == 0).mean()
- )
- )
-
- # Compute Policy Risk (1 - policy value)
- policy_risk = 1 - value_policy
+ rct_df[outcome_name]
+ * (rct_df[treatment_name] == 1)
+ * (rct_df["policy_treatment"] == 1)
+ * rct_df["weight"]
+ ).sum() / rct_df["weight"].sum() * (rct_df["policy_treatment"] == 1).mean() + (
+ rct_df[outcome_name]
+ * (rct_df[treatment_name] == 0)
+ * (rct_df["policy_treatment"] == 0)
+ * rct_df["weight"]
+ ).sum() / rct_df[
+ "weight"
+ ].sum() * (
+ rct_df["policy_treatment"] == 0
+ ).mean()
+
+ # Compute naive policy value (treating everyone)
+ naive_value = rct_df[outcome_name].mean()
+
+ # Compute normalized policy risk
+ policy_risk = max(0, (naive_value - value_policy) / abs(naive_value))
return policy_risk
@@ -638,25 +679,26 @@ def estimateConditionalQ(Y, X, Z):
W = np.hstack((X, Z))
# Compute the nearest neighbor of X
- nn_X = NearestNeighbors(n_neighbors=3, algorithm='auto').fit(X)
+ nn_X = NearestNeighbors(n_neighbors=3, algorithm="auto").fit(X)
nn_dists_X, nn_indices_X = nn_X.kneighbors(X)
nn_index_X = nn_indices_X[:, 1]
# Handle repeated data
repeat_data = np.where(nn_dists_X[:, 1] == 0)[0]
- df_X = pd.DataFrame(
- {'id': repeat_data, 'group': nn_indices_X[repeat_data, 0]})
- df_X['rnn'] = df_X.groupby('group')['id'].transform(Scorer.randomNN)
- nn_index_X[repeat_data] = df_X['rnn'].values
+ df_X = pd.DataFrame({"id": repeat_data, "group": nn_indices_X[repeat_data, 0]})
+ df_X["rnn"] = df_X.groupby("group")["id"].transform(Scorer.randomNN)
+ nn_index_X[repeat_data] = df_X["rnn"].values
# Nearest neighbors with ties
ties = np.where(nn_dists_X[:, 1] == nn_dists_X[:, 2])[0]
ties = np.setdiff1d(ties, repeat_data)
if len(ties) > 0:
+
def helper_ties(a):
- distances = distance.cdist(X[a].reshape(
- 1, -1), np.delete(X, a, axis=0)).flatten()
+ distances = distance.cdist(
+ X[a].reshape(1, -1), np.delete(X, a, axis=0)
+ ).flatten()
ids = np.where(distances == distances.min())[0]
x = np.random.choice(ids)
return x + (x >= a)
@@ -664,15 +706,14 @@ def helper_ties(a):
nn_index_X[ties] = [helper_ties(a) for a in ties]
# Compute the nearest neighbor of W
- nn_W = NearestNeighbors(n_neighbors=3, algorithm='auto').fit(W)
+ nn_W = NearestNeighbors(n_neighbors=3, algorithm="auto").fit(W)
nn_dists_W, nn_indices_W = nn_W.kneighbors(W)
nn_index_W = nn_indices_W[:, 1]
repeat_data = np.where(nn_dists_W[:, 1] == 0)[0]
- df_W = pd.DataFrame(
- {'id': repeat_data, 'group': nn_indices_W[repeat_data, 0]})
- df_W['rnn'] = df_W.groupby('group')['id'].transform(Scorer.randomNN)
- nn_index_W[repeat_data] = df_W['rnn'].values
+ df_W = pd.DataFrame({"id": repeat_data, "group": nn_indices_W[repeat_data, 0]})
+ df_W["rnn"] = df_W.groupby("group")["id"].transform(Scorer.randomNN)
+ nn_index_W[repeat_data] = df_W["rnn"].values
# Nearest neighbors with ties
ties = np.where(nn_dists_W[:, 1] == nn_dists_W[:, 2])[0]
@@ -683,8 +724,10 @@ def helper_ties(a):
# Estimate Q
R_Y = np.argsort(np.argsort(Y)) # Rank Y with ties method 'max'
- Q_n = (np.sum(np.minimum(R_Y, R_Y[nn_index_W]))
- - np.sum(np.minimum(R_Y, R_Y[nn_index_X]))) / (n**2)
+ Q_n = (
+ np.sum(np.minimum(R_Y, R_Y[nn_index_W]))
+ - np.sum(np.minimum(R_Y, R_Y[nn_index_X]))
+ ) / (n**2)
return Q_n
@@ -710,25 +753,26 @@ def estimateConditionalS(Y, X):
n = len(Y)
# Compute the nearest neighbor of X
- nn_X = NearestNeighbors(n_neighbors=3, algorithm='auto').fit(X)
+ nn_X = NearestNeighbors(n_neighbors=3, algorithm="auto").fit(X)
nn_dists_X, nn_indices_X = nn_X.kneighbors(X)
nn_index_X = nn_indices_X[:, 1]
# Handle repeated data
repeat_data = np.where(nn_dists_X[:, 1] == 0)[0]
- df_X = pd.DataFrame(
- {'id': repeat_data, 'group': nn_indices_X[repeat_data, 0]})
- df_X['rnn'] = df_X.groupby('group')['id'].transform(Scorer.randomNN)
- nn_index_X[repeat_data] = df_X['rnn'].values
+ df_X = pd.DataFrame({"id": repeat_data, "group": nn_indices_X[repeat_data, 0]})
+ df_X["rnn"] = df_X.groupby("group")["id"].transform(Scorer.randomNN)
+ nn_index_X[repeat_data] = df_X["rnn"].values
# Nearest neighbors with ties
ties = np.where(nn_dists_X[:, 1] == nn_dists_X[:, 2])[0]
ties = np.setdiff1d(ties, repeat_data)
if len(ties) > 0:
+
def helper_ties(a):
- distances = distance.cdist(X[a].reshape(
- 1, -1), np.delete(X, a, axis=0)).flatten()
+ distances = distance.cdist(
+ X[a].reshape(1, -1), np.delete(X, a, axis=0)
+ ).flatten()
ids = np.where(distances == distances.min())[0]
x = np.random.choice(ids)
return x + (x >= a)
@@ -812,8 +856,7 @@ def codec(Y, Z, X=None, na_rm=True):
n = len(Y)
if n < 2:
- raise ValueError(
- "Number of rows with no NAs should be greater than 1.")
+ raise ValueError("Number of rows with no NAs should be greater than 1.")
return Scorer.estimateConditionalQ(Y, Z, np.zeros((n, 0)))
@@ -824,23 +867,20 @@ def codec(Y, Z, X=None, na_rm=True):
X = np.array(X)
if not isinstance(Z, np.ndarray):
Z = np.array(Z)
- if len(Y) != X.shape[0] or len(
- Y) != Z.shape[0] or X.shape[0] != Z.shape[0]:
+ if len(Y) != X.shape[0] or len(Y) != Z.shape[0] or X.shape[0] != Z.shape[0]:
raise ValueError("Number of rows of Y, X, and Z should be equal.")
n = len(Y)
if n < 2:
- raise ValueError(
- "Number of rows with no NAs should be greater than 1.")
+ raise ValueError("Number of rows with no NAs should be greater than 1.")
return Scorer.estimateConditionalT(Y, Z, X)
# NEW
@staticmethod
def identify_confounders(
- df: pd.DataFrame,
- treatment_col: str,
- outcome_col: str) -> list:
+ df: pd.DataFrame, treatment_col: str, outcome_col: str
+ ) -> list:
"""
Identify confounders in a DataFrame.
@@ -854,11 +894,10 @@ def identify_confounders(
"""
confounders = [
- col for col in df.columns if col not in [
- treatment_col,
- outcome_col,
- "random",
- "index"]]
+ col
+ for col in df.columns
+ if col not in [treatment_col, outcome_col, "random", "index"]
+ ]
return confounders
# NEW
@@ -874,11 +913,13 @@ def codec_score(estimate: CausalEstimate, df: pd.DataFrame) -> float:
float: CODEC score
"""
est = estimate.estimator
- treatment_name = est._treatment_name if isinstance(
- est._treatment_name, str) else est._treatment_name[0]
+ treatment_name = (
+ est._treatment_name
+ if isinstance(est._treatment_name, str)
+ else est._treatment_name[0]
+ )
outcome_name = est._outcome_name
- confounders = Scorer.identify_confounders(
- df, treatment_name, outcome_name)
+ confounders = Scorer.identify_confounders(df, treatment_name, outcome_name)
########
cate_est = est.effect(df)
@@ -948,10 +989,8 @@ def real_qini_make_score(
@staticmethod
def r_make_score(
- estimate: CausalEstimate,
- df: pd.DataFrame,
- cate_estimate: np.ndarray,
- r_scorer) -> float:
+ estimate: CausalEstimate, df: pd.DataFrame, cate_estimate: np.ndarray, r_scorer
+ ) -> float:
"""
Calculate r_score.
@@ -1023,6 +1062,155 @@ def group_ate(
return pd.DataFrame(tmp2)
+ # NEW:
+ def bite_score(
+ self,
+ estimate: CausalEstimate,
+ df: pd.DataFrame,
+ N_values: Optional[List[int]] = None,
+ ) -> float:
+ """
+ Calculate the BITE (Bins-induced Kendall's Tau Evaluation) score.
+
+ Args:
+ estimate (CausalEstimate): The causal estimate to evaluate.
+ df (pd.DataFrame): The test dataframe.
+ N_values (Optional[List[int]]): List of bin counts to evaluate.
+
+ Returns:
+ float: The BITE score. Higher values indicate better model performance.
+ """
+ if N_values is None:
+ N_values = (
+ list(range(10, 21)) + list(range(25, 51, 5)) + list(range(60, 101, 10))
+ )
+
+ est = estimate.estimator
+ treatment_name = est._treatment_name
+ if not isinstance(treatment_name, str):
+ treatment_name = treatment_name[0]
+ outcome_name = est._outcome_name
+
+ # Create a copy of df to avoid modifying original
+ working_df = df.copy()
+
+ # Estimated ITEs on test data
+ cate_estimate = est.effect(df)
+ if len(cate_estimate.shape) > 1 and cate_estimate.shape[1] == 1:
+ cate_estimate = cate_estimate.reshape(-1)
+ working_df["estimated_ITE"] = cate_estimate
+
+ # Get propensity scores
+ if hasattr(self.psw_estimator.estimator, "propensity_model"):
+ propensity_model = self.psw_estimator.estimator.propensity_model
+ working_df["propensity"] = propensity_model.predict_proba(
+ df[
+ self.causal_model.get_effect_modifiers()
+ + self.causal_model.get_common_causes()
+ ]
+ )[:, 1]
+ else:
+ raise ValueError("Propensity model is not available.")
+
+ # Calculate weights with clipping to avoid extremes
+ working_df["weights"] = np.where(
+ working_df[treatment_name] == 1,
+ 1 / np.clip(working_df["propensity"], 0.05, 0.95),
+ 1 / np.clip(1 - working_df["propensity"], 0.05, 0.95),
+ )
+
+ kendall_tau_values = []
+
+ def compute_naive_estimate(group_data):
+ """Compute naive estimate for a group with safeguards against edge cases."""
+ treated = group_data[group_data[treatment_name] == 1]
+ control = group_data[group_data[treatment_name] == 0]
+
+ if len(treated) == 0 or len(control) == 0:
+ return np.nan
+
+ treated_weights = treated["weights"].values
+ control_weights = control["weights"].values
+
+ # Check if weights sum to 0 or if all weights are 0
+ if (
+ treated_weights.sum() == 0
+ or control_weights.sum() == 0
+ or not (treated_weights > 0).any()
+ or not (control_weights > 0).any()
+ ):
+ return np.nan
+
+ # Weighted averages with explicit handling of edge cases
+ try:
+ y1 = np.average(treated[outcome_name], weights=treated_weights)
+ y0 = np.average(control[outcome_name], weights=control_weights)
+ return y1 - y0
+ except ZeroDivisionError:
+ return np.nan
+
+ for N in N_values:
+ iter_df = working_df.copy()
+
+ try:
+ # Ensure enough unique values for binning
+ unique_ites = np.unique(iter_df["estimated_ITE"])
+ if len(unique_ites) < N:
+ continue
+
+ # Create bins
+ iter_df["ITE_bin"] = pd.qcut(
+ iter_df["estimated_ITE"], q=N, labels=False, duplicates="drop"
+ )
+
+ # Compute bin statistics
+ bin_stats = []
+ for bin_idx in iter_df["ITE_bin"].unique():
+ bin_data = iter_df[iter_df["ITE_bin"] == bin_idx]
+
+ # Skip if bin is too small
+ if len(bin_data) < 2:
+ continue
+
+ naive_est = compute_naive_estimate(bin_data)
+
+ # Only compute average ITE if weights are valid
+ bin_weights = bin_data["weights"].values
+ if bin_weights.sum() > 0 and not np.isnan(naive_est):
+ try:
+ avg_est_ite = np.average(
+ bin_data["estimated_ITE"], weights=bin_weights
+ )
+ bin_stats.append(
+ {
+ "ITE_bin": bin_idx,
+ "naive_estimate": naive_est,
+ "average_estimated_ITE": avg_est_ite,
+ }
+ )
+ except ZeroDivisionError:
+ continue
+
+ # Calculate Kendall's Tau if we have enough valid bins
+ bin_stats_df = pd.DataFrame(bin_stats)
+ if len(bin_stats_df) >= 2:
+ tau, _ = kendalltau(
+ bin_stats_df["naive_estimate"],
+ bin_stats_df["average_estimated_ITE"],
+ )
+ if not np.isnan(tau):
+ kendall_tau_values.append(tau)
+
+ except (ValueError, ZeroDivisionError):
+ continue
+
+ # Return final score
+ if len(kendall_tau_values) == 0:
+ return -np.inf # Return -inf for failed computations
+
+ top_3_taus = sorted(kendall_tau_values, reverse=True)[:3]
+ return np.mean(top_3_taus)
+
def make_scores(
self,
estimate: CausalEstimate,
@@ -1085,80 +1273,70 @@ def make_scores(
# simple_ate = simple_ate[0]
# .reset_index(drop=True)
propensitymodel = self.psw_estimator.estimator.propensity_model
- values["p"] = (
- propensitymodel.predict_proba(
- df[
- self.causal_model.get_effect_modifiers()
- + self.causal_model.get_common_causes()
- ]
- )[:, 1]
- )
+ values["p"] = propensitymodel.predict_proba(
+ df[
+ self.causal_model.get_effect_modifiers()
+ + self.causal_model.get_common_causes()
+ ]
+ )[:, 1]
values["policy"] = cate_estimate > 0
values["norm_policy"] = cate_estimate > simple_ate
- values["weights"] = self.erupt.weights(
- df, lambda x: cate_estimate > 0
- )
+ values["weights"] = self.erupt.weights(df, lambda x: cate_estimate > 0)
else:
pass
# TODO: what do we do here if multiple treatments?
if "erupt" in metrics_to_report:
- erupt_score = self.erupt.score(
- df, df[outcome_name], cate_estimate > 0)
+ erupt_score = self.erupt.score(df, df[outcome_name], cate_estimate > 0)
out["erupt"] = erupt_score
if "norm_erupt" in metrics_to_report:
norm_erupt_score = (
- self.erupt.score(
- df,
- df[outcome_name],
- cate_estimate > simple_ate
- ) - simple_ate * values["norm_policy"].mean()
+ self.erupt.score(df, df[outcome_name], cate_estimate > simple_ate)
+ - simple_ate * values["norm_policy"].mean()
)
out["norm_erupt"] = norm_erupt_score
+ # if "prob_erupt" in metrics_to_report:
+ # out["prob_erupt"] = self.erupt.probabilistic_erupt_score(
+ # df, df[est._outcome_name], estimate, cate_estimate
+ # )
+
if "prob_erupt" in metrics_to_report:
- treatment_effects = pd.Series(cate_estimate, index=df.index)
- treatment_std_devs = pd.Series(
- cate_estimate.std(), index=df.index)
prob_erupt_score = self.erupt.probabilistic_erupt_score(
- df, df[outcome_name],
- treatment_effects,
- treatment_std_devs
+ df, df[outcome_name], estimate
)
out["prob_erupt"] = prob_erupt_score
- if "frobenius_norm" in metrics_to_report:
- out["frobenius_norm"] = self.frobenius_norm_score(estimate, df)
+ # if "frobenius_norm" in metrics_to_report:
+ # out["frobenius_norm"] = self.frobenius_norm_score(estimate, df)
if "policy_risk" in metrics_to_report:
- try:
- out["policy_risk"] = self.policy_risk_score(
- estimate=estimate,
- df=df,
- cate_estimate=cate_estimate,
- outcome_name=outcome_name,
- policy=None
- )
- except Exception as e:
- e
- pass
+ out["policy_risk"] = self.policy_risk_score(
+ estimate=estimate,
+ df=df,
+ cate_estimate=cate_estimate,
+ outcome_name=outcome_name,
+ policy=None,
+ )
if "qini" in metrics_to_report:
- out["qini"] = Scorer.qini_make_score(
- estimate, df, cate_estimate)
+ out["qini"] = Scorer.qini_make_score(estimate, df, cate_estimate)
if "auc" in metrics_to_report:
out["auc"] = Scorer.auc_make_score(estimate, df, cate_estimate)
+ if "bite" in metrics_to_report:
+ bite_score = self.bite_score(estimate, df)
+ out["bite"] = bite_score
+
if r_scorer is not None:
out["r_score"] = Scorer.r_make_score(
estimate, df, cate_estimate, r_scorer
)
# values = values.rename(columns={treatment_name: "treated"})
- assert len(values) == len(
- df), "Index weirdness when adding columns!"
+ assert len(values) == len(df), "Index weirdness when adding columns!"
values = values.copy()
out["values"] = values
@@ -1178,6 +1356,12 @@ def make_scores(
temp = self.codec_score(estimate, df)
out["codec"] = temp
+ if "frobenius_norm" in metrics_to_report:
+ out["frobenius_norm"] = self.frobenius_norm_score(estimate, df)
+
+ # if "psw_frobenius_norm" in metrics_to_report:
+ # out["psw_frobenius_norm"] = self.psw_frobenius_norm_score(estimate, df)
+
del df
return out
@@ -1222,15 +1406,16 @@ def best_score_by_estimator(
if "estimator_name" in v and v["estimator_name"] == name
]
best[name] = (
- min(
- est_scores,
- key=lambda x: x[metric]) if metric in [
+ min(est_scores, key=lambda x: x[metric])
+ if metric
+ in [
"energy_distance",
"psw_energy_distance",
"frobenius_norm",
"codec",
- "policy_risk"] else max(
- est_scores,
- key=lambda x: x[metric]))
+ "policy_risk",
+ ]
+ else max(est_scores, key=lambda x: x[metric])
+ )
return best
diff --git a/notebooks/Linear_IV/_codec_run_1_Linear_IV.pkl b/notebooks/Linear_IV/_codec_run_1_Linear_IV.pkl
new file mode 100644
index 00000000..01349e5d
Binary files /dev/null and b/notebooks/Linear_IV/_codec_run_1_Linear_IV.pkl differ
diff --git a/notebooks/Linear_IV/_energy_distance_run_1_Linear_IV.pkl b/notebooks/Linear_IV/_energy_distance_run_1_Linear_IV.pkl
new file mode 100644
index 00000000..5187f3ee
Binary files /dev/null and b/notebooks/Linear_IV/_energy_distance_run_1_Linear_IV.pkl differ
diff --git a/notebooks/Linear_IV/_frobenius_norm_run_1_Linear_IV.pkl b/notebooks/Linear_IV/_frobenius_norm_run_1_Linear_IV.pkl
new file mode 100644
index 00000000..80c04e9a
Binary files /dev/null and b/notebooks/Linear_IV/_frobenius_norm_run_1_Linear_IV.pkl differ
diff --git a/notebooks/Linear_KC/_bite_run_1_Linear_KC.pkl b/notebooks/Linear_KC/_bite_run_1_Linear_KC.pkl
new file mode 100644
index 00000000..effc28af
Binary files /dev/null and b/notebooks/Linear_KC/_bite_run_1_Linear_KC.pkl differ
diff --git a/notebooks/Linear_KC/_energy_distance_run_1_Linear_KC.pkl b/notebooks/Linear_KC/_energy_distance_run_1_Linear_KC.pkl
new file mode 100644
index 00000000..6b4cab4b
Binary files /dev/null and b/notebooks/Linear_KC/_energy_distance_run_1_Linear_KC.pkl differ
diff --git a/notebooks/Linear_KC/_frobenius_norm_run_1_Linear_KC.pkl b/notebooks/Linear_KC/_frobenius_norm_run_1_Linear_KC.pkl
new file mode 100644
index 00000000..e15f697f
Binary files /dev/null and b/notebooks/Linear_KC/_frobenius_norm_run_1_Linear_KC.pkl differ
diff --git a/notebooks/Linear_KC/_policy_risk_run_1_Linear_KC.pkl b/notebooks/Linear_KC/_policy_risk_run_1_Linear_KC.pkl
new file mode 100644
index 00000000..691aa1f5
Binary files /dev/null and b/notebooks/Linear_KC/_policy_risk_run_1_Linear_KC.pkl differ
diff --git a/notebooks/Linear_KC/_psw_energy_distance_run_1_Linear_KC.pkl b/notebooks/Linear_KC/_psw_energy_distance_run_1_Linear_KC.pkl
new file mode 100644
index 00000000..e19ea1bd
Binary files /dev/null and b/notebooks/Linear_KC/_psw_energy_distance_run_1_Linear_KC.pkl differ
diff --git a/notebooks/Linear_KCKP/_bite_run_1_Linear_KCKP.pkl b/notebooks/Linear_KCKP/_bite_run_1_Linear_KCKP.pkl
new file mode 100644
index 00000000..decabc3f
Binary files /dev/null and b/notebooks/Linear_KCKP/_bite_run_1_Linear_KCKP.pkl differ
diff --git a/notebooks/Linear_KCKP/_energy_distance_run_1_Linear_KCKP.pkl b/notebooks/Linear_KCKP/_energy_distance_run_1_Linear_KCKP.pkl
new file mode 100644
index 00000000..0e0f6d28
Binary files /dev/null and b/notebooks/Linear_KCKP/_energy_distance_run_1_Linear_KCKP.pkl differ
diff --git a/notebooks/Linear_KCKP/_frobenius_norm_run_1_Linear_KCKP.pkl b/notebooks/Linear_KCKP/_frobenius_norm_run_1_Linear_KCKP.pkl
new file mode 100644
index 00000000..f7fd33d9
Binary files /dev/null and b/notebooks/Linear_KCKP/_frobenius_norm_run_1_Linear_KCKP.pkl differ
diff --git a/notebooks/Linear_KCKP/_policy_risk_run_1_Linear_KCKP.pkl b/notebooks/Linear_KCKP/_policy_risk_run_1_Linear_KCKP.pkl
new file mode 100644
index 00000000..8f52dfa6
Binary files /dev/null and b/notebooks/Linear_KCKP/_policy_risk_run_1_Linear_KCKP.pkl differ
diff --git a/notebooks/Linear_KCKP/_psw_energy_distance_run_1_Linear_KCKP.pkl b/notebooks/Linear_KCKP/_psw_energy_distance_run_1_Linear_KCKP.pkl
new file mode 100644
index 00000000..e136b998
Binary files /dev/null and b/notebooks/Linear_KCKP/_psw_energy_distance_run_1_Linear_KCKP.pkl differ
diff --git a/notebooks/Linear_RCT/_bite_run_1_Linear_RCT.pkl b/notebooks/Linear_RCT/_bite_run_1_Linear_RCT.pkl
new file mode 100644
index 00000000..cfde33d4
Binary files /dev/null and b/notebooks/Linear_RCT/_bite_run_1_Linear_RCT.pkl differ
diff --git a/notebooks/Linear_RCT/_codec_run_1_Linear_RCT.pkl b/notebooks/Linear_RCT/_codec_run_1_Linear_RCT.pkl
new file mode 100644
index 00000000..bd74e148
Binary files /dev/null and b/notebooks/Linear_RCT/_codec_run_1_Linear_RCT.pkl differ
diff --git a/notebooks/Linear_RCT/_energy_distance_run_1_Linear_RCT.pkl b/notebooks/Linear_RCT/_energy_distance_run_1_Linear_RCT.pkl
new file mode 100644
index 00000000..c5f4ef48
Binary files /dev/null and b/notebooks/Linear_RCT/_energy_distance_run_1_Linear_RCT.pkl differ
diff --git a/notebooks/Linear_RCT/_frobenius_norm_run_1_Linear_RCT.pkl b/notebooks/Linear_RCT/_frobenius_norm_run_1_Linear_RCT.pkl
new file mode 100644
index 00000000..2a718759
Binary files /dev/null and b/notebooks/Linear_RCT/_frobenius_norm_run_1_Linear_RCT.pkl differ
diff --git a/notebooks/Linear_RCT/_policy_risk_run_1_Linear_RCT.pkl b/notebooks/Linear_RCT/_policy_risk_run_1_Linear_RCT.pkl
new file mode 100644
index 00000000..af1a1497
Binary files /dev/null and b/notebooks/Linear_RCT/_policy_risk_run_1_Linear_RCT.pkl differ
diff --git a/notebooks/Linear_RCT/_psw_energy_distance_run_1_Linear_RCT.pkl b/notebooks/Linear_RCT/_psw_energy_distance_run_1_Linear_RCT.pkl
new file mode 100644
index 00000000..ede9047e
Binary files /dev/null and b/notebooks/Linear_RCT/_psw_energy_distance_run_1_Linear_RCT.pkl differ
diff --git a/notebooks/NonLinear_IV/_codec_run_1_NonLinear_IV.pkl b/notebooks/NonLinear_IV/_codec_run_1_NonLinear_IV.pkl
new file mode 100644
index 00000000..04277d4e
Binary files /dev/null and b/notebooks/NonLinear_IV/_codec_run_1_NonLinear_IV.pkl differ
diff --git a/notebooks/NonLinear_IV/_energy_distance_run_1_NonLinear_IV.pkl b/notebooks/NonLinear_IV/_energy_distance_run_1_NonLinear_IV.pkl
new file mode 100644
index 00000000..f9504cb9
Binary files /dev/null and b/notebooks/NonLinear_IV/_energy_distance_run_1_NonLinear_IV.pkl differ
diff --git a/notebooks/NonLinear_IV/_frobenius_norm_run_1_NonLinear_IV.pkl b/notebooks/NonLinear_IV/_frobenius_norm_run_1_NonLinear_IV.pkl
new file mode 100644
index 00000000..7bbf9d80
Binary files /dev/null and b/notebooks/NonLinear_IV/_frobenius_norm_run_1_NonLinear_IV.pkl differ
diff --git a/notebooks/NonLinear_KC/_bite_run_1_NonLinear_KC.pkl b/notebooks/NonLinear_KC/_bite_run_1_NonLinear_KC.pkl
new file mode 100644
index 00000000..9d25f2d9
Binary files /dev/null and b/notebooks/NonLinear_KC/_bite_run_1_NonLinear_KC.pkl differ
diff --git a/notebooks/NonLinear_KC/_energy_distance_run_1_NonLinear_KC.pkl b/notebooks/NonLinear_KC/_energy_distance_run_1_NonLinear_KC.pkl
new file mode 100644
index 00000000..8a74a97f
Binary files /dev/null and b/notebooks/NonLinear_KC/_energy_distance_run_1_NonLinear_KC.pkl differ
diff --git a/notebooks/NonLinear_KC/_frobenius_norm_run_1_NonLinear_KC.pkl b/notebooks/NonLinear_KC/_frobenius_norm_run_1_NonLinear_KC.pkl
new file mode 100644
index 00000000..46b63534
Binary files /dev/null and b/notebooks/NonLinear_KC/_frobenius_norm_run_1_NonLinear_KC.pkl differ
diff --git a/notebooks/NonLinear_KC/_policy_risk_run_1_NonLinear_KC.pkl b/notebooks/NonLinear_KC/_policy_risk_run_1_NonLinear_KC.pkl
new file mode 100644
index 00000000..81f10685
Binary files /dev/null and b/notebooks/NonLinear_KC/_policy_risk_run_1_NonLinear_KC.pkl differ
diff --git a/notebooks/NonLinear_KC/_psw_energy_distance_run_1_NonLinear_KC.pkl b/notebooks/NonLinear_KC/_psw_energy_distance_run_1_NonLinear_KC.pkl
new file mode 100644
index 00000000..ceb7705e
Binary files /dev/null and b/notebooks/NonLinear_KC/_psw_energy_distance_run_1_NonLinear_KC.pkl differ
diff --git a/notebooks/NonLinear_KCKP/_bite_run_1_NonLinear_KCKP.pkl b/notebooks/NonLinear_KCKP/_bite_run_1_NonLinear_KCKP.pkl
new file mode 100644
index 00000000..9abb7806
Binary files /dev/null and b/notebooks/NonLinear_KCKP/_bite_run_1_NonLinear_KCKP.pkl differ
diff --git a/notebooks/NonLinear_KCKP/_energy_distance_run_1_NonLinear_KCKP.pkl b/notebooks/NonLinear_KCKP/_energy_distance_run_1_NonLinear_KCKP.pkl
new file mode 100644
index 00000000..05067736
Binary files /dev/null and b/notebooks/NonLinear_KCKP/_energy_distance_run_1_NonLinear_KCKP.pkl differ
diff --git a/notebooks/NonLinear_KCKP/_frobenius_norm_run_1_NonLinear_KCKP.pkl b/notebooks/NonLinear_KCKP/_frobenius_norm_run_1_NonLinear_KCKP.pkl
new file mode 100644
index 00000000..0ea15e69
Binary files /dev/null and b/notebooks/NonLinear_KCKP/_frobenius_norm_run_1_NonLinear_KCKP.pkl differ
diff --git a/notebooks/NonLinear_KCKP/_policy_risk_run_1_NonLinear_KCKP.pkl b/notebooks/NonLinear_KCKP/_policy_risk_run_1_NonLinear_KCKP.pkl
new file mode 100644
index 00000000..69f81319
Binary files /dev/null and b/notebooks/NonLinear_KCKP/_policy_risk_run_1_NonLinear_KCKP.pkl differ
diff --git a/notebooks/NonLinear_KCKP/_psw_energy_distance_run_1_NonLinear_KCKP.pkl b/notebooks/NonLinear_KCKP/_psw_energy_distance_run_1_NonLinear_KCKP.pkl
new file mode 100644
index 00000000..b47332a7
Binary files /dev/null and b/notebooks/NonLinear_KCKP/_psw_energy_distance_run_1_NonLinear_KCKP.pkl differ
diff --git a/notebooks/NonLinear_RCT/_bite_run_1_NonLinear_RCT.pkl b/notebooks/NonLinear_RCT/_bite_run_1_NonLinear_RCT.pkl
new file mode 100644
index 00000000..edae7a19
Binary files /dev/null and b/notebooks/NonLinear_RCT/_bite_run_1_NonLinear_RCT.pkl differ
diff --git a/notebooks/NonLinear_RCT/_codec_run_1_NonLinear_RCT.pkl b/notebooks/NonLinear_RCT/_codec_run_1_NonLinear_RCT.pkl
new file mode 100644
index 00000000..8e9b1c37
Binary files /dev/null and b/notebooks/NonLinear_RCT/_codec_run_1_NonLinear_RCT.pkl differ
diff --git a/notebooks/NonLinear_RCT/_energy_distance_run_1_NonLinear_RCT.pkl b/notebooks/NonLinear_RCT/_energy_distance_run_1_NonLinear_RCT.pkl
new file mode 100644
index 00000000..8e30d899
Binary files /dev/null and b/notebooks/NonLinear_RCT/_energy_distance_run_1_NonLinear_RCT.pkl differ
diff --git a/notebooks/NonLinear_RCT/_frobenius_norm_run_1_NonLinear_RCT.pkl b/notebooks/NonLinear_RCT/_frobenius_norm_run_1_NonLinear_RCT.pkl
new file mode 100644
index 00000000..c3a544d9
Binary files /dev/null and b/notebooks/NonLinear_RCT/_frobenius_norm_run_1_NonLinear_RCT.pkl differ
diff --git a/notebooks/NonLinear_RCT/_policy_risk_run_1_NonLinear_RCT.pkl b/notebooks/NonLinear_RCT/_policy_risk_run_1_NonLinear_RCT.pkl
new file mode 100644
index 00000000..9b8c267b
Binary files /dev/null and b/notebooks/NonLinear_RCT/_policy_risk_run_1_NonLinear_RCT.pkl differ
diff --git a/notebooks/NonLinear_RCT/_psw_energy_distance_run_1_NonLinear_RCT.pkl b/notebooks/NonLinear_RCT/_psw_energy_distance_run_1_NonLinear_RCT.pkl
new file mode 100644
index 00000000..3f6c86ae
Binary files /dev/null and b/notebooks/NonLinear_RCT/_psw_energy_distance_run_1_NonLinear_RCT.pkl differ
diff --git a/notebooks/Random assignment, binary CATE example.ipynb b/notebooks/Random assignment, binary CATE example.ipynb
index 6141b0f7..c7f31249 100644
--- a/notebooks/Random assignment, binary CATE example.ipynb
+++ b/notebooks/Random assignment, binary CATE example.ipynb
@@ -24,7 +24,15 @@
"name": "#%%\n"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n"
+ ]
+ }
+ ],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
@@ -441,16 +449,16 @@
"
\n",
" \n",
" 0 | \n",
- " 1 | \n",
- " 5.599916 | \n",
- " 1.0 | \n",
- " -0.528603 | \n",
- " -0.343455 | \n",
- " 1.128554 | \n",
- " 0.161703 | \n",
- " -0.316603 | \n",
+ " 0 | \n",
+ " 6.875856 | \n",
+ " 0.0 | \n",
+ " -1.736945 | \n",
+ " -1.802002 | \n",
+ " 0.383828 | \n",
+ " 2.244319 | \n",
+ " -0.629189 | \n",
" 1.295216 | \n",
- " 1.0 | \n",
+ " 0.0 | \n",
" ... | \n",
" 1.0 | \n",
" 1.0 | \n",
@@ -466,14 +474,14 @@
"
\n",
" 1 | \n",
" 0 | \n",
- " 1.366206 | \n",
+ " 2.996273 | \n",
" 1.0 | \n",
- " 0.390083 | \n",
- " 0.596582 | \n",
- " -1.850350 | \n",
+ " -0.807451 | \n",
+ " -0.202946 | \n",
+ " -0.360898 | \n",
" -0.879606 | \n",
- " -0.004017 | \n",
- " -0.857787 | \n",
+ " 0.808706 | \n",
+ " -0.526556 | \n",
" 0.0 | \n",
" ... | \n",
" 1.0 | \n",
@@ -490,18 +498,18 @@
"
\n",
" 2 | \n",
" 0 | \n",
- " 1.963538 | \n",
- " 0.0 | \n",
- " -1.045228 | \n",
- " -0.602710 | \n",
- " 0.011465 | \n",
- " 0.161703 | \n",
- " 0.683672 | \n",
- " -0.360940 | \n",
+ " 1.366206 | \n",
" 1.0 | \n",
+ " 0.390083 | \n",
+ " 0.596582 | \n",
+ " -1.850350 | \n",
+ " -0.879606 | \n",
+ " -0.004017 | \n",
+ " -0.857787 | \n",
+ " 0.0 | \n",
" ... | \n",
" 1.0 | \n",
- " 1.0 | \n",
+ " 0.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
@@ -566,16 +574,16 @@
],
"text/plain": [
" treatment y_factual random x1 x2 x3 x4 \\\n",
- "0 1 5.599916 1.0 -0.528603 -0.343455 1.128554 0.161703 \n",
- "1 0 1.366206 1.0 0.390083 0.596582 -1.850350 -0.879606 \n",
- "2 0 1.963538 0.0 -1.045228 -0.602710 0.011465 0.161703 \n",
+ "0 0 6.875856 0.0 -1.736945 -1.802002 0.383828 2.244319 \n",
+ "1 0 2.996273 1.0 -0.807451 -0.202946 -0.360898 -0.879606 \n",
+ "2 0 1.366206 1.0 0.390083 0.596582 -1.850350 -0.879606 \n",
"3 0 4.762090 0.0 0.467901 -0.202946 -0.733261 0.161703 \n",
"4 0 6.594044 1.0 0.513295 0.596582 0.756191 1.203011 \n",
"\n",
" x5 x6 x7 ... x16 x17 x18 x19 x20 x21 x22 x23 x24 \\\n",
- "0 -0.316603 1.295216 1.0 ... 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n",
- "1 -0.004017 -0.857787 0.0 ... 1.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n",
- "2 0.683672 -0.360940 1.0 ... 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n",
+ "0 -0.629189 1.295216 0.0 ... 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n",
+ "1 0.808706 -0.526556 0.0 ... 1.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n",
+ "2 -0.004017 -0.857787 0.0 ... 1.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n",
"3 0.058500 1.957678 1.0 ... 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n",
"4 -0.066534 2.620141 1.0 ... 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
@@ -626,21 +634,21 @@
"\n",
"# choose estimators of interest\n",
"estimator_list = [\n",
- " # \"Dummy\",\n",
- " # \"SparseLinearDML\",\n",
- " # \"ForestDRLearner\",\n",
- " # \"TransformedOutcome\",\n",
+ " \"Dummy\",\n",
+ " \"SparseLinearDML\",\n",
+ " \"ForestDRLearner\",\n",
+ " \"TransformedOutcome\",\n",
" \"CausalForestDML\",\n",
- " # \".LinearDML\",\n",
- " # \"DomainAdaptationLearner\",\n",
+ " \".LinearDML\",\n",
+ " \"DomainAdaptationLearner\",\n",
" \"SLearner\",\n",
" \"XLearner\",\n",
- " # \"TLearner\",\n",
+ " \"TLearner\",\n",
" # \"Ortho\"\n",
" ]\n",
"\n",
"# set evaluation metric\n",
- "metric = \"energy_distance\"\n",
+ "metric = \"prob_erupt\"\n",
"\n",
"# it's best to specify either time_budget or components_time_budget, \n",
"# and let the other one be inferred; time in seconds\n",
@@ -653,7 +661,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 11,
"id": "ea8d2df3",
"metadata": {
"collapsed": false,
@@ -667,11 +675,102 @@
"output_type": "stream",
"text": [
"Fitting a Propensity-Weighted scoring estimator to be used in scoring tasks\n",
- "Initial configs: [{'estimator': {'estimator_name': 'backdoor.econml.metalearners.SLearner'}}, {'estimator': {'estimator_name': 'backdoor.econml.metalearners.XLearner'}}, {'estimator': {'estimator_name': 'backdoor.econml.dml.CausalForestDML', 'drate': True, 'n_estimators': 100, 'criterion': 'mse', 'min_samples_split': 10, 'min_samples_leaf': 5, 'min_weight_fraction_leaf': 0.0, 'max_features': 'auto', 'min_impurity_decrease': 0.0, 'max_samples': 0.45, 'min_balancedness_tol': 0.45, 'honest': True, 'fit_intercept': True, 'subforest_size': 4}}]\n",
- "---------------------\n",
- "Best estimator: backdoor.econml.metalearners.XLearner\n",
- "Best config: {'estimator': {'estimator_name': 'backdoor.econml.metalearners.XLearner'}}\n",
- "Best score: 0.22739775773260096\n"
+ "Propensity Model Fitted Successfully\n",
+ "\n",
+ "Debugging Probabilistic ERUPT for estimator: NaiveDummy\n",
+ "CATE estimate summary:\n",
+ "Mean: 4.0815\n",
+ "Std: 0.0004\n",
+ "Min: 4.0804\n",
+ "Max: 4.0826\n",
+ "Inference capability check result: False\n",
+ "Estimator does not support inference - returning 0\n",
+ "\n",
+ "Debugging Probabilistic ERUPT for estimator: NaiveDummy\n",
+ "CATE estimate summary:\n",
+ "Mean: 3.8796\n",
+ "Std: 0.0004\n",
+ "Min: 3.8785\n",
+ "Max: 3.8806\n",
+ "Inference capability check result: False\n",
+ "Estimator does not support inference - returning 0\n",
+ "\n",
+ "Debugging Probabilistic ERUPT for estimator: Dummy\n",
+ "CATE estimate summary:\n",
+ "Mean: 4.0815\n",
+ "Std: 0.0004\n",
+ "Min: 4.0801\n",
+ "Max: 4.0826\n",
+ "Inference capability check result: False\n",
+ "Estimator does not support inference - returning 0\n",
+ "\n",
+ "Debugging Probabilistic ERUPT for estimator: Dummy\n",
+ "CATE estimate summary:\n",
+ "Mean: 3.8796\n",
+ "Std: 0.0004\n",
+ "Min: 3.8787\n",
+ "Max: 3.8806\n",
+ "Inference capability check result: False\n",
+ "Estimator does not support inference - returning 0\n",
+ "\n",
+ "Debugging Probabilistic ERUPT for estimator: Econml\n",
+ "CATE estimate summary:\n",
+ "Mean: 3.7754\n",
+ "Std: 0.6602\n",
+ "Min: 1.0852\n",
+ "Max: 4.8237\n",
+ "Inference capability check result: True\n",
+ "Exception occurred: Can't call 'effect_inference' because 'inference' is None\n",
+ "\n",
+ "Debugging Probabilistic ERUPT for estimator: Econml\n",
+ "CATE estimate summary:\n",
+ "Mean: 3.7402\n",
+ "Std: 0.7194\n",
+ "Min: 1.0852\n",
+ "Max: 4.7213\n",
+ "Inference capability check result: True\n",
+ "Exception occurred: Can't call 'effect_inference' because 'inference' is None\n"
+ ]
+ },
+ {
+ "ename": "KeyboardInterrupt",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[11], line 13\u001b[0m\n\u001b[1;32m 1\u001b[0m ct \u001b[38;5;241m=\u001b[39m CausalTune(\n\u001b[1;32m 2\u001b[0m estimator_list\u001b[38;5;241m=\u001b[39mestimator_list,\n\u001b[1;32m 3\u001b[0m metric\u001b[38;5;241m=\u001b[39mmetric,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 8\u001b[0m train_size\u001b[38;5;241m=\u001b[39mtrain_size\n\u001b[1;32m 9\u001b[0m )\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# run causaltune\u001b[39;00m\n\u001b[0;32m---> 13\u001b[0m \u001b[43mct\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcd\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutcome\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutcomes\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m---------------------\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# return best estimator\u001b[39;00m\n",
+ "File \u001b[0;32m~/Documents/GitHub/causaltune_pr/causaltune/optimiser.py:501\u001b[0m, in \u001b[0;36mCausalTune.fit\u001b[0;34m(self, data, treatment, outcome, common_causes, effect_modifiers, instruments, propensity_modifiers, estimator_list, resume, time_budget, preprocess, encoder_type, encoder_outcome)\u001b[0m\n\u001b[1;32m 498\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m cfg \u001b[38;5;129;01min\u001b[39;00m init_cfg:\n\u001b[1;32m 499\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresume_cfg\u001b[38;5;241m.\u001b[39mappend(cfg) \u001b[38;5;28;01mif\u001b[39;00m cfg \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresume_cfg \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 501\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresults \u001b[38;5;241m=\u001b[39m \u001b[43mtune\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 502\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_tune_with_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 503\u001b[0m \u001b[43m \u001b[49m\u001b[43msearch_space\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 504\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmetric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 505\u001b[0m \u001b[43m \u001b[49m\u001b[43mpoints_to_evaluate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 506\u001b[0m \u001b[43m \u001b[49m\u001b[43minit_cfg\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresume_cfg\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresume_cfg\u001b[49m\n\u001b[1;32m 507\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 508\u001b[0m \u001b[43m \u001b[49m\u001b[43mevaluated_rewards\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 509\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresume_scores\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresume_scores\u001b[49m\n\u001b[1;32m 510\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 511\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 512\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmin\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 513\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmetric\u001b[49m\n\u001b[1;32m 514\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\n\u001b[1;32m 515\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43menergy_distance\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 516\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpsw_energy_distance\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 517\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfrobenius_norm\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 518\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpsw_frobenius_norm\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 519\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcodec\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 520\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpolicy_risk\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 521\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 522\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmax\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 523\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 524\u001b[0m \u001b[43m \u001b[49m\u001b[43mlow_cost_partial_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 525\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_settings\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtuner\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 526\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 528\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresults\u001b[38;5;241m.\u001b[39mget_best_trial() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 529\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\n\u001b[1;32m 530\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOptimization failed! Did you set large enough time_budget and components_budget?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 531\u001b[0m )\n",
+ "File \u001b[0;32m~/anaconda3/envs/causaltune-paper/lib/python3.9/site-packages/flaml/tune/tune.py:814\u001b[0m, in \u001b[0;36mrun\u001b[0;34m(evaluation_function, config, low_cost_partial_config, cat_hp_cost, metric, mode, time_budget_s, points_to_evaluate, evaluated_rewards, resource_attr, min_resource, max_resource, reduction_factor, scheduler, search_alg, verbose, local_dir, num_samples, resources_per_trial, config_constraints, metric_constraints, max_failure, use_ray, use_spark, use_incumbent_result_in_evaluation, log_file_name, lexico_objectives, force_cancel, n_concurrent_trials, **ray_args)\u001b[0m\n\u001b[1;32m 812\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 813\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m PySparkOvertimeMonitor(time_start, time_budget_s, force_cancel):\n\u001b[0;32m--> 814\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mevaluation_function\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrial_to_run\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 815\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m result \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 816\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(result, \u001b[38;5;28mdict\u001b[39m):\n",
+ "File \u001b[0;32m~/Documents/GitHub/causaltune_pr/causaltune/optimiser.py:560\u001b[0m, in \u001b[0;36mCausalTune._tune_with_config\u001b[0;34m(self, config)\u001b[0m\n\u001b[1;32m 550\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_tune_with_config\u001b[39m(\u001b[38;5;28mself\u001b[39m, config: \u001b[38;5;28mdict\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mdict\u001b[39m:\n\u001b[1;32m 551\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;124;03m Performs Hyperparameter Optimisation for a causal inference estimator.\u001b[39;00m\n\u001b[1;32m 553\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 558\u001b[0m \u001b[38;5;124;03m (dict): values of metrics after optimisation\u001b[39;00m\n\u001b[1;32m 559\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 560\u001b[0m estimates \u001b[38;5;241m=\u001b[39m \u001b[43mParallel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mn_jobs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbackend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mthreading\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 561\u001b[0m \u001b[43m \u001b[49m\u001b[43mdelayed\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_estimate_effect\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mrange\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 562\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 564\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexception\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m estimates:\n\u001b[1;32m 565\u001b[0m est_name \u001b[38;5;241m=\u001b[39m estimates[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mestimator_name\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
+ "File \u001b[0;32m~/anaconda3/envs/causaltune-paper/lib/python3.9/site-packages/joblib/parallel.py:2007\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 2001\u001b[0m \u001b[38;5;66;03m# The first item from the output is blank, but it makes the interpreter\u001b[39;00m\n\u001b[1;32m 2002\u001b[0m \u001b[38;5;66;03m# progress until it enters the Try/Except block of the generator and\u001b[39;00m\n\u001b[1;32m 2003\u001b[0m \u001b[38;5;66;03m# reaches the first `yield` statement. This starts the asynchronous\u001b[39;00m\n\u001b[1;32m 2004\u001b[0m \u001b[38;5;66;03m# dispatch of the tasks to the workers.\u001b[39;00m\n\u001b[1;32m 2005\u001b[0m \u001b[38;5;28mnext\u001b[39m(output)\n\u001b[0;32m-> 2007\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreturn_generator \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43moutput\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/anaconda3/envs/causaltune-paper/lib/python3.9/site-packages/joblib/parallel.py:1650\u001b[0m, in \u001b[0;36mParallel._get_outputs\u001b[0;34m(self, iterator, pre_dispatch)\u001b[0m\n\u001b[1;32m 1647\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m\n\u001b[1;32m 1649\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backend\u001b[38;5;241m.\u001b[39mretrieval_context():\n\u001b[0;32m-> 1650\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_retrieve()\n\u001b[1;32m 1652\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mGeneratorExit\u001b[39;00m:\n\u001b[1;32m 1653\u001b[0m \u001b[38;5;66;03m# The generator has been garbage collected before being fully\u001b[39;00m\n\u001b[1;32m 1654\u001b[0m \u001b[38;5;66;03m# consumed. This aborts the remaining tasks if possible and warn\u001b[39;00m\n\u001b[1;32m 1655\u001b[0m \u001b[38;5;66;03m# the user if necessary.\u001b[39;00m\n\u001b[1;32m 1656\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
+ "File \u001b[0;32m~/anaconda3/envs/causaltune-paper/lib/python3.9/site-packages/joblib/parallel.py:1762\u001b[0m, in \u001b[0;36mParallel._retrieve\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1757\u001b[0m \u001b[38;5;66;03m# If the next job is not ready for retrieval yet, we just wait for\u001b[39;00m\n\u001b[1;32m 1758\u001b[0m \u001b[38;5;66;03m# async callbacks to progress.\u001b[39;00m\n\u001b[1;32m 1759\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ((\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jobs) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m\n\u001b[1;32m 1760\u001b[0m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jobs[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mget_status(\n\u001b[1;32m 1761\u001b[0m timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtimeout) \u001b[38;5;241m==\u001b[39m TASK_PENDING)):\n\u001b[0;32m-> 1762\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0.01\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1763\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[1;32m 1765\u001b[0m \u001b[38;5;66;03m# We need to be careful: the job list can be filling up as\u001b[39;00m\n\u001b[1;32m 1766\u001b[0m \u001b[38;5;66;03m# we empty it and Python list are not thread-safe by\u001b[39;00m\n\u001b[1;32m 1767\u001b[0m \u001b[38;5;66;03m# default hence the use of the lock\u001b[39;00m\n",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Debugging Probabilistic ERUPT for estimator: Econml\n",
+ "CATE estimate summary:\n",
+ "Mean: 4.0209\n",
+ "Std: 0.8874\n",
+ "Min: 0.2481\n",
+ "Max: 5.9306\n",
+ "Inference capability check result: True\n",
+ "Exception occurred: Can't call 'effect_inference' because 'inference' is None\n",
+ "\n",
+ "Debugging Probabilistic ERUPT for estimator: Econml\n",
+ "CATE estimate summary:\n",
+ "Mean: 3.9644\n",
+ "Std: 0.9270\n",
+ "Min: 0.3706\n",
+ "Max: 5.4427\n",
+ "Inference capability check result: True\n",
+ "Exception occurred: Can't call 'effect_inference' because 'inference' is None\n"
]
}
],
@@ -949,7 +1048,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.16"
+ "version": "3.9.19"
}
},
"nbformat": 4,
diff --git a/notebooks/Run metric tests across different scenarios.ipynb b/notebooks/Run metric tests across different scenarios.ipynb
index 0a2b4121..8090a721 100644
--- a/notebooks/Run metric tests across different scenarios.ipynb
+++ b/notebooks/Run metric tests across different scenarios.ipynb
@@ -2,17 +2,9 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
@@ -22,7 +14,8 @@
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
- "\n",
+ "import matplotlib\n",
+ "import colorsys\n",
"import copy \n",
"\n",
"import textwrap\n",
@@ -51,12 +44,14 @@
"from causaltune.datasets import generate_synthetic_data\n",
"\n",
"# Import linear synthetic data creation\n",
- "from causaltune.datasets import generate_linear_synthetic_data"
+ "from causaltune.datasets import generate_linear_synthetic_data\n",
+ "from causaltune.models.passthrough import passthrough_model\n",
+ "from causaltune.datasets import load_dataset, save_dataset"
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 226,
"metadata": {},
"outputs": [],
"source": [
@@ -67,62 +62,98 @@
" \"policy_risk\",\n",
" \"codec\",\n",
" \"energy_distance\", \n",
- " \"psw_energy_distance\"\n",
+ " \"psw_energy_distance\",\n",
+ " \"bite\"\n",
" ]\n",
"\n",
"iv_metrics = [\n",
- " \"frobenius_norm\", \n",
- " \"energy_distance\", \n",
- " \"codec\", \n",
+ " \"energy_distance\",\n",
+ " \"codec\", \n",
+ " \"frobenius_norm\", \n",
" ]\n",
- " \n",
- "n_samples = 100\n",
- "test_size = 0.33 # equal train,val,test\n",
- "#time_budget = 21600\n",
- "components_time_budget = 10\n",
"\n",
"estimator_list = [\n",
- " #\"Dummy\",\n",
- " \"SparseLinearDML\",\n",
- " \"ForestDRLearner\",\n",
- " \"TransformedOutcome\",\n",
- " \"CausalForestDML\",\n",
- " \".LinearDML\",\n",
- " \"DomainAdaptationLearner\",\n",
- " #\"SLearner\",\n",
- " \"XLearner\",\n",
- " #\"TLearner\",\n",
- " #\"Ortho\" \n",
- " ] \n",
+ " \"Dummy\",\n",
+ " \"SparseLinearDML\",\n",
+ " \"ForestDRLearner\",\n",
+ " \"TransformedOutcome\",\n",
+ " \"CausalForestDML\",\n",
+ " \".LinearDML\",\n",
+ " \"DomainAdaptationLearner\",\n",
+ " \"SLearner\",\n",
+ " \"XLearner\",\n",
+ " \"TLearner\",\n",
+ " #\"Ortho\" \n",
+ " ] \n",
"\n",
"iv_estimator_list = [\n",
- " 'iv.econml.iv.dr.LinearDRIV', \n",
- " 'iv.econml.iv.dml.OrthoIV', \n",
- " 'iv.econml.iv.dml.DMLIV',\n",
- " 'iv.econml.iv.dr.SparseLinearDRIV',\n",
- " 'iv.econml.iv.dr.LinearIntentToTreatDRIV'\n",
- " ] \n",
+ " 'iv.econml.iv.dr.LinearDRIV', \n",
+ " #'iv.econml.iv.dml.OrthoIV', \n",
+ " 'iv.econml.iv.dml.DMLIV',\n",
+ " 'iv.econml.iv.dr.SparseLinearDRIV',\n",
+ " 'iv.econml.iv.dr.LinearIntentToTreatDRIV'\n",
+ " ] \n",
+ "\n",
"\n",
+ "# More Parameters\n",
"n_runs = 1\n",
- "out_dir = \"\"\n",
- "filename_out = \"iv\""
+ "num_samples = -1\n",
+ "\n",
+ "test_size = 0.33 # equal train,val,test\n",
+ "\n",
+ "time_budget = None\n",
+ "components_time_budget = 10\n",
+ "\n",
+ "propensity_model='dummy'\n",
+ "\n",
+ "filename_out = \"\"\n",
+ "out_dir = \"GENERIC_OUT_DIR\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 227,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if not os.path.exists(out_dir):\n",
+ " os.makedirs(out_dir)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "# 1. Dataset Generation"
+ "# 1. Dataset Generation\n",
+ "\n",
+ "Generate synthetic data sets for your experiments or load one of the pre-made datasets."
]
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 228,
"metadata": {},
"outputs": [],
"source": [
"# Create empty dictionary\n",
- "data_sets = {}"
+ "#data_sets = {}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 229,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# # Save the dataset\n",
+ "# save_dataset(cd_linear, \"synthetic_data_test.pkl\")\n",
+ "\n",
+ "# # Load the dataset\n",
+ "# loaded_data = load_dataset(\"synthetic_data_test.pkl\")\n",
+ "\n",
+ "# # Now you can use the loaded_data just like the original synthetic_data\n",
+ "# loaded_data.data.head(5)"
]
},
{
@@ -130,40 +161,63 @@
"metadata": {},
"source": [
"### 1.1 Non-linear Data\n",
- "Unkown Confounders (RCT), Known Confoudners (Observational), IV"
+ "Randomized Controlled Trial (RCT), Known Confounders (KC), Known Propensities (KCKP), Instrumental Variables (IV)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 230,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# cd = generate_synthetic_data(n_samples=n_samples, confounding=False, noisy_outcomes=True)\n",
+ "# cd.preprocess_dataset()\n",
+ "# data_sets['NonLinear_RCT'] = cd\n",
+ "\n",
+ "# cd.data.head(5)\n",
+ "# save_dataset(cd, \"RunDatasets/NonLinear_RCT.pkl\")"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 231,
"metadata": {},
"outputs": [],
"source": [
- "#cd_non_linear = generate_synthetic_data(n_samples=n_samples, confounding=False, noisy_outcomes=True)\n",
- "#cd_non_linear.preprocess_dataset()\n",
- "#data_sets['rct_non-linear'] = cd_non_linear"
+ "# cd = generate_synthetic_data(n_samples=n_samples, confounding=True, noisy_outcomes=True)\n",
+ "# cd.preprocess_dataset()\n",
+ "# data_sets['NonLinear_KC'] = cd\n",
+ "\n",
+ "# cd.data.head(5)\n",
+ "# save_dataset(cd, \"RunDatasets/NonLinear_KC.pkl\")"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 232,
"metadata": {},
"outputs": [],
"source": [
- "#cd_non_linear = generate_synthetic_data(n_samples=n_samples, confounding=True, noisy_outcomes=True)\n",
- "#cd_non_linear.preprocess_dataset()\n",
- "#data_sets['known_confounders_non-linear'] = cd_non_linear"
+ "# cd = generate_synthetic_data(n_samples=n_samples, confounding=True, known_propensity=True)\n",
+ "# cd.preprocess_dataset()\n",
+ "# data_sets['NonLinear_KCKP'] = cd\n",
+ "\n",
+ "# cd.data.head(5)\n",
+ "# save_dataset(cd, \"RunDatasets/NonLinear_KCKP.pkl\")"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 233,
"metadata": {},
"outputs": [],
"source": [
- "cd_non_linear = generate_synthetic_data(n_samples=n_samples, confounding=True, add_instrument=True)\n",
- "cd_non_linear.preprocess_dataset()\n",
- "data_sets['IV_non-linear'] = cd_non_linear"
+ "# cd = generate_synthetic_data(n_samples=n_samples, confounding=True, add_instrument=True)\n",
+ "# cd.preprocess_dataset()\n",
+ "# data_sets['NonLinear_IV'] = cd\n",
+ "\n",
+ "# cd.data.head(5)\n",
+ "# save_dataset(cd, \"RunDatasets/NonLinear_IV.pkl\")"
]
},
{
@@ -171,684 +225,110 @@
"metadata": {},
"source": [
"### 1.2 Linear Data\n",
- "Unkown Confounders (RCT), Known Confoudners (Observational), IV"
+ "Randomized Controlled Trial (RCT), Known Confounders (KC), Known Propensities (KCKP), Instrumental Variables (IV)"
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 234,
"metadata": {},
"outputs": [],
"source": [
- "#cd_linear = generate_linear_synthetic_data(n_samples=n_samples, confounding=False, noisy_outcomes=True)\n",
- "#cd_linear.preprocess_dataset()\n",
- "#data_sets['rct_linear'] = cd_linear"
+ "# cd = generate_linear_synthetic_data(n_samples=n_samples, confounding=False, noisy_outcomes=True)\n",
+ "# cd.preprocess_dataset()\n",
+ "# data_sets['Linear_RCT'] = cd\n",
+ "\n",
+ "# cd.data.head(5)\n",
+ "# save_dataset(cd, \"RunDatasets/Linear_RCT.pkl\")"
]
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 235,
"metadata": {},
"outputs": [],
"source": [
- "#cd_linear = generate_linear_synthetic_data(n_samples=n_samples, confounding=True, noisy_outcomes=True)\n",
- "#cd_linear.preprocess_dataset()\n",
- "#data_sets['known_confounders_linear'] = cd_linear"
+ "# cd = generate_linear_synthetic_data(n_samples=n_samples, confounding=True, noisy_outcomes=True)\n",
+ "# cd.preprocess_dataset()\n",
+ "# data_sets['Linear_KC'] = cd\n",
+ "\n",
+ "# cd.data.head(5)\n",
+ "# save_dataset(cd, \"RunDatasets/Linear_KC.pkl\")"
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 236,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " treatment | \n",
- " outcome | \n",
- " true_effect | \n",
- " base_outcome | \n",
- " instrument | \n",
- " random | \n",
- " X1 | \n",
- " X2 | \n",
- " X3 | \n",
- " X4 | \n",
- " X5 | \n",
- " propensity | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 0 | \n",
- " 0.450543 | \n",
- " 1.289024 | \n",
- " 0.450543 | \n",
- " 1 | \n",
- " 0.0 | \n",
- " -0.076146 | \n",
- " -0.132621 | \n",
- " 0.419829 | \n",
- " 1.496246 | \n",
- " 0.094485 | \n",
- " 0.219328 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 0 | \n",
- " 0.156640 | \n",
- " 0.483619 | \n",
- " 0.156640 | \n",
- " 0 | \n",
- " 1.0 | \n",
- " -0.824938 | \n",
- " 0.618384 | \n",
- " 0.111048 | \n",
- " 1.294570 | \n",
- " -0.694036 | \n",
- " 0.544131 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0 | \n",
- " -0.468900 | \n",
- " -1.123213 | \n",
- " -0.468900 | \n",
- " 0 | \n",
- " 1.0 | \n",
- " -0.084203 | \n",
- " -0.998936 | \n",
- " 0.038287 | \n",
- " -0.766497 | \n",
- " -0.299085 | \n",
- " 0.450420 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 1 | \n",
- " -2.290974 | \n",
- " -1.580341 | \n",
- " -0.710633 | \n",
- " 1 | \n",
- " 0.0 | \n",
- " -0.072053 | \n",
- " -0.549470 | \n",
- " -1.041654 | \n",
- " -0.865283 | \n",
- " -0.379514 | \n",
- " 0.900000 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 0 | \n",
- " 0.044335 | \n",
- " 0.216895 | \n",
- " 0.044335 | \n",
- " 0 | \n",
- " 0.0 | \n",
- " -0.130618 | \n",
- " 1.086328 | \n",
- " -0.431098 | \n",
- " 0.116885 | \n",
- " -0.314361 | \n",
- " 0.807708 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " treatment outcome true_effect base_outcome instrument random \\\n",
- "0 0 0.450543 1.289024 0.450543 1 0.0 \n",
- "1 0 0.156640 0.483619 0.156640 0 1.0 \n",
- "2 0 -0.468900 -1.123213 -0.468900 0 1.0 \n",
- "3 1 -2.290974 -1.580341 -0.710633 1 0.0 \n",
- "4 0 0.044335 0.216895 0.044335 0 0.0 \n",
- "\n",
- " X1 X2 X3 X4 X5 propensity \n",
- "0 -0.076146 -0.132621 0.419829 1.496246 0.094485 0.219328 \n",
- "1 -0.824938 0.618384 0.111048 1.294570 -0.694036 0.544131 \n",
- "2 -0.084203 -0.998936 0.038287 -0.766497 -0.299085 0.450420 \n",
- "3 -0.072053 -0.549470 -1.041654 -0.865283 -0.379514 0.900000 \n",
- "4 -0.130618 1.086328 -0.431098 0.116885 -0.314361 0.807708 "
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "cd_linear = generate_linear_synthetic_data(n_samples=n_samples, confounding=True, add_instrument=True)\n",
- "cd_linear.preprocess_dataset()\n",
- "data_sets['IV_linear'] = cd_linear\n",
- "cd_linear.data.head(5)"
+ "# cd = generate_linear_synthetic_data(n_samples=n_samples, confounding=True, known_propensity=True)\n",
+ "# cd.preprocess_dataset()\n",
+ "# data_sets['Linear_KCKP'] = cd\n",
+ "\n",
+ "# cd.data.head(5)\n",
+ "# save_dataset(cd, \"RunDatasets/Linear_KCKP.pkl\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 237,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# cd = generate_linear_synthetic_data(n_samples=n_samples, confounding=True, add_instrument=True)\n",
+ "# cd.preprocess_dataset()\n",
+ "# data_sets['Linear_IV'] = cd\n",
+ "\n",
+ "# cd.data.head(5)\n",
+ "# save_dataset(cd, \"RunDatasets/Linear_IV.pkl\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "# 2. Model Fitting"
+ "### Data Loading\n",
+ "Instead of generating your own synthetic datasets above, you can load a pre-made data set for each scenario here (recommended e.g. for reproducibility)."
]
},
{
"cell_type": "code",
- "execution_count": 89,
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Initialize an empty dictionary to store the loaded datasets\n",
+ "data_sets = {}\n",
+ "\n",
+ "# Choose size of data set ('small' or 'large')\n",
+ "size = 'small'\n",
+ "\n",
+ "# List of dataset names and file paths\n",
+ "dataset_names = ['NonLinear_RCT']#, 'NonLinear_KC', 'NonLinear_KCKP', 'NonLinear_IV', \n",
+ " #'Linear_RCT', 'Linear_KC', 'Linear_KCKP', 'Linear_IV']\n",
+ "file_paths = [f\"RunDatasets/{size}/{name}.pkl\" for name in dataset_names]\n",
+ "\n",
+ "# Loop through dataset names and file paths to load each dataset\n",
+ "for name, file_path in zip(dataset_names, file_paths):\n",
+ " data_sets[name] = load_dataset(file_path)\n",
+ "\n",
+ "# Optionally, print the keys of the dictionary to verify successful loading\n",
+ "print(f\"Loaded datasets: {list(data_sets.keys())}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:51:47] {493} WARNING - Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]\n",
- "[flaml.tune.tune: 07-24 17:51:47] {636} INFO - trial 1 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'projection': 1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Initial configs: [{'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'projection': True}}, {'estimator': {'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'iv.econml.iv.dml.DMLIV', 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'projection': 0, 'opt_reweighted': 0, 'cov_clip': 0.1}}, {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'cov_clip': 0.1, 'opt_reweighted': 1}}]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:52:27] {636} INFO - trial 2 config: {'estimator': {'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'mc_agg': 'mean'}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'train': {'frobenius_norm': 1.7061476082509759}, 'validation': {'frobenius_norm': 1.8133158981000745}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:52:47] {636} INFO - trial 3 config: {'estimator': {'estimator_name': 'iv.econml.iv.dml.DMLIV', 'mc_agg': 'mean'}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'train': {'frobenius_norm': 1.5240321859227073}, 'validation': {'frobenius_norm': 1.2537755378872513}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:53:07] {636} INFO - trial 4 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'projection': 0, 'opt_reweighted': 0, 'cov_clip': 0.1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dml.DMLIV', 'train': {'frobenius_norm': 1.4503309515531349}, 'validation': {'frobenius_norm': 2.1334819838273797}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:53:48] {636} INFO - trial 5 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'cov_clip': 0.1, 'opt_reweighted': 1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'train': {'frobenius_norm': 2.0273869220789993}, 'validation': {'frobenius_norm': 1.6487019570997452}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:54:49] {493} WARNING - Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]\n",
- "[flaml.tune.tune: 07-24 17:54:49] {636} INFO - trial 1 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'projection': 1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'train': {'frobenius_norm': 1.469078660688006}, 'validation': {'frobenius_norm': 1.2767739128957123}}\n",
- "Initial configs: [{'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'projection': True}}, {'estimator': {'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'iv.econml.iv.dml.DMLIV', 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'projection': 0, 'opt_reweighted': 0, 'cov_clip': 0.1}}, {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'cov_clip': 0.1, 'opt_reweighted': 1}}]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:55:29] {636} INFO - trial 2 config: {'estimator': {'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'mc_agg': 'mean'}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'train': {'energy_distance': 1.2342694730438208}, 'validation': {'energy_distance': 3.581697941996879}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:55:49] {636} INFO - trial 3 config: {'estimator': {'estimator_name': 'iv.econml.iv.dml.DMLIV', 'mc_agg': 'mean'}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'train': {'energy_distance': 0.26447111116414446}, 'validation': {'energy_distance': 1.8041461329277957}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:56:10] {636} INFO - trial 4 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'projection': 0, 'opt_reweighted': 0, 'cov_clip': 0.1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dml.DMLIV', 'train': {'energy_distance': 0.28117313378506426}, 'validation': {'energy_distance': 0.873065452526324}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:56:50] {636} INFO - trial 5 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'cov_clip': 0.1, 'opt_reweighted': 1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'train': {'energy_distance': 0.20387149159602824}, 'validation': {'energy_distance': 0.7661351736217683}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:57:51] {493} WARNING - Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]\n",
- "[flaml.tune.tune: 07-24 17:57:51] {636} INFO - trial 1 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'projection': 1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'train': {'energy_distance': 0.20716529061236866}, 'validation': {'energy_distance': 0.8863925741834082}}\n",
- "Initial configs: [{'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'projection': True}}, {'estimator': {'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'iv.econml.iv.dml.DMLIV', 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'projection': 0, 'opt_reweighted': 0, 'cov_clip': 0.1}}, {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'cov_clip': 0.1, 'opt_reweighted': 1}}]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:58:31] {636} INFO - trial 2 config: {'estimator': {'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'mc_agg': 'mean'}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'train': {'codec': 0.008658008658008658}, 'validation': {'codec': 0.0}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:58:51] {636} INFO - trial 3 config: {'estimator': {'estimator_name': 'iv.econml.iv.dml.DMLIV', 'mc_agg': 'mean'}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'train': {'codec': 0.08900523560209424}, 'validation': {'codec': 0.0}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:59:12] {636} INFO - trial 4 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'projection': 0, 'opt_reweighted': 0, 'cov_clip': 0.1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dml.DMLIV', 'train': {'codec': 0.07894736842105264}, 'validation': {'codec': 0.0}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 17:59:52] {636} INFO - trial 5 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'cov_clip': 0.1, 'opt_reweighted': 1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'train': {'codec': 0.05365853658536586}, 'validation': {'codec': 0.0}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:00:53] {493} WARNING - Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]\n",
- "[flaml.tune.tune: 07-24 18:00:53] {636} INFO - trial 1 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'projection': 1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'train': {'codec': 0.008695652173913044}, 'validation': {'codec': 0.0}}\n",
- "Initial configs: [{'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'projection': True}}, {'estimator': {'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'iv.econml.iv.dml.DMLIV', 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'projection': 0, 'opt_reweighted': 0, 'cov_clip': 0.1}}, {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'cov_clip': 0.1, 'opt_reweighted': 1}}]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:01:33] {636} INFO - trial 2 config: {'estimator': {'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'mc_agg': 'mean'}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'train': {'frobenius_norm': 1.7238523719841985}, 'validation': {'frobenius_norm': 1.2998893813992485}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:01:54] {636} INFO - trial 3 config: {'estimator': {'estimator_name': 'iv.econml.iv.dml.DMLIV', 'mc_agg': 'mean'}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'train': {'frobenius_norm': 1.5951272809656039}, 'validation': {'frobenius_norm': 1.0673157380063014}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:02:14] {636} INFO - trial 4 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'projection': 0, 'opt_reweighted': 0, 'cov_clip': 0.1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dml.DMLIV', 'train': {'frobenius_norm': 1.5749917488367353}, 'validation': {'frobenius_norm': 1.1209231659854761}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:02:54] {636} INFO - trial 5 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'cov_clip': 0.1, 'opt_reweighted': 1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'train': {'frobenius_norm': 1.7497389879147947}, 'validation': {'frobenius_norm': 1.4443528602451912}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:03:55] {493} WARNING - Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]\n",
- "[flaml.tune.tune: 07-24 18:03:55] {636} INFO - trial 1 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'projection': 1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'train': {'frobenius_norm': 1.615607024139133}, 'validation': {'frobenius_norm': 1.1343300656833482}}\n",
- "Initial configs: [{'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'projection': True}}, {'estimator': {'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'iv.econml.iv.dml.DMLIV', 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'projection': 0, 'opt_reweighted': 0, 'cov_clip': 0.1}}, {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'cov_clip': 0.1, 'opt_reweighted': 1}}]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:04:36] {636} INFO - trial 2 config: {'estimator': {'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'mc_agg': 'mean'}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'train': {'energy_distance': 0.23724020051138384}, 'validation': {'energy_distance': 1.2377427037908548}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:04:56] {636} INFO - trial 3 config: {'estimator': {'estimator_name': 'iv.econml.iv.dml.DMLIV', 'mc_agg': 'mean'}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'train': {'energy_distance': 0.252395428844979}, 'validation': {'energy_distance': 1.2199628927910995}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:05:16] {636} INFO - trial 4 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'projection': 0, 'opt_reweighted': 0, 'cov_clip': 0.1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dml.DMLIV', 'train': {'energy_distance': 0.2526835825297922}, 'validation': {'energy_distance': 1.2201602220588965}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:05:56] {636} INFO - trial 5 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'cov_clip': 0.1, 'opt_reweighted': 1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'train': {'energy_distance': 0.2677465290200698}, 'validation': {'energy_distance': 1.260878360107383}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:06:57] {493} WARNING - Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]\n",
- "[flaml.tune.tune: 07-24 18:06:57] {636} INFO - trial 1 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'projection': 1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'train': {'energy_distance': 0.2590631341798373}, 'validation': {'energy_distance': 1.2301493868146594}}\n",
- "Initial configs: [{'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'projection': True}}, {'estimator': {'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'iv.econml.iv.dml.DMLIV', 'mc_agg': 'mean'}}, {'estimator': {'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'projection': 0, 'opt_reweighted': 0, 'cov_clip': 0.1}}, {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'cov_clip': 0.1, 'opt_reweighted': 1}}]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:07:38] {636} INFO - trial 2 config: {'estimator': {'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'mc_agg': 'mean'}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.LinearDRIV', 'train': {'codec': 0.05142857142857143}, 'validation': {'codec': 0.0}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:07:58] {636} INFO - trial 3 config: {'estimator': {'estimator_name': 'iv.econml.iv.dml.DMLIV', 'mc_agg': 'mean'}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dml.OrthoIV', 'train': {'codec': 0.18991097922848665}, 'validation': {'codec': 0.0}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:08:18] {636} INFO - trial 4 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'projection': 0, 'opt_reweighted': 0, 'cov_clip': 0.1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dml.DMLIV', 'train': {'codec': -0.0033333333333333335}, 'validation': {'codec': 0.0}}\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[flaml.tune.tune: 07-24 18:08:59] {636} INFO - trial 5 config: {'estimator': {'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'cov_clip': 0.1, 'opt_reweighted': 1}}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.SparseLinearDRIV', 'train': {'codec': 0.01342281879194631}, 'validation': {'codec': 0.0}}\n",
- "after estimate\n",
- "after setting score dictionary\n",
- "{'estimator_name': 'iv.econml.iv.dr.LinearIntentToTreatDRIV', 'train': {'codec': 0.05654761904761905}, 'validation': {'codec': 0.0}}\n"
- ]
- }
- ],
+ "source": [
+ "# 2. Model Fitting (Run Experiments)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
"source": [
"for dataset_name, cd in data_sets.items():\n",
" \n",
@@ -860,24 +340,37 @@
" cd_i.data = train_df\n",
" \n",
" #for metric in metrics:\n",
- " for metric in iv_metrics:\n",
+ " for metric in metrics:\n",
+ " \n",
+ " # use the passthrough_model to pass your propensities in the case of\n",
+ " # known propensities (KCKP)\n",
+ " #propensity_model=passthrough_model(\n",
+ " # cd.propensity_modifiers, include_control=False\n",
+ " #)\n",
" ct = CausalTune(\n",
" metric=metric,\n",
- " metrics_to_report=[metric],\n",
+ " estimator_list=estimator_list,\n",
+ "\n",
+ " num_samples = num_samples,\n",
+ " time_budget=time_budget,\n",
+ " components_time_budget=components_time_budget, \n",
+ " \n",
+ " metrics_to_report=metrics,\n",
" verbose=1,\n",
" components_verbose=1,\n",
- " components_time_budget=components_time_budget,\n",
- " #estimator_list=estimator_list,\n",
- " estimator_list=iv_estimator_list,\n",
" store_all_estimators=True,\n",
- " )\n",
"\n",
+ " propensity_model=propensity_model,\n",
+ " #outcome_model=-1,\n",
+ " )\n",
+ " \n",
" ct.fit(\n",
" data=cd_i,\n",
" treatment=\"treatment\",\n",
" outcome=\"outcome\",\n",
" )\n",
"\n",
+ " \n",
" # compute relevant scores (skip newdummy)\n",
" datasets = {\"train\": ct.train_df, \"validation\": ct.test_df, \"test\": test_df}\n",
" # get scores on train,val,test for each trial, \n",
@@ -898,12 +391,14 @@
" df,\n",
" metrics_to_report=ct.metrics_to_report,\n",
" )\n",
- "\n",
+ " \n",
" # add cate:\n",
" scores[ds_name][\"CATE_estimate\"] = estimator.estimator.effect(df)\n",
" # add ground truth for convenience\n",
" scores[ds_name][\"CATE_groundtruth\"] = df[\"true_effect\"]\n",
" scores[ds_name][metric] = est_scores[metric]\n",
+ " scores['optimization_score'] = trial.last_result.get('optimization_score')\n",
+ "\n",
" estimator_scores[estimator_name].append(scores)\n",
"\n",
"\n",
@@ -912,7 +407,12 @@
" estimator_scores[k] = sorted(\n",
" estimator_scores[k],\n",
" key=lambda x: x[\"validation\"][metric],\n",
- " reverse=False if metric in [\"energy_distance\", \"psw_energy_distance\", \"codec\"] else True,\n",
+ " reverse=False if metric in [\"energy_distance\", \n",
+ " \"psw_energy_distance\", \n",
+ " \"codec\", \n",
+ " \"frobenius_norm\", \n",
+ " \"psw_frobenius_norm\",\n",
+ " \"policy_risk\"] else True,\n",
" )\n",
" results = {\n",
" \"best_estimator\": ct.best_estimator,\n",
@@ -923,91 +423,124 @@
" }\n",
"\n",
"\n",
- " with open(f\"{out_dir}{filename_out}_{metric}_run_{i_run}_{dataset_name}.pkl\", \"wb\") as f:\n",
+ " with open(f\"{out_dir}/{filename_out}_{metric}_run_{i_run}_{dataset_name}.pkl\", \"wb\") as f:\n",
" pickle.dump(results, f)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Create Outcome Plots"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Specify Plot Type and Metrics to Create Specific Results Plots"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 241,
"metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- "