Add cov_type and kwargs to BLP object #271

Merged
merged 9 commits on Oct 10, 2024
15 changes: 11 additions & 4 deletions doubleml/irm/apo.py
@@ -389,7 +389,7 @@ def _check_data(self, obj_dml_data):

return

def capo(self, basis, is_gate=False):
def capo(self, basis, is_gate=False, **kwargs):
"""
Calculate conditional average potential outcomes (CAPO) for a given basis.

@@ -398,10 +398,14 @@ def capo(self, basis, is_gate=False):
basis : :class:`pandas.DataFrame`
The basis for estimating the best linear predictor. Has to have the shape ``(n_obs, d)``,
where ``n_obs`` is the number of observations and ``d`` is the number of predictors.

is_gate : bool
Indicates whether the basis is constructed for GATE/GAPOs (dummy-basis).
Default is ``False``.

**kwargs : dict
Additional keyword arguments passed to :meth:`statsmodels.regression.linear_model.OLS.fit`, e.g. ``cov_type``.

Returns
-------
model : :class:`doubleML.DoubleMLBLP`
@@ -420,10 +424,10 @@ def capo(self, basis, is_gate=False):
orth_signal = self.psi_elements['psi_b'].reshape(-1)
# fit the best linear predictor
model = DoubleMLBLP(orth_signal, basis=basis, is_gate=is_gate)
model.fit()
model.fit(**kwargs)
return model

def gapo(self, groups):
def gapo(self, groups, **kwargs):
"""
Calculate group average potential outcomes (GAPO) for groups.

@@ -434,6 +438,9 @@ def gapo(self, groups):
Has to be dummy coded with shape ``(n_obs, d)``, where ``n_obs`` is the number of observations
and ``d`` is the number of groups or ``(n_obs, 1)`` and contain the corresponding groups (as str).

**kwargs : dict
Additional keyword arguments passed to :meth:`statsmodels.regression.linear_model.OLS.fit`, e.g. ``cov_type``.

Returns
-------
model : :class:`doubleML.DoubleMLBLP`
@@ -453,5 +460,5 @@ def gapo(self, groups):
if any(groups.sum(0) <= 5):
warnings.warn('At least one group effect is estimated with less than 6 observations.')

model = self.capo(groups, is_gate=True)
model = self.capo(groups, is_gate=True, **kwargs)
return model
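For context (not part of the changeset), a minimal usage sketch of the extended `capo`/`gapo` interface. It assumes `dml_apo` is an already fitted `DoubleMLAPO` model and `df` is its underlying data frame with a covariate `X1`; the basis and groups are illustrative only.

```python
import numpy as np
import pandas as pd

# Assumption: `dml_apo` is a fitted doubleml.DoubleMLAPO object, `df` its data as a DataFrame.
np.random.seed(0)
basis = pd.DataFrame(np.random.normal(size=(df.shape[0], 3)))

# Any keyword accepted by statsmodels' OLS.fit() is now forwarded, e.g. cov_type.
capo = dml_apo.capo(basis, cov_type="HC3")
print(capo.confint(basis))

groups = pd.DataFrame({"Group 1": df["X1"] <= 0, "Group 2": df["X1"] > 0})
gapo = dml_apo.gapo(groups, cov_type="HC1")
print(gapo.confint())
```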
15 changes: 11 additions & 4 deletions doubleml/irm/irm.py
@@ -431,7 +431,7 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_

return res

def cate(self, basis, is_gate=False):
def cate(self, basis, is_gate=False, **kwargs):
"""
Calculate conditional average treatment effects (CATE) for a given basis.

@@ -440,10 +440,14 @@ def cate(self, basis, is_gate=False):
basis : :class:`pandas.DataFrame`
The basis for estimating the best linear predictor. Has to have the shape ``(n_obs, d)``,
where ``n_obs`` is the number of observations and ``d`` is the number of predictors.

is_gate : bool
Indicates whether the basis is constructed for GATEs (dummy-basis).
Default is ``False``.

**kwargs : dict
Additional keyword arguments passed to :meth:`statsmodels.regression.linear_model.OLS.fit`, e.g. ``cov_type``.

Returns
-------
model : :class:`doubleML.DoubleMLBLP`
@@ -462,10 +466,10 @@ def cate(self, basis, is_gate=False):
orth_signal = self.psi_elements['psi_b'].reshape(-1)
# fit the best linear predictor
model = DoubleMLBLP(orth_signal, basis=basis, is_gate=is_gate)
model.fit()
model.fit(**kwargs)
return model

def gate(self, groups):
def gate(self, groups, **kwargs):
"""
Calculate group average treatment effects (GATE) for groups.

@@ -476,6 +480,9 @@ def gate(self, groups):
Has to be dummy coded with shape ``(n_obs, d)``, where ``n_obs`` is the number of observations
and ``d`` is the number of groups or ``(n_obs, 1)`` and contain the corresponding groups (as str).

**kwargs : dict
Additional keyword arguments passed to :meth:`statsmodels.regression.linear_model.OLS.fit`, e.g. ``cov_type``.

Returns
-------
model : :class:`doubleML.DoubleMLBLP`
@@ -495,7 +502,7 @@ def gate(self, groups):
if any(groups.sum(0) <= 5):
warnings.warn('At least one group effect is estimated with less than 6 observations.')

model = self.cate(groups, is_gate=True)
model = self.cate(groups, is_gate=True, **kwargs)
return model

def policy_tree(self, features, depth=2, **tree_params):
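A self-contained sketch of the same pass-through for `cate`/`gate` (not part of the diff); the data generator and learners are standard DoubleML/scikit-learn choices, picked here only for illustration.

```python
import numpy as np
import pandas as pd
import doubleml as dml
from doubleml.datasets import make_irm_data
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

np.random.seed(42)
df = make_irm_data(n_obs=500, dim_x=5, return_type="DataFrame")
obj_dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d")
dml_irm = dml.DoubleMLIRM(obj_dml_data,
                          ml_g=RandomForestRegressor(n_estimators=50),
                          ml_m=RandomForestClassifier(n_estimators=50))
dml_irm.fit()

# CATE: the basis is handed to DoubleMLBLP, which now forwards cov_type to OLS.fit().
basis = pd.DataFrame({"const": 1.0, "x1": df["X1"], "x1_sq": df["X1"] ** 2})
cate = dml_irm.cate(basis, cov_type="HC3")
print(cate.confint(basis))

# GATE: groups are dummy coded; kwargs travel through gate() -> cate() -> DoubleMLBLP.fit().
groups = pd.DataFrame({"Group 1": df["X1"] <= 0, "Group 2": df["X1"] > 0})
gate = dml_irm.gate(groups, cov_type="HC1")
print(gate.confint())
```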
17 changes: 13 additions & 4 deletions doubleml/irm/tests/test_apo.py
@@ -200,8 +200,14 @@ def test_dml_apo_sensitivity(dml_apo_fixture):
rtol=1e-9, atol=1e-4)


@pytest.fixture(scope='module',
params=["nonrobust", "HC0", "HC1", "HC2", "HC3"])
def cov_type(request):
return request.param


@pytest.mark.ci
def test_dml_apo_capo_gapo(treatment_level):
def test_dml_apo_capo_gapo(treatment_level, cov_type):
n = 20
# collect data
np.random.seed(42)
@@ -221,25 +227,28 @@ def test_dml_apo_capo_gapo(treatment_level):
dml_obj.fit()
# create a random basis
random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5)))
capo = dml_obj.capo(random_basis)
capo = dml_obj.capo(random_basis, cov_type=cov_type)
assert isinstance(capo, dml.utils.blp.DoubleMLBLP)
assert isinstance(capo.confint(), pd.DataFrame)
assert capo.blp_model.cov_type == cov_type

groups_1 = pd.DataFrame(np.column_stack([obj_dml_data.data['X1'] <= -1.0,
obj_dml_data.data['X1'] > 0.2]),
columns=['Group 1', 'Group 2'])
msg = ('At least one group effect is estimated with less than 6 observations.')
with pytest.warns(UserWarning, match=msg):
gapo_1 = dml_obj.gapo(groups_1)
gapo_1 = dml_obj.gapo(groups_1, cov_type=cov_type)
assert isinstance(gapo_1, dml.utils.blp.DoubleMLBLP)
assert isinstance(gapo_1.confint(), pd.DataFrame)
assert all(gapo_1.confint().index == groups_1.columns.to_list())
assert gapo_1.blp_model.cov_type == cov_type

np.random.seed(42)
groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n, p=[0.1, 0.9]))
msg = ('At least one group effect is estimated with less than 6 observations.')
with pytest.warns(UserWarning, match=msg):
gapo_2 = dml_obj.gapo(groups_2)
gapo_2 = dml_obj.gapo(groups_2, cov_type=cov_type)
assert isinstance(gapo_2, dml.utils.blp.DoubleMLBLP)
assert isinstance(gapo_2.confint(), pd.DataFrame)
assert all(gapo_2.confint().index == ["Group_1", "Group_2"])
assert gapo_2.blp_model.cov_type == cov_type
17 changes: 13 additions & 4 deletions doubleml/irm/tests/test_irm.py
@@ -187,8 +187,14 @@ def test_dml_irm_sensitivity_rho0(dml_irm_fixture):
rtol=1e-9, atol=1e-4)


@pytest.fixture(scope='module',
params=["nonrobust", "HC0", "HC1", "HC2", "HC3"])
def cov_type(request):
return request.param


@pytest.mark.ci
def test_dml_irm_cate_gate():
def test_dml_irm_cate_gate(cov_type):
n = 9
# collect data
np.random.seed(42)
@@ -207,28 +213,31 @@ def test_dml_irm_cate_gate():
dml_irm_obj.fit()
# create a random basis
random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5)))
cate = dml_irm_obj.cate(random_basis)
cate = dml_irm_obj.cate(random_basis, cov_type=cov_type)
assert isinstance(cate, dml.utils.blp.DoubleMLBLP)
assert isinstance(cate.confint(), pd.DataFrame)
assert cate.blp_model.cov_type == cov_type

groups_1 = pd.DataFrame(np.column_stack([obj_dml_data.data['X1'] <= 0,
obj_dml_data.data['X1'] > 0.2]),
columns=['Group 1', 'Group 2'])
msg = ('At least one group effect is estimated with less than 6 observations.')
with pytest.warns(UserWarning, match=msg):
gate_1 = dml_irm_obj.gate(groups_1)
gate_1 = dml_irm_obj.gate(groups_1, cov_type=cov_type)
assert isinstance(gate_1, dml.utils.blp.DoubleMLBLP)
assert isinstance(gate_1.confint(), pd.DataFrame)
assert all(gate_1.confint().index == groups_1.columns.to_list())
assert gate_1.blp_model.cov_type == cov_type

np.random.seed(42)
groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n))
msg = ('At least one group effect is estimated with less than 6 observations.')
with pytest.warns(UserWarning, match=msg):
gate_2 = dml_irm_obj.gate(groups_2)
gate_2 = dml_irm_obj.gate(groups_2, cov_type=cov_type)
assert isinstance(gate_2, dml.utils.blp.DoubleMLBLP)
assert isinstance(gate_2.confint(), pd.DataFrame)
assert all(gate_2.confint().index == ["Group_1", "Group_2"])
assert gate_2.blp_model.cov_type == cov_type


@pytest.fixture(scope='module',
15 changes: 11 additions & 4 deletions doubleml/plm/plr.py
@@ -341,7 +341,7 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_

return res

def cate(self, basis, is_gate=False):
def cate(self, basis, is_gate=False, **kwargs):
"""
Calculate conditional average treatment effects (CATE) for a given basis.

@@ -350,10 +350,14 @@ def cate(self, basis, is_gate=False):
basis : :class:`pandas.DataFrame`
The basis for estimating the best linear predictor. Has to have the shape ``(n_obs, d)``,
where ``n_obs`` is the number of observations and ``d`` is the number of predictors.

is_gate : bool
Indicates whether the basis is constructed for GATEs (dummy-basis).
Default is ``False``.

**kwargs : dict
Additional keyword arguments passed to :meth:`statsmodels.regression.linear_model.OLS.fit`, e.g. ``cov_type``.

Returns
-------
model : :class:`doubleML.DoubleMLBLP`
@@ -374,10 +378,10 @@ def cate(self, basis, is_gate=False):
basis=D_basis,
is_gate=is_gate,
)
model.fit()
model.fit(**kwargs)
return model

def gate(self, groups):
def gate(self, groups, **kwargs):
"""
Calculate group average treatment effects (GATE) for groups.

@@ -388,6 +392,9 @@ def gate(self, groups):
Has to be dummy coded with shape ``(n_obs, d)``, where ``n_obs`` is the number of observations
and ``d`` is the number of groups or ``(n_obs, 1)`` and contain the corresponding groups (as str).

**kwargs : dict
Additional keyword arguments passed to :meth:`statsmodels.regression.linear_model.OLS.fit`, e.g. ``cov_type``.

Returns
-------
model : :class:`doubleML.DoubleMLBLP`
@@ -407,7 +414,7 @@ def gate(self, groups):
if any(groups.sum(0) <= 5):
warnings.warn('At least one group effect is estimated with less than 6 observations.')

model = self.cate(groups, is_gate=True)
model = self.cate(groups, is_gate=True, **kwargs)
return model

def _partial_out(self):
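The PLR variants accept the same pass-through. Since `**kwargs` is not limited to `cov_type`, further `OLS.fit()` options such as `cov_kwds` go through as well; a sketch, with a purely synthetic clustering variable to illustrate a cluster-robust fit:

```python
import numpy as np
import pandas as pd
import doubleml as dml
from doubleml.datasets import make_plr_CCDDHNR2018
from sklearn.linear_model import LassoCV

np.random.seed(42)
obj_dml_data = make_plr_CCDDHNR2018(n_obs=500, dim_x=10)
dml_plr = dml.DoubleMLPLR(obj_dml_data, ml_l=LassoCV(), ml_m=LassoCV())
dml_plr.fit()

# Besides cov_type, any further OLS.fit() argument travels through **kwargs,
# e.g. cov_kwds for a cluster-robust covariance (clusters here are made up).
clusters = np.repeat(np.arange(50), 10)
groups = pd.DataFrame({"Group 1": obj_dml_data.data["X1"] <= 0,
                       "Group 2": obj_dml_data.data["X1"] > 0})
gate = dml_plr.gate(groups, cov_type="cluster", cov_kwds={"groups": clusters})
print(gate.blp_model.cov_type)  # 'cluster'
print(gate.confint())
```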
17 changes: 13 additions & 4 deletions doubleml/plm/tests/test_plr.py
@@ -301,8 +301,14 @@ def test_dml_plr_ols_manual_boot(dml_plr_ols_manual_fixture):
rtol=1e-9, atol=1e-4)


@pytest.fixture(scope='module',
params=["nonrobust", "HC0", "HC1", "HC2", "HC3"])
def cov_type(request):
return request.param


@pytest.mark.ci
def test_dml_plr_cate_gate(score):
def test_dml_plr_cate_gate(score, cov_type):
n = 9

# collect data
@@ -318,26 +324,29 @@ def test_dml_plr_cate_gate(score):
score=score)
dml_plr_obj.fit()
random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5)))
cate = dml_plr_obj.cate(random_basis)
cate = dml_plr_obj.cate(random_basis, cov_type=cov_type)
assert isinstance(cate, dml.DoubleMLBLP)
assert isinstance(cate.confint(), pd.DataFrame)
assert cate.blp_model.cov_type == cov_type

groups_1 = pd.DataFrame(
np.column_stack([obj_dml_data.data['X1'] <= 0,
obj_dml_data.data['X1'] > 0.2]),
columns=['Group 1', 'Group 2'])
msg = ('At least one group effect is estimated with less than 6 observations.')
with pytest.warns(UserWarning, match=msg):
gate_1 = dml_plr_obj.gate(groups_1)
gate_1 = dml_plr_obj.gate(groups_1, cov_type=cov_type)
assert isinstance(gate_1, dml.utils.blp.DoubleMLBLP)
assert isinstance(gate_1.confint(), pd.DataFrame)
assert all(gate_1.confint().index == groups_1.columns.tolist())
assert gate_1.blp_model.cov_type == cov_type

np.random.seed(42)
groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n))
msg = ('At least one group effect is estimated with less than 6 observations.')
with pytest.warns(UserWarning, match=msg):
gate_2 = dml_plr_obj.gate(groups_2)
gate_2 = dml_plr_obj.gate(groups_2, cov_type=cov_type)
assert isinstance(gate_2, dml.utils.blp.DoubleMLBLP)
assert isinstance(gate_2.confint(), pd.DataFrame)
assert all(gate_2.confint().index == ["Group_1", "Group_2"])
assert gate_2.blp_model.cov_type == cov_type
15 changes: 12 additions & 3 deletions doubleml/utils/blp.py
@@ -110,18 +110,27 @@ def summary(self):
columns=col_names)
return df_summary

def fit(self):
def fit(self, cov_type='HC0', **kwargs):
"""
Estimate DoubleMLBLP models.

Parameters
----------
cov_type : str
The covariance type to be used in the estimation. Default is ``'HC0'``.
See :meth:`statsmodels.regression.linear_model.OLS.fit` for more information.

**kwargs : dict
Additional keyword arguments passed to :meth:`statsmodels.regression.linear_model.OLS.fit`.

Returns
-------
self : object
"""

# fit the best-linear-predictor of the orthogonal signal with respect to the grid
self._blp_model = sm.OLS(self._orth_signal, self._basis).fit()
self._blp_omega = self._blp_model.cov_HC0
self._blp_model = sm.OLS(self._orth_signal, self._basis).fit(cov_type=cov_type, **kwargs)
self._blp_omega = self._blp_model.cov_params().to_numpy()

return self

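On the `DoubleMLBLP` side, `fit()` keeps ``'HC0'`` as its default but now stores whatever covariance `OLS.fit()` produced. A standalone sketch with a synthetic signal and basis (not taken from the PR):

```python
import numpy as np
import pandas as pd
import statsmodels.api as sm
from doubleml.utils.blp import DoubleMLBLP

np.random.seed(0)
n = 200
basis = pd.DataFrame(np.random.normal(size=(n, 3)), columns=["b0", "b1", "b2"])
orth_signal = basis["b0"].to_numpy() + np.random.normal(size=n)

blp = DoubleMLBLP(orth_signal, basis).fit(cov_type="HC1")

# The stored covariance now reflects the requested cov_type instead of the
# previously hard-coded cov_HC0.
ols = sm.OLS(orth_signal, basis).fit(cov_type="HC1")
assert blp.blp_model.cov_type == "HC1"
assert np.allclose(blp.blp_model.cov_params(), ols.cov_params())
```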
6 changes: 3 additions & 3 deletions doubleml/utils/tests/_utils_blp_manual.py
@@ -5,8 +5,8 @@
import pandas as pd


def fit_blp(orth_signal, basis):
blp_model = sm.OLS(orth_signal, basis).fit()
def fit_blp(orth_signal, basis, cov_type, **kwargs):
blp_model = sm.OLS(orth_signal, basis).fit(cov_type=cov_type, **kwargs)

return blp_model

@@ -15,7 +15,7 @@ def blp_confint(blp_model, basis, joint=False, level=0.95, n_rep_boot=500):
alpha = 1 - level
g_hat = blp_model.predict(basis)

blp_omega = blp_model.cov_HC0
blp_omega = blp_model.cov_params().to_numpy()

blp_se = np.sqrt((basis.dot(blp_omega) * basis).sum(axis=1))

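A last sanity check, not part of the diff: switching the stored omega from the `cov_HC0` attribute to `cov_params()` is behavior-preserving for the old default, because under ``cov_type='HC0'`` both expose the same matrix.

```python
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = X[:, 0] + rng.normal(size=100)

# cov_params() honours the cov_type passed to fit(); cov_HC0 is the attribute
# the previous implementation read directly.
res = sm.OLS(y, X).fit(cov_type="HC0")
assert np.allclose(res.cov_params(), res.cov_HC0)
```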