Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bagging for SurvivalBoost to help monotonicity #82

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion hazardous/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pathlib import Path

from ._survival_boost import SurvivalBoost
from ._survival_boost import BaggedSurvivalBoost, SurvivalBoost

with open(Path(__file__).parent / "VERSION.txt") as _fh:
__version__ = _fh.read().strip()
Expand All @@ -9,4 +9,5 @@
__all__ = [
"metrics",
"SurvivalBoost",
"BaggedSurvivalBoost",
]
74 changes: 74 additions & 0 deletions hazardous/_survival_boost.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,3 +633,77 @@ def score(self, X, y):
)
ibs_events.append(ibs_event)
return -np.mean(ibs_events)


class BaggedSurvivalBoost(BaseEstimator, ClassifierMixin):
    """Ensemble of ``SurvivalBoost`` models whose predictions are averaged.

    Trains ``bagging`` independent ``SurvivalBoost`` estimators that differ
    only by their random seed and averages their predictions. Averaging
    reduces seed-induced variance and helps smooth non-monotonic artifacts
    in the predicted cumulative incidence curves.

    NOTE(review): despite the name, no bootstrap resampling is performed --
    every sub-model is fit on the full ``(X, y)``; model diversity comes only
    from the random seeds. Confirm whether true bagging (bootstrap samples)
    is intended.

    All constructor parameters except ``bagging`` and ``random_state`` are
    forwarded verbatim to each underlying ``SurvivalBoost``.
    """

    def __init__(
        # TODO: run a grid search on a few datasets to find good defaults.
        self,
        hard_zero_fraction=0.1,
        # TODO: implement convergence criterion and use max_iter instead of
        # n_iter.
        n_iter=100,
        learning_rate=0.05,
        max_leaf_nodes=31,
        max_depth=None,
        min_samples_leaf=50,
        show_progressbar=True,
        n_time_grid_steps=100,
        time_horizon=None,
        ipcw_strategy="alternating",
        n_iter_before_feedback=20,
        random_state=None,
        n_horizons_per_observation=3,
        bagging=5,
    ):
        self.hard_zero_fraction = hard_zero_fraction
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.max_leaf_nodes = max_leaf_nodes
        self.min_samples_leaf = min_samples_leaf
        self.show_progressbar = show_progressbar
        self.n_time_grid_steps = n_time_grid_steps
        self.time_horizon = time_horizon
        self.n_iter_before_feedback = n_iter_before_feedback
        self.ipcw_strategy = ipcw_strategy
        self.random_state = random_state
        self.n_horizons_per_observation = n_horizons_per_observation
        self.bagging = bagging  # number of models to train

    def fit(self, X, y, times=None):
        """Fit ``bagging`` SurvivalBoost models on the full training data.

        Each model receives the same hyper-parameters but a distinct random
        seed, so that the ensemble members differ.
        """
        self.models = []
        # Forward every hyper-parameter except the ensemble-specific ones.
        survival_boost_params = self.get_params()
        survival_boost_params.pop("random_state")
        survival_boost_params.pop("bagging")
        for i in range(self.bagging):
            # Derive a distinct, reproducible seed per model. When no seed
            # is given, let each model draw its own randomness: the previous
            # ``self.random_state + i`` raised a TypeError for the default
            # ``random_state=None``.
            if self.random_state is None:
                seed = None
            else:
                seed = self.random_state + i
            model = SurvivalBoost(random_state=seed, **survival_boost_params)
            model.fit(X, y, times)
            self.models.append(model)
        return self

    def predict_proba(self, X, time_horizon=None):
        """Return the mean of the per-model ``predict_proba`` outputs."""
        # Kept as an attribute for backward compatibility / introspection;
        # the return value is the ensemble average.
        self.predictions = [
            model.predict_proba(X, time_horizon) for model in self.models
        ]
        return np.mean(self.predictions, axis=0)

    def predict_cumulative_incidence(self, X, times=None):
        """Return the mean of the per-model cumulative incidence curves."""
        self.predictions = [
            model.predict_cumulative_incidence(X, times) for model in self.models
        ]
        return np.mean(self.predictions, axis=0)

    def predict_survival_function(self, X, times=None):
        """Return the mean of the per-model survival functions."""
        self.predictions = [
            model.predict_survival_function(X, times) for model in self.models
        ]
        return np.mean(self.predictions, axis=0)

    def score(self, X, y):
        """Return the mean of the per-model scores (negative mean IBS)."""
        self.scores = [model.score(X, y) for model in self.models]
        return np.mean(self.scores)
Loading