Open
Description
Issue Description
I trained a Random Survival Forest with 10 estimators and a max depth of 25 on approximately 1,800 samples. The full dataset contains approximately 200,000 samples, but I intentionally used only a very small subset when I encountered this error.
When attempting to fit a ModelSurvSHAP on this very small dummy Random Survival Forest, I encounter the following error, raised by the `np.diag(shap_kernel_weights)` call in `calculate_shap_values` (full trace below): MemoryError: Unable to allocate 512. TiB for an array with shape (8388608, 8388608) and data type float64
I'm using survshap version 0.4.2.
Minimal Reproducible Code Sample
rsf = RandomSurvivalForest(
n_estimators=10, max_depth=25, min_samples_split=10, min_samples_leaf=15, n_jobs=-1, random_state=random_state
)
rsf.fit(X_train, y_train)
from survshap import SurvivalModelExplainer, PredictSurvSHAP, ModelSurvSHAP
rsf_exp = SurvivalModelExplainer(rsf, X_test, y_test)
exp1_survshap_global_rsf = ModelSurvSHAP(random_state=42)
exp1_survshap_global_rsf.fit(rsf_exp)
Error Trace:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
Cell In[38], line 6
3 rsf_exp = SurvivalModelExplainer(rsf, X_test, y_test)
5 exp1_survshap_global_rsf = ModelSurvSHAP(random_state=42)
----> 6 exp1_survshap_global_rsf.fit(rsf_exp)
File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\model_explanations\object.py:76, in ModelSurvSHAP.fit(self, explainer, new_observations, timestamps, save_individual_explanations, **kwargs)
69 if new_observations is None:
70 new_observations = explainer.data
72 (
73 self.full_result,
74 self.individual_explanations,
75 self.timestamps,
---> 76 ) = calculate_individual_explanations(
77 explainer,
78 new_observations,
79 self.function_type,
80 self.path,
81 self.B,
82 self.max_shap_value_inputs,
83 self.random_state,
84 self.calculation_method,
85 self.aggregation_method,
86 timestamps,
87 save_individual_explanations,
88 **kwargs
89 )
91 names = explainer.y.dtype.names
92 self.event_ind = explainer.y[names[0]]
File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\model_explanations\utils.py:127, in calculate_individual_explanations(explainer, new_observations, function_type, path, B, max_shap_value_inputs, random_state, calculation_method, aggregation_method, timestamps, save_individual_explanations, **kwargs)
117 for i in tqdm(range(len(new_observations))):
118 survSHAP_obj = PredictSurvSHAP(
119 function_type=function_type,
120 path=path,
(...)
125 random_state=random_state,
126 )
--> 127 survSHAP_obj.fit(explainer, new_observations.iloc[[i]], timestamps)
128 if save_individual_explanations:
129 individual_explanations.append(survSHAP_obj)
File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\predict_explanations\object.py:81, in PredictSurvSHAP.fit(self, explainer, new_observation, timestamps, y_true)
72 self.y_true_time = y_true[names[1]]
74 if self.calculation_method == "kernel":
75 (
76 self.result,
77 self.predicted_function,
78 self.baseline_function,
79 self.timestamps,
80 self.r2,
---> 81 ) = shap_kernel(
82 explainer,
83 new_observation,
84 self.function,
85 self.aggregation_method,
86 timestamps,
87 self.max_shap_value_inputs,
88 )
89 elif self.calculation_method == "sampling":
90 (
91 self.result,
92 self.predicted_function,
(...)
104 self.exact,
105 )
File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\predict_explanations\utils.py:106, in shap_kernel(explainer, new_observation, function_type, aggregation_method, timestamps, max_shap_value_inputs)
101 print(
102 f"Approximate Survival Shapley will sample only {max_shap_value_inputs} values instead of 2**{p} for Exact Shapley"
103 )
105 kernel_weights = generate_shap_kernel_weights(simplified_inputs, p)
--> 106 shap_values, r2 = calculate_shap_values(
107 explainer,
108 function_type,
109 baseline_f,
110 explainer.data,
111 simplified_inputs,
112 kernel_weights,
113 new_observation,
114 timestamps,
115 )
117 variable_names = explainer.data.columns
118 result = prepare_result_df(new_observation, variable_names, shap_values, timestamps, aggregation_method)
File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\predict_explanations\utils.py:158, in calculate_shap_values(model, function_type, avg_function, data, simplified_inputs, shap_kernel_weights, new_observation, timestamps)
148 def calculate_shap_values(
149 model,
150 function_type,
(...)
156 timestamps,
157 ):
--> 158 W = np.diag(shap_kernel_weights)
159 X = np.array(simplified_inputs)
160 R = np.linalg.inv(X.T @ W @ X) @ (X.T @ W)
File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\numpy\lib\twodim_base.py:293, in diag(v, k)
291 if len(s) == 1:
292 n = s[0]+abs(k)
--> 293 res = zeros((n, n), v.dtype)
294 if k >= 0:
295 i = k
MemoryError: Unable to allocate 512. TiB for an array with shape (8388608, 8388608) and data type float64
Metadata
Metadata
Assignees
Labels
No labels