Skip to content

MemoryError: Unable to allocate sufficient memory #31

Open
@kaalen

Description

@kaalen

Issue Description

I fitted a Random Survival Forest with 10 estimators and a max depth of 25 on approximately 1,800 data samples. The full dataset contains approximately 200,000 data samples, but I was intentionally using only a very small sample when I encountered this error.
When attempting to fit a ModelSurvSHAP on this very small dummy random survival forest, I encounter the following error: MemoryError: Unable to allocate 512. TiB for an array with shape (8388608, 8388608) and data type float64

I'm using survshap version 0.4.2.

Minimal Reproducible Code Sample

rsf = RandomSurvivalForest(
        n_estimators=10, max_depth=25, min_samples_split=10, min_samples_leaf=15, n_jobs=-1, random_state=random_state
    )
rsf.fit(X_train, y_train)

from survshap import SurvivalModelExplainer, PredictSurvSHAP, ModelSurvSHAP

rsf_exp = SurvivalModelExplainer(rsf, X_test, y_test)

exp1_survshap_global_rsf = ModelSurvSHAP(random_state=42)
exp1_survshap_global_rsf.fit(rsf_exp)

Error Trace:

---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
Cell In[38], line 6
      3 rsf_exp = SurvivalModelExplainer(rsf, X_test, y_test)
      5 exp1_survshap_global_rsf = ModelSurvSHAP(random_state=42)
----> 6 exp1_survshap_global_rsf.fit(rsf_exp)

File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\model_explanations\object.py:76, in ModelSurvSHAP.fit(self, explainer, new_observations, timestamps, save_individual_explanations, **kwargs)
     69 if new_observations is None:
     70     new_observations = explainer.data
     72 (
     73     self.full_result,
     74     self.individual_explanations,
     75     self.timestamps,
---> 76 ) = calculate_individual_explanations(
     77     explainer,
     78     new_observations,
     79     self.function_type,
     80     self.path,
     81     self.B,
     82     self.max_shap_value_inputs,
     83     self.random_state,
     84     self.calculation_method,
     85     self.aggregation_method,
     86     timestamps,
     87     save_individual_explanations,
     88     **kwargs
     89 )
     91 names = explainer.y.dtype.names
     92 self.event_ind = explainer.y[names[0]]

File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\model_explanations\utils.py:127, in calculate_individual_explanations(explainer, new_observations, function_type, path, B, max_shap_value_inputs, random_state, calculation_method, aggregation_method, timestamps, save_individual_explanations, **kwargs)
    117 for i in tqdm(range(len(new_observations))):
    118     survSHAP_obj = PredictSurvSHAP(
    119         function_type=function_type,
    120         path=path,
   (...)
    125         random_state=random_state,
    126     )
--> 127     survSHAP_obj.fit(explainer, new_observations.iloc[[i]], timestamps)
    128     if save_individual_explanations:
    129         individual_explanations.append(survSHAP_obj)

File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\predict_explanations\object.py:81, in PredictSurvSHAP.fit(self, explainer, new_observation, timestamps, y_true)
     72     self.y_true_time = y_true[names[1]]
     74 if self.calculation_method == "kernel":
     75     (
     76         self.result,
     77         self.predicted_function,
     78         self.baseline_function,
     79         self.timestamps,
     80         self.r2,
---> 81     ) = shap_kernel(
     82         explainer,
     83         new_observation,
     84         self.function,
     85         self.aggregation_method,
     86         timestamps,
     87         self.max_shap_value_inputs,
     88     )
     89 elif self.calculation_method == "sampling":
     90     (
     91         self.result,
     92         self.predicted_function,
   (...)
    104         self.exact,
    105     )

File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\predict_explanations\utils.py:106, in shap_kernel(explainer, new_observation, function_type, aggregation_method, timestamps, max_shap_value_inputs)
    101     print(
    102         f"Approximate Survival Shapley will sample only {max_shap_value_inputs} values instead of 2**{p} for Exact Shapley"
    103     )
    105 kernel_weights = generate_shap_kernel_weights(simplified_inputs, p)
--> 106 shap_values, r2 = calculate_shap_values(
    107     explainer,
    108     function_type,
    109     baseline_f,
    110     explainer.data,
    111     simplified_inputs,
    112     kernel_weights,
    113     new_observation,
    114     timestamps,
    115 )
    117 variable_names = explainer.data.columns
    118 result = prepare_result_df(new_observation, variable_names, shap_values, timestamps, aggregation_method)

File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\predict_explanations\utils.py:158, in calculate_shap_values(model, function_type, avg_function, data, simplified_inputs, shap_kernel_weights, new_observation, timestamps)
    148 def calculate_shap_values(
    149     model,
    150     function_type,
   (...)
    156     timestamps,
    157 ):
--> 158     W = np.diag(shap_kernel_weights)
    159     X = np.array(simplified_inputs)
    160     R = np.linalg.inv(X.T @ W @ X) @ (X.T @ W)

File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\numpy\lib\twodim_base.py:293, in diag(v, k)
    291 if len(s) == 1:
    292     n = s[0]+abs(k)
--> 293     res = zeros((n, n), v.dtype)
    294     if k >= 0:
    295         i = k

MemoryError: Unable to allocate 512. TiB for an array with shape (8388608, 8388608) and data type float64

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions