MemoryError: Unable to allocate sufficient memory

### Issue Description

I trained a Random Survival Forest with 10 estimators and a max depth of 25 on approximately 1,800 data samples. The full dataset contains approximately 200,000 samples, but I intentionally used only a very small subset when I encountered this error.
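When attempting to fit a `ModelSurvSHAP` on this very small dummy Random Survival Forest, I encounter the following error: `MemoryError: Unable to allocate 512. TiB for an array with shape (8388608, 8388608) and data type float64`. Note that 8388608 = 2**23, so the requested array size does not depend on the number of samples; it appears to come from `np.diag` building a dense square matrix from the kernel weights (see the error trace below), which suggests it scales with the number of features instead.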

I'm using survshap version 0.4.2.

### Minimal Reproducible Code Sample
```python
rsf = RandomSurvivalForest(
        n_estimators=10, max_depth=25, min_samples_split=10, min_samples_leaf=15, n_jobs=-1, random_state=random_state
    )
rsf.fit(X_train, y_train)

from survshap import SurvivalModelExplainer, PredictSurvSHAP, ModelSurvSHAP

rsf_exp = SurvivalModelExplainer(rsf, X_test, y_test)

exp1_survshap_global_rsf = ModelSurvSHAP(random_state=42)
exp1_survshap_global_rsf.fit(rsf_exp)
```

### Error Trace:
```shell
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
Cell In[38], line 6
      3 rsf_exp = SurvivalModelExplainer(rsf, X_test, y_test)
      5 exp1_survshap_global_rsf = ModelSurvSHAP(random_state=42)
----> 6 exp1_survshap_global_rsf.fit(rsf_exp)

File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\model_explanations\object.py:76, in ModelSurvSHAP.fit(self, explainer, new_observations, timestamps, save_individual_explanations, **kwargs)
     69 if new_observations is None:
     70     new_observations = explainer.data
     72 (
     73     self.full_result,
     74     self.individual_explanations,
     75     self.timestamps,
---> 76 ) = calculate_individual_explanations(
     77     explainer,
     78     new_observations,
     79     self.function_type,
     80     self.path,
     81     self.B,
     82     self.max_shap_value_inputs,
     83     self.random_state,
     84     self.calculation_method,
     85     self.aggregation_method,
     86     timestamps,
     87     save_individual_explanations,
     88     **kwargs
     89 )
     91 names = explainer.y.dtype.names
     92 self.event_ind = explainer.y[names[0]]

File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\model_explanations\utils.py:127, in calculate_individual_explanations(explainer, new_observations, function_type, path, B, max_shap_value_inputs, random_state, calculation_method, aggregation_method, timestamps, save_individual_explanations, **kwargs)
    117 for i in tqdm(range(len(new_observations))):
    118     survSHAP_obj = PredictSurvSHAP(
    119         function_type=function_type,
    120         path=path,
   (...)
    125         random_state=random_state,
    126     )
--> 127     survSHAP_obj.fit(explainer, new_observations.iloc[[i]], timestamps)
    128     if save_individual_explanations:
    129         individual_explanations.append(survSHAP_obj)

File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\predict_explanations\object.py:81, in PredictSurvSHAP.fit(self, explainer, new_observation, timestamps, y_true)
     72     self.y_true_time = y_true[names[1]]
     74 if self.calculation_method == "kernel":
     75     (
     76         self.result,
     77         self.predicted_function,
     78         self.baseline_function,
     79         self.timestamps,
     80         self.r2,
---> 81     ) = shap_kernel(
     82         explainer,
     83         new_observation,
     84         self.function,
     85         self.aggregation_method,
     86         timestamps,
     87         self.max_shap_value_inputs,
     88     )
     89 elif self.calculation_method == "sampling":
     90     (
     91         self.result,
     92         self.predicted_function,
   (...)
    104         self.exact,
    105     )

File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\predict_explanations\utils.py:106, in shap_kernel(explainer, new_observation, function_type, aggregation_method, timestamps, max_shap_value_inputs)
    101     print(
    102         f"Approximate Survival Shapley will sample only {max_shap_value_inputs} values instead of 2**{p} for Exact Shapley"
    103     )
    105 kernel_weights = generate_shap_kernel_weights(simplified_inputs, p)
--> 106 shap_values, r2 = calculate_shap_values(
    107     explainer,
    108     function_type,
    109     baseline_f,
    110     explainer.data,
    111     simplified_inputs,
    112     kernel_weights,
    113     new_observation,
    114     timestamps,
    115 )
    117 variable_names = explainer.data.columns
    118 result = prepare_result_df(new_observation, variable_names, shap_values, timestamps, aggregation_method)

File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\survshap\predict_explanations\utils.py:158, in calculate_shap_values(model, function_type, avg_function, data, simplified_inputs, shap_kernel_weights, new_observation, timestamps)
    148 def calculate_shap_values(
    149     model,
    150     function_type,
   (...)
    156     timestamps,
    157 ):
--> 158     W = np.diag(shap_kernel_weights)
    159     X = np.array(simplified_inputs)
    160     R = np.linalg.inv(X.T @ W @ X) @ (X.T @ W)

File c:\Users\alenk\anaconda3\envs\azureml_py310_sdkv2\lib\site-packages\numpy\lib\twodim_base.py:293, in diag(v, k)
    291 if len(s) == 1:
    292     n = s[0]+abs(k)
--> 293     res = zeros((n, n), v.dtype)
    294     if k >= 0:
    295         i = k

MemoryError: Unable to allocate 512. TiB for an array with shape (8388608, 8388608) and data type float64
```

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

MemoryError: Unable to allocate sufficient memory #31

Issue Description

Minimal Reproducible Code Sample

Error Trace:

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

MemoryError: Unable to allocate sufficient memory #31

Description

Issue Description

Minimal Reproducible Code Sample

Error Trace:

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions