Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Error while using it with CatBoost #131

Open
Diyacmenezes202 opened this issue Jul 12, 2024 · 0 comments
Open

Error while using it with CatBoost #131

Diyacmenezes202 opened this issue Jul 12, 2024 · 0 comments

Comments

@Diyacmenezes202
Copy link

`TypeError Traceback (most recent call last)
File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:384, in BorutaPy._get_imp(self, X, y)
383 try:
--> 384 self.estimator.fit(X, y)
385 except Exception as e:

File ~\anaconda3\Lib\site-packages\catboost\core.py:5220, in CatBoostClassifier.fit(self, X, y, cat_features, text_features, embedding_features, sample_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
5218 CatBoostClassifier._check_is_compatible_loss(params['loss_function'])
-> 5220 self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None, None, None, baseline, use_best_model,
5221 eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period,
5222 silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
5223 return self

File ~\anaconda3\Lib\site-packages\catboost\core.py:2385, in CatBoost._fit(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
2383 raise CatBoostError("y may be None only when X is an instance of catboost.Pool or string")
-> 2385 train_params = self._prepare_train_params(
2386 X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
2387 pairs=pairs, sample_weight=sample_weight, group_id=group_id, group_weight=group_weight,
2388 subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline, use_best_model=use_best_model,
2389 eval_set=eval_set, verbose=verbose, logging_level=logging_level, plot=plot, plot_file=plot_file,
2390 column_description=column_description, verbose_eval=verbose_eval, metric_period=metric_period,
2391 silent=silent, early_stopping_rounds=early_stopping_rounds, save_snapshot=save_snapshot,
2392 snapshot_file=snapshot_file, snapshot_interval=snapshot_interval, init_model=init_model,
2393 callbacks=callbacks
2394 )
2395 params = train_params["params"]

File ~\anaconda3\Lib\site-packages\catboost\core.py:2311, in CatBoost._prepare_train_params(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks)
2310 params = _params_type_cast(params)
-> 2311 _check_train_params(params)
2313 if params.get('eval_fraction', 0.0) != 0.0:

File _catboost.pyx:6393, in _catboost._check_train_params()

File _catboost.pyx:6414, in _catboost._check_train_params()

File _catboost.pyx:1830, in _catboost._PreprocessParams.__init__()

File ~\anaconda3\Lib\json\__init__.py:238, in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
233 cls = JSONEncoder
234 return cls(
235 skipkeys=skipkeys, ensure_ascii=ensure_ascii,
236 check_circular=check_circular, allow_nan=allow_nan, indent=indent,
237 separators=separators, default=default, sort_keys=sort_keys,
--> 238 **kw).encode(obj)

File ~\anaconda3\Lib\json\encoder.py:200, in JSONEncoder.encode(self, o)
197 # This doesn't pass the iterator directly to ''.join() because the
198 # exceptions aren't as detailed. The list call should be roughly
199 # equivalent to the PySequence_Fast that ''.join() would do.
--> 200 chunks = self.iterencode(o, _one_shot=True)
201 if not isinstance(chunks, (list, tuple)):

File ~\anaconda3\Lib\json\encoder.py:258, in JSONEncoder.iterencode(self, o, _one_shot)
254 _iterencode = _make_iterencode(
255 markers, self.default, _encoder, self.indent, floatstr,
256 self.key_separator, self.item_separator, self.sort_keys,
257 self.skipkeys, _one_shot)
--> 258 return _iterencode(o, 0)

File _catboost.pyx:158, in _catboost._NumpyAwareEncoder.default()

File ~\anaconda3\Lib\json\encoder.py:180, in JSONEncoder.default(self, o)
162 """Implement this method in a subclass such that it returns
163 a serializable object for o, or calls the base implementation
164 (to raise a TypeError).
(...)
178
179 """
--> 180 raise TypeError(f'Object of type {o.__class__.__name__} '
181 f'is not JSON serializable')

TypeError: Object of type RandomState is not JSON serializable

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last)
Cell In[12], line 46
43 boruta = BorutaPy(catboost, n_estimators='auto',random_state=random_state, verbose=2)
45 # Fit Boruta
---> 46 boruta.fit(X_scaled, y)
48 # Get selected features
49 selected_features = X.columns[boruta.support_].tolist()

File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:201, in BorutaPy.fit(self, X, y)
188 def fit(self, X, y):
189 """
190 Fits the Boruta feature selection with the provided estimator.
191
(...)
198 The target values.
199 """
--> 201 return self._fit(X, y)

File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:285, in BorutaPy._fit(self, X, y)
282 self.estimator.set_params(random_state=self.random_state)
284 # add shadow attributes, shuffle them and train estimator, get imps
--> 285 cur_imp = self._add_shadows_get_imps(X, y, dec_reg)
287 # get the threshold of shadow importances we will use for rejection
288 imp_sha_max = np.percentile(cur_imp[1], self.perc)

File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:412, in BorutaPy._add_shadows_get_imps(self, X, y, dec_reg)
410 x_sha = np.apply_along_axis(self._get_shuffle, 0, x_sha)
411 # get importance of the merged matrix
--> 412 imp = self._get_imp(np.hstack((x_cur, x_sha)), y)
413 # separate importances of real and shadow features
414 imp_sha = imp[x_cur_w:]

File ~\anaconda3\Lib\site-packages\boruta\boruta_py.py:386, in BorutaPy._get_imp(self, X, y)
384 self.estimator.fit(X, y)
385 except Exception as e:
--> 386 raise ValueError('Please check your X and y variable. The provided'
387 'estimator cannot be fitted to your data.\n' + str(e))
388 try:
389 imp = self.estimator.feature_importances_

ValueError: Please check your X and y variable. The providedestimator cannot be fitted to your data.
Object of type RandomState is not JSON serializable`

I get this error when fitting BorutaPy with a CatBoost estimator. I have tried serializing the `random_state` variable to JSON myself, but the same error persists.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant