# Patch summary (free text ahead of the first "diff --git" header).
#
# python-package/lightgbm/basic.py, inside `_lazy_init`:
#   * The pyarrow-table assignment `feature_name = data.column_names` is removed
#     from the branch that runs after `__init_from_pyarrow_table(...)`.
#   * The same assignment is added as an `elif` directly after the preceding
#     branch's multi-line call closes, ahead of the "# process for args" section,
#     and it now only fires when `feature_name == "auto"`.
#   NOTE(review): presumably this lets name-based `categorical_feature` entries
#   be resolved against the Arrow column names and keeps a caller-supplied
#   `feature_name` from being overwritten -- confirm against the rest of
#   `_lazy_init`, which is not visible in this patch.
#
# tests/python_package_test/test_arrow.py:
#   * test_arrow_feature_name_auto: no `feature_name` passed,
#     categorical_feature=["a"]; expects booster.feature_name() == ["a", "b"].
#   * test_arrow_feature_name_manual: feature_name=["c", "d"],
#     categorical_feature=["c"]; expects booster.feature_name() == ["c", "d"].
#
# NOTE(review): line breaks and indentation below were reconstructed from the
# hunk headers' line counts and Python syntax; verify context lines against the
# target files before applying.
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 7b152fd2b006..c3abb62469dd 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -2126,6 +2126,8 @@ def _lazy_init(
                 categorical_feature=categorical_feature,
                 pandas_categorical=self.pandas_categorical,
             )
+        elif _is_pyarrow_table(data) and feature_name == "auto":
+            feature_name = data.column_names
 
         # process for args
         params = {} if params is None else params
@@ -2185,7 +2187,6 @@ def _lazy_init(
             self.__init_from_np2d(data, params_str, ref_dataset)
         elif _is_pyarrow_table(data):
             self.__init_from_pyarrow_table(data, params_str, ref_dataset)
-            feature_name = data.column_names
         elif isinstance(data, list) and len(data) > 0:
             if _is_list_of_numpy_arrays(data):
                 self.__init_from_list_np2d(data, params_str, ref_dataset)
diff --git a/tests/python_package_test/test_arrow.py b/tests/python_package_test/test_arrow.py
index 3a7e0f8d4fce..d8246f3842de 100644
--- a/tests/python_package_test/test_arrow.py
+++ b/tests/python_package_test/test_arrow.py
@@ -432,3 +432,25 @@ def test_predict_ranking():
         num_boost_round=5,
     )
     assert_equal_predict_arrow_pandas(booster, data)
+
+
+def test_arrow_feature_name_auto():
+    data = generate_dummy_arrow_table()
+    dataset = lgb.Dataset(
+        data, label=pa.array([0, 1, 0, 0, 1]), params=dummy_dataset_params(), categorical_feature=["a"]
+    )
+    booster = lgb.train({"num_leaves": 7}, dataset, num_boost_round=5)
+    assert booster.feature_name() == ["a", "b"]
+
+
+def test_arrow_feature_name_manual():
+    data = generate_dummy_arrow_table()
+    dataset = lgb.Dataset(
+        data,
+        label=pa.array([0, 1, 0, 0, 1]),
+        params=dummy_dataset_params(),
+        feature_name=["c", "d"],
+        categorical_feature=["c"],
+    )
+    booster = lgb.train({"num_leaves": 7}, dataset, num_boost_round=5)
+    assert booster.feature_name() == ["c", "d"]