Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python-package] Separately check whether pyarrow and cffi are installed #6785

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions python-package/lightgbm/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import scipy.sparse

from .compat import (
CFFI_INSTALLED,
PANDAS_INSTALLED,
PYARROW_INSTALLED,
arrow_cffi,
Expand Down Expand Up @@ -1706,8 +1707,8 @@ def __pred_for_pyarrow_table(
predict_type: int,
) -> Tuple[np.ndarray, int]:
"""Predict for a PyArrow table."""
if not PYARROW_INSTALLED:
raise LightGBMError("Cannot predict from Arrow without `pyarrow` installed.")
if not (PYARROW_INSTALLED and CFFI_INSTALLED):
raise LightGBMError("Cannot predict from Arrow without 'pyarrow' and 'cffi' installed.")

# Check that the input is valid: we only handle numbers (for now)
if not all(arrow_is_integer(t) or arrow_is_floating(t) or arrow_is_boolean(t) for t in table.schema.types):
Expand Down Expand Up @@ -2458,8 +2459,8 @@ def __init_from_pyarrow_table(
ref_dataset: Optional[_DatasetHandle],
) -> "Dataset":
"""Initialize data from a PyArrow table."""
if not PYARROW_INSTALLED:
raise LightGBMError("Cannot init dataframe from Arrow without `pyarrow` installed.")
if not (PYARROW_INSTALLED and CFFI_INSTALLED):
raise LightGBMError("Cannot init Dataset from Arrow without 'pyarrow' and 'cffi' installed.")

# Check that the input is valid: we only handle numbers (for now)
if not all(arrow_is_integer(t) or arrow_is_floating(t) or arrow_is_boolean(t) for t in table.schema.types):
Expand Down
31 changes: 20 additions & 11 deletions python-package/lightgbm/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,6 @@ def __init__(self, *args: Any, **kwargs: Any):
from pyarrow import ChunkedArray as pa_ChunkedArray
from pyarrow import Table as pa_Table
from pyarrow import chunked_array as pa_chunked_array
from pyarrow.cffi import ffi as arrow_cffi
from pyarrow.types import is_boolean as arrow_is_boolean
from pyarrow.types import is_floating as arrow_is_floating
from pyarrow.types import is_integer as arrow_is_integer
Expand All @@ -316,6 +315,26 @@ class pa_Table: # type: ignore
def __init__(self, *args: Any, **kwargs: Any):
pass

class pa_compute: # type: ignore
"""Dummy class for pyarrow.compute."""

all = None
equal = None

pa_chunked_array = None
arrow_is_boolean = None
arrow_is_integer = None
arrow_is_floating = None


"""cffi"""
try:
from pyarrow.cffi import ffi as arrow_cffi

CFFI_INSTALLED = True
except ImportError:
CFFI_INSTALLED = False

class arrow_cffi: # type: ignore
"""Dummy class for pyarrow.cffi.ffi."""

Expand All @@ -327,16 +346,6 @@ class arrow_cffi: # type: ignore
def __init__(self, *args: Any, **kwargs: Any):
pass

class pa_compute: # type: ignore
"""Dummy class for pyarrow.compute."""

all = None
equal = None

pa_chunked_array = None
arrow_is_boolean = None
arrow_is_integer = None
arrow_is_floating = None

"""cpu_count()"""
try:
Expand Down
9 changes: 9 additions & 0 deletions tests/python_package_test/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
import numpy as np
import pytest

import lightgbm


@pytest.fixture(scope="function")
def missing_module_cffi(monkeypatch):
"""Mock 'cffi' not being importable"""
monkeypatch.setattr(lightgbm.compat, "CFFI_INSTALLED", False)
monkeypatch.setattr(lightgbm.basic, "CFFI_INSTALLED", False)


@pytest.fixture(scope="function")
def rng():
Expand Down
30 changes: 30 additions & 0 deletions tests/python_package_test/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,3 +454,33 @@ def test_arrow_feature_name_manual():
)
booster = lgb.train({"num_leaves": 7}, dataset, num_boost_round=5)
assert booster.feature_name() == ["c", "d"]


def test_dataset_construction_from_pa_table_without_cffi_raises_informative_error(missing_module_cffi):
with pytest.raises(
lgb.basic.LightGBMError, match="Cannot init Dataset from Arrow without 'pyarrow' and 'cffi' installed."
):
lgb.Dataset(
generate_dummy_arrow_table(),
label=pa.array([0, 1, 0, 0, 1]),
params=dummy_dataset_params(),
).construct()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.construct() is necessary here... __init_from_pyarrow_table() is not run as part of lgb.Dataset().



def test_predicting_from_pa_table_without_cffi_raises_informative_error(missing_module_cffi):
data = generate_random_arrow_table(num_columns=3, num_datapoints=1_000, seed=42)
labels = generate_random_arrow_array(num_datapoints=data.shape[0], seed=42)
dataset = lgb.Dataset(
data.to_pandas(),
label=labels.to_pandas(),
)

with pytest.raises(
lgb.basic.LightGBMError, match="Cannot predict from Arrow without 'pyarrow' and 'cffi' installed."
):
bst = lgb.train(
params={"num_leaves": 7, "verbose": -1},
train_set=dataset,
num_boost_round=2,
)
bst.predict(data)
Loading