diff --git a/openml/config.py b/openml/config.py index a244a317e..d838b070a 100644 --- a/openml/config.py +++ b/openml/config.py @@ -23,6 +23,7 @@ file_handler: logging.handlers.RotatingFileHandler | None = None OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR" +OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" class _Config(TypedDict): diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 2b021c8ab..5190ac522 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -18,6 +18,7 @@ import xmltodict from openml.base import OpenMLBase +from openml.config import OPENML_SKIP_PARQUET_ENV_VAR from openml.exceptions import PyOpenMLError from .data_feature import OpenMLDataFeature @@ -359,7 +360,7 @@ def _download_data(self) -> None: # import required here to avoid circular import. from .functions import _get_dataset_arff, _get_dataset_parquet - skip_parquet = os.environ.get("OPENML_SKIP_PQ", "false").casefold() == "true" + skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" if self._parquet_url is not None and not skip_parquet: parquet_file = _get_dataset_parquet(self) self.parquet_file = None if parquet_file is None else str(parquet_file) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 8e67cd55f..3f3c709f9 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -21,6 +21,7 @@ import openml._api_calls import openml.utils +from openml.config import OPENML_SKIP_PARQUET_ENV_VAR from openml.exceptions import ( OpenMLHashException, OpenMLPrivateDatasetError, @@ -562,7 +563,7 @@ def get_dataset( # noqa: C901, PLR0912 qualities_file = _get_dataset_qualities_file(did_cache_dir, dataset_id) parquet_file = None - skip_parquet = os.environ.get("OPENML_SKIP_PQ", "false").casefold() == "true" + skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" download_parquet = "oml:parquet_url" in description and not skip_parquet if download_parquet and (download_data or download_all_files): try: