Skip to content

Commit a55a3fc

Browse files
Change defaults for get_task to be lazy (#1354)
* Change defaults for `get_task` * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix linting errors * Add missing type annotation --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent d37542b commit a55a3fc

File tree

3 files changed

+30
-44
lines changed

3 files changed

+30
-44
lines changed

openml/tasks/functions.py

+28-42
Original file line numberDiff line numberDiff line change
@@ -347,8 +347,8 @@ def __list_tasks( # noqa: PLR0912, C901
347347
# TODO(eddiebergman): Maybe since this isn't public api, we can make it keyword only?
348348
def get_tasks(
349349
task_ids: list[int],
350-
download_data: bool = True, # noqa: FBT001, FBT002
351-
download_qualities: bool = True, # noqa: FBT001, FBT002
350+
download_data: bool | None = None,
351+
download_qualities: bool | None = None,
352352
) -> list[OpenMLTask]:
353353
"""Download tasks.
354354
@@ -367,79 +367,65 @@ def get_tasks(
367367
-------
368368
list
369369
"""
370+
if download_data is None:
371+
warnings.warn(
372+
"`download_data` will default to False starting in 0.16. "
373+
"Please set `download_data` explicitly to suppress this warning.",
374+
stacklevel=1,
375+
)
376+
download_data = True
377+
378+
if download_qualities is None:
379+
warnings.warn(
380+
"`download_qualities` will default to False starting in 0.16. "
381+
"Please set `download_qualities` explicitly to suppress this warning.",
382+
stacklevel=1,
383+
)
384+
download_qualities = True
385+
370386
tasks = []
371387
for task_id in task_ids:
372-
tasks.append(get_task(task_id, download_data, download_qualities))
388+
tasks.append(
389+
get_task(task_id, download_data=download_data, download_qualities=download_qualities)
390+
)
373391
return tasks
374392

375393

376394
@openml.utils.thread_safe_if_oslo_installed
377395
def get_task(
378396
task_id: int,
379-
*dataset_args: Any,
380-
download_splits: bool | None = None,
397+
download_splits: bool = False, # noqa: FBT001, FBT002
381398
**get_dataset_kwargs: Any,
382399
) -> OpenMLTask:
383400
"""Download OpenML task for a given task ID.
384401
385-
Downloads the task representation. By default, this will also download the data splits and
386-
the dataset. From version 0.15.0 onwards, neither the splits nor the dataset will be downloaded by
387-
default.
402+
Downloads the task representation.
388403
389404
Use the `download_splits` parameter to control whether the splits are downloaded.
390405
Moreover, you may pass additional parameters (args or kwargs) that are passed to
391406
:meth:`openml.datasets.get_dataset`.
392-
For backwards compatibility, if `download_data` is passed as an additional parameter and
393-
`download_splits` is not explicitly set, `download_data` also overrules `download_splits`'s
394-
value (deprecated from Version 0.15.0 onwards).
395407
396408
Parameters
397409
----------
398410
task_id : int
399411
The OpenML task id of the task to download.
400-
download_splits: bool (default=True)
401-
Whether to download the splits as well. From version 0.15.0 onwards this is independent
402-
of download_data and will default to ``False``.
403-
dataset_args, get_dataset_kwargs :
412+
download_splits: bool (default=False)
413+
Whether to download the splits as well.
414+
get_dataset_kwargs :
404415
Args and kwargs can be used to pass optional parameters to :meth:`openml.datasets.get_dataset`.
405-
This includes `download_data`. If set to True the splits are downloaded as well
406-
(deprecated from Version 0.15.0 onwards). The args are only present for backwards
407-
compatibility and will be removed from version 0.15.0 onwards.
408416
409417
Returns
410418
-------
411419
task: OpenMLTask
412420
"""
413-
if download_splits is None:
414-
# TODO(0.15): Switch download splits to False by default, adjust typing above, adjust
415-
# documentation above, and remove warning.
416-
warnings.warn(
417-
"Starting from Version 0.15.0 `download_splits` will default to ``False`` instead "
418-
"of ``True`` and be independent from `download_data`. To disable this message until "
419-
"version 0.15 explicitly set `download_splits` to a bool.",
420-
FutureWarning,
421-
stacklevel=3,
422-
)
423-
download_splits = get_dataset_kwargs.get("download_data", True)
424-
425421
if not isinstance(task_id, int):
426-
# TODO(0.15): Remove warning
427-
warnings.warn(
428-
"Task id must be specified as `int` from 0.14.0 onwards.",
429-
FutureWarning,
430-
stacklevel=3,
431-
)
432-
433-
try:
434-
task_id = int(task_id)
435-
except (ValueError, TypeError) as e:
436-
raise ValueError("Dataset ID is neither an Integer nor can be cast to an Integer.") from e
422+
raise TypeError(f"Task id should be integer, is {type(task_id)}")
437423

438424
tid_cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
439425

440426
try:
441427
task = _get_task_description(task_id)
442-
dataset = get_dataset(task.dataset_id, *dataset_args, **get_dataset_kwargs)
428+
dataset = get_dataset(task.dataset_id, **get_dataset_kwargs)
443429
# List of class labels available in dataset description
444430
# Including class labels as part of task meta data handles
445431
# the case where data download was initially disabled

openml/tasks/task.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str]]]:
145145
]
146146
return [(key, fields[key]) for key in order if key in fields]
147147

148-
def get_dataset(self, **kwargs) -> datasets.OpenMLDataset:
148+
def get_dataset(self, **kwargs: Any) -> datasets.OpenMLDataset:
149149
"""Download dataset associated with task.
150150
151151
Accepts the same keyword arguments as the `openml.datasets.get_dataset`.

tests/test_tasks/test_task_functions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def test_get_task(self):
154154
assert os.path.exists(
155155
os.path.join(self.workdir, "org", "openml", "test", "tasks", "1", "task.xml")
156156
)
157-
assert os.path.exists(
157+
assert not os.path.exists(
158158
os.path.join(self.workdir, "org", "openml", "test", "tasks", "1", "datasplits.arff")
159159
)
160160
assert os.path.exists(

0 commit comments

Comments
 (0)