From 12f1455bc65a1cd545e210b9217f963ec2db258d Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Wed, 6 Nov 2019 09:40:34 +0100 Subject: [PATCH 01/48] Fixing broken links (#864) --- doc/contributing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/contributing.rst b/doc/contributing.rst index 067f2dcad..d23ac0ad2 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -167,7 +167,7 @@ not have the capacity to develop and maintain such interfaces on its own. For th have built an extension interface to allows others to contribute back. Building a suitable extension for therefore requires an understanding of the current OpenML-Python support. -`This example `_ +`This example `_ shows how scikit-learn currently works with OpenML-Python as an extension. The *sklearn* extension packaged with the `openml-python `_ repository can be used as a template/benchmark to build the new extension. @@ -188,7 +188,7 @@ API Interfacing with OpenML-Python ++++++++++++++++++++++++++++++ Once the new extension class has been defined, the openml-python module to -:meth:`openml.extensions.register_extension.html` must be called to allow OpenML-Python to +:meth:`openml.extensions.register_extension` must be called to allow OpenML-Python to interface the new extension. From e489f41d5ff1341b6d25ca07ccd9c20e96f97ed5 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Wed, 6 Nov 2019 17:03:03 +0100 Subject: [PATCH 02/48] Adding license to each source file (#862) * Preliminary addition of license to source files * Adding license to almost every source file --- CONTRIBUTING.md | 2 ++ PULL_REQUEST_TEMPLATE.md | 3 ++- ci_scripts/create_doc.sh | 2 ++ ci_scripts/flake8_diff.sh | 2 ++ ci_scripts/install.sh | 2 ++ ci_scripts/success.sh | 2 ++ ci_scripts/test.sh | 2 ++ doc/progress.rst | 4 ++++ examples/20_basic/introduction_tutorial.py | 3 +++ examples/20_basic/simple_datasets_tutorial.py | 2 ++ examples/20_basic/simple_flows_and_runs_tutorial.py | 2 ++ examples/20_basic/simple_suites_tutorial.py | 2 ++ examples/30_extended/configure_logging.py | 2 ++ examples/30_extended/create_upload_tutorial.py | 3 +++ examples/30_extended/datasets_tutorial.py | 3 +++ examples/30_extended/fetch_evaluations_tutorial.py | 3 +++ examples/30_extended/flow_id_tutorial.py | 3 +++ examples/30_extended/flows_and_runs_tutorial.py | 2 ++ examples/30_extended/plot_svm_hyperparameters_tutorial.py | 3 +++ examples/30_extended/run_setup_tutorial.py | 3 +++ examples/30_extended/study_tutorial.py | 3 +++ examples/30_extended/suites_tutorial.py | 4 +++- examples/30_extended/task_manual_iteration_tutorial.py | 2 ++ examples/30_extended/tasks_tutorial.py | 2 ++ examples/40_paper/2015_neurips_feurer_example.py | 2 ++ examples/40_paper/2018_ida_strang_example.py | 3 +++ examples/40_paper/2018_kdd_rijn_example.py | 3 +++ examples/40_paper/2018_neurips_perrone_example.py | 3 +++ openml/__init__.py | 2 ++ openml/__version__.py | 2 ++ openml/_api_calls.py | 2 ++ openml/base.py | 2 ++ openml/config.py | 3 +++ openml/datasets/__init__.py | 2 ++ openml/datasets/data_feature.py | 3 +++ openml/datasets/dataset.py | 2 ++ openml/datasets/functions.py | 2 ++ openml/evaluations/__init__.py | 2 ++ openml/evaluations/evaluation.py | 2 ++ openml/evaluations/functions.py | 2 ++ openml/exceptions.py | 3 +++ openml/extensions/__init__.py | 2 ++ openml/extensions/extension_interface.py | 2 ++ openml/extensions/functions.py | 2 ++ openml/extensions/sklearn/__init__.py | 2 ++ openml/extensions/sklearn/extension.py | 2 ++ 
openml/flows/__init__.py | 2 ++ openml/flows/flow.py | 2 ++ openml/flows/functions.py | 2 ++ openml/runs/__init__.py | 2 ++ openml/runs/functions.py | 2 ++ openml/runs/run.py | 2 ++ openml/runs/trace.py | 2 ++ openml/setups/__init__.py | 2 ++ openml/setups/functions.py | 2 ++ openml/setups/setup.py | 2 ++ openml/study/__init__.py | 2 ++ openml/study/functions.py | 2 ++ openml/study/study.py | 2 ++ openml/tasks/__init__.py | 2 ++ openml/tasks/functions.py | 2 ++ openml/tasks/split.py | 2 ++ openml/tasks/task.py | 2 ++ openml/testing.py | 2 ++ openml/utils.py | 2 ++ setup.py | 2 ++ tests/__init__.py | 2 ++ tests/conftest.py | 2 ++ tests/test_datasets/test_dataset.py | 2 ++ tests/test_datasets/test_dataset_functions.py | 2 ++ tests/test_evaluations/test_evaluation_functions.py | 2 ++ tests/test_evaluations/test_evaluations_example.py | 2 ++ tests/test_extensions/test_functions.py | 2 ++ .../test_sklearn_extension/test_sklearn_extension.py | 2 ++ tests/test_flows/dummy_learn/dummy_forest.py | 3 +++ tests/test_flows/test_flow.py | 2 ++ tests/test_flows/test_flow_functions.py | 2 ++ tests/test_openml/test_config.py | 2 ++ tests/test_openml/test_openml.py | 2 ++ tests/test_runs/test_run.py | 2 ++ tests/test_runs/test_run_functions.py | 2 ++ tests/test_runs/test_trace.py | 2 ++ tests/test_setups/__init__.py | 2 ++ tests/test_setups/test_setup_functions.py | 2 ++ tests/test_study/test_study_examples.py | 2 ++ tests/test_study/test_study_functions.py | 2 ++ tests/test_tasks/__init__.py | 2 ++ tests/test_tasks/test_classification_task.py | 2 ++ tests/test_tasks/test_clustering_task.py | 2 ++ tests/test_tasks/test_learning_curve_task.py | 2 ++ tests/test_tasks/test_regression_task.py | 2 ++ tests/test_tasks/test_split.py | 2 ++ tests/test_tasks/test_supervised_task.py | 2 ++ tests/test_tasks/test_task.py | 2 ++ tests/test_tasks/test_task_functions.py | 2 ++ tests/test_tasks/test_task_methods.py | 2 ++ 96 files changed, 210 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5a77dfd58..7a4da2e1e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -106,6 +106,8 @@ following rules before you submit a pull request: - Add your changes to the changelog in the file doc/progress.rst. + - If any source file is being added to the repository, please add the BSD 3-Clause license to it. + You can also check for common programming errors with the following tools: diff --git a/PULL_REQUEST_TEMPLATE.md b/PULL_REQUEST_TEMPLATE.md index 571ae0d1c..47a5741e6 100644 --- a/PULL_REQUEST_TEMPLATE.md +++ b/PULL_REQUEST_TEMPLATE.md @@ -5,12 +5,13 @@ the contribution guidelines: https://github.com/openml/openml-python/blob/master Please make sure that: * this pull requests is against the `develop` branch -* you updated all docs, this includes the changelog! 
+* you updated all docs, this includes the changelog (doc/progress.rst) * for any new function or class added, please add it to doc/api.rst * the list of classes and functions should be alphabetical * for any new functionality, consider adding a relevant example * add unit tests for new functionalities * collect files uploaded to test server using _mark_entity_for_removal() +* add the BSD 3-Clause license to any new file created --> #### Reference Issue diff --git a/ci_scripts/create_doc.sh b/ci_scripts/create_doc.sh index c9dd800a0..83afaa26b 100644 --- a/ci_scripts/create_doc.sh +++ b/ci_scripts/create_doc.sh @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + set -euo pipefail # Check if DOCPUSH is set diff --git a/ci_scripts/flake8_diff.sh b/ci_scripts/flake8_diff.sh index d74577341..1e32f2c7d 100755 --- a/ci_scripts/flake8_diff.sh +++ b/ci_scripts/flake8_diff.sh @@ -1,5 +1,7 @@ #!/bin/bash +# License: BSD 3-Clause + # Update /CONTRIBUTING.md if these commands change. # The reason for not advocating using this script directly is that it # might not work out of the box on Windows. diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh index a223cf84b..5c338fe5e 100644 --- a/ci_scripts/install.sh +++ b/ci_scripts/install.sh @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + # Deactivate the travis-provided virtual environment and setup a # conda-based environment instead deactivate diff --git a/ci_scripts/success.sh b/ci_scripts/success.sh index dbeb18e58..dad97d54e 100644 --- a/ci_scripts/success.sh +++ b/ci_scripts/success.sh @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + set -e if [[ "$COVERAGE" == "true" ]]; then diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh index f46b0eecb..8659a105b 100644 --- a/ci_scripts/test.sh +++ b/ci_scripts/test.sh @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + set -e # check status and branch before running the unit tests diff --git a/doc/progress.rst b/doc/progress.rst index 97fc165a1..ba6225986 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -6,6 +6,10 @@ Changelog ========= +0.10.2 +~~~~~~ +* DOC #862: Added license BSD 3-Clause to each of the source files. + 0.10.1 ~~~~~~ * ADD #175: Automatically adds the docstring of scikit-learn objects to flow and its parameters. diff --git a/examples/20_basic/introduction_tutorial.py b/examples/20_basic/introduction_tutorial.py index 42537724c..151692fdc 100644 --- a/examples/20_basic/introduction_tutorial.py +++ b/examples/20_basic/introduction_tutorial.py @@ -55,6 +55,9 @@ # crowding with example datasets, tasks, studies, and so on. ############################################################################ + +# License: BSD 3-Clause + import openml from sklearn import neighbors diff --git a/examples/20_basic/simple_datasets_tutorial.py b/examples/20_basic/simple_datasets_tutorial.py index dfefbe1e3..bb90aedcc 100644 --- a/examples/20_basic/simple_datasets_tutorial.py +++ b/examples/20_basic/simple_datasets_tutorial.py @@ -11,6 +11,8 @@ # at OpenML. However, for the purposes of this tutorial, we are going to work with # the datasets directly. 
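An illustrative sketch of the direct dataset access this tutorial refers to, assuming the standard openml.datasets API ('iris' is only an example dataset, the same one used in the logging example patched later in this series):

    import openml

    # Download (and cache) a single dataset by name.
    dataset = openml.datasets.get_dataset('iris')
    print(dataset.name)                      # name as registered on OpenML
    print(dataset.default_target_attribute)  # target column, e.g. 'class'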
+# License: BSD 3-Clause + import openml ############################################################################ # List datasets diff --git a/examples/20_basic/simple_flows_and_runs_tutorial.py b/examples/20_basic/simple_flows_and_runs_tutorial.py index e3f028418..14c5c7761 100644 --- a/examples/20_basic/simple_flows_and_runs_tutorial.py +++ b/examples/20_basic/simple_flows_and_runs_tutorial.py @@ -5,6 +5,8 @@ A simple tutorial on how to train/run a model and how to upload the results. """ +# License: BSD 3-Clause + import openml from sklearn import ensemble, neighbors diff --git a/examples/20_basic/simple_suites_tutorial.py b/examples/20_basic/simple_suites_tutorial.py index 3a555b9d3..37f1eeffb 100644 --- a/examples/20_basic/simple_suites_tutorial.py +++ b/examples/20_basic/simple_suites_tutorial.py @@ -9,6 +9,8 @@ and simplify both the sharing of the setup and the results. """ +# License: BSD 3-Clause + import openml #################################################################################################### diff --git a/examples/30_extended/configure_logging.py b/examples/30_extended/configure_logging.py index e16dfe245..9b14fffd6 100644 --- a/examples/30_extended/configure_logging.py +++ b/examples/30_extended/configure_logging.py @@ -22,6 +22,8 @@ # It is possible to configure what log levels to send to console and file. # When downloading a dataset from OpenML, a `DEBUG`-level message is written: +# License: BSD 3-Clause + import openml openml.datasets.get_dataset('iris') diff --git a/examples/30_extended/create_upload_tutorial.py b/examples/30_extended/create_upload_tutorial.py index faca335ea..7c3af4b9f 100644 --- a/examples/30_extended/create_upload_tutorial.py +++ b/examples/30_extended/create_upload_tutorial.py @@ -4,6 +4,9 @@ A tutorial on how to create and upload a dataset to OpenML. """ + +# License: BSD 3-Clause + import numpy as np import pandas as pd import sklearn.datasets diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index 357360f80..4728008b4 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -6,6 +6,9 @@ How to list and download datasets. 
""" ############################################################################ + +# License: BSD 3-Clauses + import openml import pandas as pd diff --git a/examples/30_extended/fetch_evaluations_tutorial.py b/examples/30_extended/fetch_evaluations_tutorial.py index b6e15e221..b1c7b9a3d 100644 --- a/examples/30_extended/fetch_evaluations_tutorial.py +++ b/examples/30_extended/fetch_evaluations_tutorial.py @@ -20,6 +20,9 @@ """ ############################################################################ + +# License: BSD 3-Clause + import openml ############################################################################ diff --git a/examples/30_extended/flow_id_tutorial.py b/examples/30_extended/flow_id_tutorial.py index 5bb001493..ef3689ea1 100644 --- a/examples/30_extended/flow_id_tutorial.py +++ b/examples/30_extended/flow_id_tutorial.py @@ -8,6 +8,9 @@ """ #################################################################################################### + +# License: BSD 3-Clause + import sklearn.tree import openml diff --git a/examples/30_extended/flows_and_runs_tutorial.py b/examples/30_extended/flows_and_runs_tutorial.py index d5740e5ab..b307ad260 100644 --- a/examples/30_extended/flows_and_runs_tutorial.py +++ b/examples/30_extended/flows_and_runs_tutorial.py @@ -5,6 +5,8 @@ How to train/run a model and how to upload the results. """ +# License: BSD 3-Clause + import openml from sklearn import compose, ensemble, impute, neighbors, preprocessing, pipeline, tree diff --git a/examples/30_extended/plot_svm_hyperparameters_tutorial.py b/examples/30_extended/plot_svm_hyperparameters_tutorial.py index 714e64221..ad91d9af9 100644 --- a/examples/30_extended/plot_svm_hyperparameters_tutorial.py +++ b/examples/30_extended/plot_svm_hyperparameters_tutorial.py @@ -3,6 +3,9 @@ Plotting hyperparameter surfaces ================================ """ + +# License: BSD 3-Clause + import openml import numpy as np diff --git a/examples/30_extended/run_setup_tutorial.py b/examples/30_extended/run_setup_tutorial.py index 8ce03f4b6..071cc51b1 100644 --- a/examples/30_extended/run_setup_tutorial.py +++ b/examples/30_extended/run_setup_tutorial.py @@ -29,6 +29,9 @@ connects to the test server at test.openml.org. This prevents the main server from crowding with example datasets, tasks, runs, and so on. """ + +# License: BSD 3-Clause + import numpy as np import openml import sklearn.ensemble diff --git a/examples/30_extended/study_tutorial.py b/examples/30_extended/study_tutorial.py index de2be33f8..9a9729a5c 100644 --- a/examples/30_extended/study_tutorial.py +++ b/examples/30_extended/study_tutorial.py @@ -10,6 +10,9 @@ tasks, all required information about a study can be retrieved. """ ############################################################################ + +# License: BSD 3-Clause + import uuid import numpy as np diff --git a/examples/30_extended/suites_tutorial.py b/examples/30_extended/suites_tutorial.py index c5eb5718f..b41e08e74 100644 --- a/examples/30_extended/suites_tutorial.py +++ b/examples/30_extended/suites_tutorial.py @@ -10,13 +10,15 @@ `OpenML benchmark docs `_. """ ############################################################################ + +# License: BSD 3-Clause + import uuid import numpy as np import openml - ############################################################################ # .. warning:: This example uploads data. For that reason, this example # connects to the test server at test.openml.org before doing so. 
diff --git a/examples/30_extended/task_manual_iteration_tutorial.py b/examples/30_extended/task_manual_iteration_tutorial.py index e4f070501..7ec824e38 100644 --- a/examples/30_extended/task_manual_iteration_tutorial.py +++ b/examples/30_extended/task_manual_iteration_tutorial.py @@ -10,6 +10,8 @@ but not OpenML's functionality to conduct runs. """ +# License: BSD 3-Clause + import openml #################################################################################################### diff --git a/examples/30_extended/tasks_tutorial.py b/examples/30_extended/tasks_tutorial.py index 1fb23f63d..e12c6f653 100644 --- a/examples/30_extended/tasks_tutorial.py +++ b/examples/30_extended/tasks_tutorial.py @@ -5,6 +5,8 @@ A tutorial on how to list and download tasks. """ +# License: BSD 3-Clause + import openml import pandas as pd diff --git a/examples/40_paper/2015_neurips_feurer_example.py b/examples/40_paper/2015_neurips_feurer_example.py index 8ca2412ba..58b242add 100644 --- a/examples/40_paper/2015_neurips_feurer_example.py +++ b/examples/40_paper/2015_neurips_feurer_example.py @@ -15,6 +15,8 @@ | Available at http://papers.nips.cc/paper/5872-efficient-and-robust-automated-machine-learning.pdf """ # noqa F401 +# License: BSD 3-Clause + import pandas as pd import openml diff --git a/examples/40_paper/2018_ida_strang_example.py b/examples/40_paper/2018_ida_strang_example.py index ef35a4a21..3f9bcc49e 100644 --- a/examples/40_paper/2018_ida_strang_example.py +++ b/examples/40_paper/2018_ida_strang_example.py @@ -13,6 +13,9 @@ | In *Advances in Intelligent Data Analysis XVII 17th International Symposium*, 2018 | Available at https://link.springer.com/chapter/10.1007%2F978-3-030-01768-2_25 """ + +# License: BSD 3-Clause + import matplotlib.pyplot as plt import openml import pandas as pd diff --git a/examples/40_paper/2018_kdd_rijn_example.py b/examples/40_paper/2018_kdd_rijn_example.py index 3302333ae..ae2a0672e 100644 --- a/examples/40_paper/2018_kdd_rijn_example.py +++ b/examples/40_paper/2018_kdd_rijn_example.py @@ -15,6 +15,9 @@ | In *Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining*, 2018 | Available at https://dl.acm.org/citation.cfm?id=3220058 """ + +# License: BSD 3-Clause + import sys if sys.platform == 'win32': # noqa diff --git a/examples/40_paper/2018_neurips_perrone_example.py b/examples/40_paper/2018_neurips_perrone_example.py index 5513fab30..2127bdfe4 100644 --- a/examples/40_paper/2018_neurips_perrone_example.py +++ b/examples/40_paper/2018_neurips_perrone_example.py @@ -24,6 +24,9 @@ """ ############################################################################ + +# License: BSD 3-Clause + import openml import numpy as np import pandas as pd diff --git a/openml/__init__.py b/openml/__init__.py index 94c46341f..f71c32e40 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -15,6 +15,8 @@ `_). """ +# License: BSD 3-Clause + from . import _api_calls from . 
import config from .datasets import OpenMLDataset, OpenMLDataFeature diff --git a/openml/__version__.py b/openml/__version__.py index 30750c80a..ec9e2af03 100644 --- a/openml/__version__.py +++ b/openml/__version__.py @@ -1,4 +1,6 @@ """Version information.""" +# License: BSD 3-Clause + # The following line *must* be the last in the module, exactly as formatted: __version__ = "0.10.1" diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 22223d587..5068dc208 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import time from typing import Dict import requests diff --git a/openml/base.py b/openml/base.py index 9e28bd055..e02aabb0f 100644 --- a/openml/base.py +++ b/openml/base.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from abc import ABC, abstractmethod from collections import OrderedDict import re diff --git a/openml/config.py b/openml/config.py index 2af1bfef6..eee2c7fdb 100644 --- a/openml/config.py +++ b/openml/config.py @@ -1,6 +1,9 @@ """ Store module level information like the API key, cache directory and the server """ + +# License: BSD 3-Clause + import logging import logging.handlers import os diff --git a/openml/datasets/__init__.py b/openml/datasets/__init__.py index 8f52e16fc..9783494af 100644 --- a/openml/datasets/__init__.py +++ b/openml/datasets/__init__.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from .functions import ( attributes_arff_from_df, check_datasets_active, diff --git a/openml/datasets/data_feature.py b/openml/datasets/data_feature.py index 077be639e..dfb1aa112 100644 --- a/openml/datasets/data_feature.py +++ b/openml/datasets/data_feature.py @@ -1,3 +1,6 @@ +# License: BSD 3-Clause + + class OpenMLDataFeature(object): """ Data Feature (a.k.a. Attribute) object. 
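Relating back to the extension interface described in the contributing guide changed in [PATCH 01/48] above: a minimal sketch of registering a third-party extension. MyLibraryExtension is hypothetical, and a real implementation must provide all abstract methods of the Extension interface (omitted here):

    from openml.extensions import Extension, register_extension

    class MyLibraryExtension(Extension):
        """Skeleton extension for a hypothetical ML library."""
        # A real extension implements the abstract methods of Extension,
        # e.g. converting between the library's models and OpenML flows.

    # Make OpenML-Python aware of the new extension, similar to how the
    # bundled sklearn extension registers itself in its __init__.py.
    register_extension(MyLibraryExtension)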
diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 26215736d..9f831458b 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from collections import OrderedDict import re import gzip diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index bc2606506..e85c55aa3 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import io import logging import os diff --git a/openml/evaluations/__init__.py b/openml/evaluations/__init__.py index 43cec8738..0bee18ba3 100644 --- a/openml/evaluations/__init__.py +++ b/openml/evaluations/__init__.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from .evaluation import OpenMLEvaluation from .functions import list_evaluations, list_evaluation_measures, list_evaluations_setups diff --git a/openml/evaluations/evaluation.py b/openml/evaluations/evaluation.py index 9d8507708..1adb449a5 100644 --- a/openml/evaluations/evaluation.py +++ b/openml/evaluations/evaluation.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import openml.config diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py index 8de69ebc1..cf2169c79 100644 --- a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import json import xmltodict import pandas as pd diff --git a/openml/exceptions.py b/openml/exceptions.py index 78accd671..6dff18a52 100644 --- a/openml/exceptions.py +++ b/openml/exceptions.py @@ -1,3 +1,6 @@ +# License: BSD 3-Clause + + class PyOpenMLError(Exception): def __init__(self, message: str): self.message = message diff --git a/openml/extensions/__init__.py b/openml/extensions/__init__.py index 374e856e3..13b644e04 100644 --- a/openml/extensions/__init__.py +++ b/openml/extensions/__init__.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from typing import List, Type # noqa: F401 from .extension_interface import Extension diff --git a/openml/extensions/extension_interface.py b/openml/extensions/extension_interface.py index d963edb1b..070d17205 100644 --- a/openml/extensions/extension_interface.py +++ b/openml/extensions/extension_interface.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from abc import ABC, abstractmethod from collections import OrderedDict # noqa: F401 from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING, Union diff --git a/openml/extensions/functions.py b/openml/extensions/functions.py index 93fab5345..826cb0853 100644 --- a/openml/extensions/functions.py +++ b/openml/extensions/functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from typing import Any, Optional, Type, TYPE_CHECKING from . 
import Extension # Need to implement the following by its full path because otherwise it won't be possible to diff --git a/openml/extensions/sklearn/__init__.py b/openml/extensions/sklearn/__init__.py index a9d1db37b..1c1732cde 100644 --- a/openml/extensions/sklearn/__init__.py +++ b/openml/extensions/sklearn/__init__.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from .extension import SklearnExtension from openml.extensions import register_extension diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index cc3352a20..ca6c77458 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from collections import OrderedDict # noqa: F401 import copy from distutils.version import LooseVersion diff --git a/openml/flows/__init__.py b/openml/flows/__init__.py index 3bbf1b21b..f2c16a8a0 100644 --- a/openml/flows/__init__.py +++ b/openml/flows/__init__.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from .flow import OpenMLFlow from .functions import get_flow, list_flows, flow_exists, get_flow_id, assert_flows_equal diff --git a/openml/flows/flow.py b/openml/flows/flow.py index 732f54208..bd8d97d7c 100644 --- a/openml/flows/flow.py +++ b/openml/flows/flow.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from collections import OrderedDict import os from typing import Dict, List, Union, Tuple, Optional # noqa: F401 diff --git a/openml/flows/functions.py b/openml/flows/functions.py index 4389eb3c0..5bbbcbd16 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import dateutil.parser from collections import OrderedDict import os diff --git a/openml/runs/__init__.py b/openml/runs/__init__.py index 76aabcbc4..80d0c0ae3 100644 --- a/openml/runs/__init__.py +++ b/openml/runs/__init__.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from .run import OpenMLRun from .trace import OpenMLRunTrace, OpenMLTraceIteration from .functions import ( diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 95407d517..aefc2162a 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from collections import OrderedDict import io import itertools diff --git a/openml/runs/run.py b/openml/runs/run.py index e3df97083..7229cfb00 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from collections import OrderedDict import pickle import time diff --git a/openml/runs/trace.py b/openml/runs/trace.py index c6ca1f057..220a10c95 100644 --- a/openml/runs/trace.py +++ b/openml/runs/trace.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from collections import OrderedDict import json import os diff --git a/openml/setups/__init__.py b/openml/setups/__init__.py index a8b4a8863..4f0be9571 100644 --- a/openml/setups/__init__.py +++ b/openml/setups/__init__.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from .setup import OpenMLSetup, OpenMLParameter from .functions import get_setup, list_setups, setup_exists, initialize_model diff --git a/openml/setups/functions.py b/openml/setups/functions.py index 97c001b24..5f3b796c8 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from collections import OrderedDict import io import os diff --git a/openml/setups/setup.py b/openml/setups/setup.py index 31fdc15a4..36bddb11f 100644 --- a/openml/setups/setup.py +++ b/openml/setups/setup.py @@ -1,3 +1,5 @@ 
+# License: BSD 3-Clause + import openml.config diff --git a/openml/study/__init__.py b/openml/study/__init__.py index 02b37d514..8fe308a8c 100644 --- a/openml/study/__init__.py +++ b/openml/study/__init__.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from .study import OpenMLStudy, OpenMLBenchmarkSuite from .functions import ( get_study, diff --git a/openml/study/functions.py b/openml/study/functions.py index 25ebea5fd..35889c68d 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from typing import cast, Dict, List, Optional, Union import warnings diff --git a/openml/study/study.py b/openml/study/study.py index 64d47dce7..955546781 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from collections import OrderedDict from typing import Dict, List, Optional, Tuple, Union, Any diff --git a/openml/tasks/__init__.py b/openml/tasks/__init__.py index f21cac871..2bd319637 100644 --- a/openml/tasks/__init__.py +++ b/openml/tasks/__init__.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from .task import ( OpenMLTask, OpenMLSupervisedTask, diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index 4bb93b007..a386dec17 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from collections import OrderedDict import io import re diff --git a/openml/tasks/split.py b/openml/tasks/split.py index 3815f4257..ad6170a62 100644 --- a/openml/tasks/split.py +++ b/openml/tasks/split.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from collections import namedtuple, OrderedDict import os import pickle diff --git a/openml/tasks/task.py b/openml/tasks/task.py index f415a3fea..3b1c8abe7 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from abc import ABC from collections import OrderedDict import io diff --git a/openml/testing.py b/openml/testing.py index 370fb9102..7ebf37541 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import hashlib import inspect import os diff --git a/openml/utils.py b/openml/utils.py index a458d3132..09a0f6a83 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import os import hashlib import xmltodict diff --git a/setup.py b/setup.py index f4fbe7991..9c9766636 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +# License: BSD 3-Clause + import os import setuptools import sys diff --git a/tests/__init__.py b/tests/__init__.py index dc5287024..b71163cb2 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + # Dummy to allow mock classes in the test files to have a version number for # their parent module __version__ = '0.1' diff --git a/tests/conftest.py b/tests/conftest.py index 056cc7f96..ae8f0dfa9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,6 +20,8 @@ testing.py in each of the unit test modules. 
''' +# License: BSD 3-Clause + import os import logging from typing import List diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index 9d1076371..f40dc5015 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from time import time from warnings import filterwarnings, catch_warnings diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index fb363bcf4..2f1a820aa 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import os import random from itertools import product diff --git a/tests/test_evaluations/test_evaluation_functions.py b/tests/test_evaluations/test_evaluation_functions.py index fe38a5a66..25651a8cc 100644 --- a/tests/test_evaluations/test_evaluation_functions.py +++ b/tests/test_evaluations/test_evaluation_functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import openml import openml.evaluations from openml.testing import TestBase diff --git a/tests/test_evaluations/test_evaluations_example.py b/tests/test_evaluations/test_evaluations_example.py index 490971c1e..50e3e4079 100644 --- a/tests/test_evaluations/test_evaluations_example.py +++ b/tests/test_evaluations/test_evaluations_example.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import unittest diff --git a/tests/test_extensions/test_functions.py b/tests/test_extensions/test_functions.py index 76b1f9d0c..3da91b789 100644 --- a/tests/test_extensions/test_functions.py +++ b/tests/test_extensions/test_functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import inspect import openml.testing diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index a93c79bcd..6bb6b5383 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import collections import json import re diff --git a/tests/test_flows/dummy_learn/dummy_forest.py b/tests/test_flows/dummy_learn/dummy_forest.py index 06eaab62e..613f73852 100644 --- a/tests/test_flows/dummy_learn/dummy_forest.py +++ b/tests/test_flows/dummy_learn/dummy_forest.py @@ -1,3 +1,6 @@ +# License: BSD 3-Clause + + class DummyRegressor(object): def fit(self, X, y): return self diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index 25e2dacfb..7e735d655 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import collections import copy from distutils.version import LooseVersion diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index 91c107b3d..5a189b996 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from collections import OrderedDict import copy import unittest diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 44cf4862f..d4331a169 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import os import openml.config diff --git a/tests/test_openml/test_openml.py b/tests/test_openml/test_openml.py index a3fdf541c..eda4af948 100644 --- 
a/tests/test_openml/test_openml.py +++ b/tests/test_openml/test_openml.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from unittest import mock from openml.testing import TestBase diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index 0266ca4d9..1d7c9bb18 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import numpy as np import random import os diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 4ff39ac6d..d44a000d6 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import arff from distutils.version import LooseVersion import os diff --git a/tests/test_runs/test_trace.py b/tests/test_runs/test_trace.py index 29f3a1554..be339617d 100644 --- a/tests/test_runs/test_trace.py +++ b/tests/test_runs/test_trace.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from openml.runs import OpenMLRunTrace, OpenMLTraceIteration from openml.testing import TestBase diff --git a/tests/test_setups/__init__.py b/tests/test_setups/__init__.py index dc5287024..b71163cb2 100644 --- a/tests/test_setups/__init__.py +++ b/tests/test_setups/__init__.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + # Dummy to allow mock classes in the test files to have a version number for # their parent module __version__ = '0.1' diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index 16e149544..4dc27c95f 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import hashlib import time import unittest.mock diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py index 1d9c56d54..b93565511 100644 --- a/tests/test_study/test_study_examples.py +++ b/tests/test_study/test_study_examples.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from openml.testing import TestBase, SimpleImputer diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py index e31a40cd2..490fc7226 100644 --- a/tests/test_study/test_study_functions.py +++ b/tests/test_study/test_study_functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import openml import openml.study from openml.testing import TestBase diff --git a/tests/test_tasks/__init__.py b/tests/test_tasks/__init__.py index e823eb2c7..2969dc9dd 100644 --- a/tests/test_tasks/__init__.py +++ b/tests/test_tasks/__init__.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from .test_task import OpenMLTaskTest from .test_supervised_task import OpenMLSupervisedTaskTest diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py index e5b7c4415..13068e8cb 100644 --- a/tests/test_tasks/test_classification_task.py +++ b/tests/test_tasks/test_classification_task.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import numpy as np from openml.tasks import get_task diff --git a/tests/test_tasks/test_clustering_task.py b/tests/test_tasks/test_clustering_task.py index 53152acb5..8f916717a 100644 --- a/tests/test_tasks/test_clustering_task.py +++ b/tests/test_tasks/test_clustering_task.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import openml from openml.testing import TestBase from .test_task import OpenMLTaskTest diff --git a/tests/test_tasks/test_learning_curve_task.py b/tests/test_tasks/test_learning_curve_task.py index 625252606..bfcfebcd2 100644 --- 
a/tests/test_tasks/test_learning_curve_task.py +++ b/tests/test_tasks/test_learning_curve_task.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import numpy as np from openml.tasks import get_task diff --git a/tests/test_tasks/test_regression_task.py b/tests/test_tasks/test_regression_task.py index 57ff964cd..fbb3ff607 100644 --- a/tests/test_tasks/test_regression_task.py +++ b/tests/test_tasks/test_regression_task.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import numpy as np from .test_supervised_task import OpenMLSupervisedTaskTest diff --git a/tests/test_tasks/test_split.py b/tests/test_tasks/test_split.py index 763bb15f7..fb31a56b3 100644 --- a/tests/test_tasks/test_split.py +++ b/tests/test_tasks/test_split.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import inspect import os diff --git a/tests/test_tasks/test_supervised_task.py b/tests/test_tasks/test_supervised_task.py index f7112b1cf..59fe61bc5 100644 --- a/tests/test_tasks/test_supervised_task.py +++ b/tests/test_tasks/test_supervised_task.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from typing import Tuple import unittest diff --git a/tests/test_tasks/test_task.py b/tests/test_tasks/test_task.py index 0154dc2a3..9d80a1dec 100644 --- a/tests/test_tasks/test_task.py +++ b/tests/test_tasks/test_task.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import unittest from typing import List from random import randint, shuffle diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index fd64f805d..4a71a83a7 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + import os from unittest import mock diff --git a/tests/test_tasks/test_task_methods.py b/tests/test_tasks/test_task_methods.py index 4a0789414..5cddd7fc4 100644 --- a/tests/test_tasks/test_task_methods.py +++ b/tests/test_tasks/test_task_methods.py @@ -1,3 +1,5 @@ +# License: BSD 3-Clause + from time import time import openml From 33bf643605fc5ec645200539136865d84679727d Mon Sep 17 00:00:00 2001 From: Sahithya Ravi <44670788+sahithyaravi1493@users.noreply.github.com> Date: Wed, 6 Nov 2019 18:06:41 +0100 Subject: [PATCH 03/48] add task_type to list_runs (#857) * add task_type to list_runs * length of run change * changelog * changes in progress rst --- doc/progress.rst | 1 + openml/runs/functions.py | 1 + tests/test_runs/test_run_functions.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/progress.rst b/doc/progress.rst index ba6225986..b65df1926 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -8,6 +8,7 @@ Changelog 0.10.2 ~~~~~~ +* ADD #857: Adds task type ID to list_runs * DOC #862: Added license BSD 3-Clause to each of the source files. 
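An illustrative sketch of the new field (assuming the usual list_runs call signature); each run listed now also carries the task type ID:

    import openml

    # list_runs returns a dict mapping run id -> run description dict.
    runs = openml.runs.list_runs(size=10)
    for run_id, run in runs.items():
        # 'task_type' is the entry added by #857.
        print(run_id, run['task_id'], run['task_type'])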
0.10.1 diff --git a/openml/runs/functions.py b/openml/runs/functions.py index aefc2162a..9e7321d45 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -969,6 +969,7 @@ def __list_runs(api_call, output_format='dict'): 'setup_id': int(run_['oml:setup_id']), 'flow_id': int(run_['oml:flow_id']), 'uploader': int(run_['oml:uploader']), + 'task_type': int(run_['oml:task_type_id']), 'upload_time': str(run_['oml:upload_time']), 'error_message': str((run_['oml:error_message']) or '')} diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index d44a000d6..2773bc8d9 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1128,7 +1128,7 @@ def _check_run(self, run): # error_message and run_details exist, too, but are not used so far. We need to update # this check once they are used! self.assertIsInstance(run, dict) - assert len(run) == 7, str(run) + assert len(run) == 8, str(run) def test_get_runs_list(self): # TODO: comes from live, no such lists on test From e5e385825d206ab420d34f1dcc5c30bc5db866f8 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Thu, 7 Nov 2019 11:12:26 +0100 Subject: [PATCH 04/48] Prepare new release (#868) --- openml/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/__version__.py b/openml/__version__.py index ec9e2af03..11a584d41 100644 --- a/openml/__version__.py +++ b/openml/__version__.py @@ -3,4 +3,4 @@ # License: BSD 3-Clause # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.10.1" +__version__ = "0.10.2" From 46df5299d33343ee4e0dbe052d11fe5c5dea2047 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Thu, 7 Nov 2019 16:50:21 +0100 Subject: [PATCH 05/48] start version 0.11.0dev (#872) --- doc/progress.rst | 3 +++ openml/__version__.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/progress.rst b/doc/progress.rst index b65df1926..84a94c42a 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -6,6 +6,9 @@ Changelog ========= +0.11.0 +~~~~~~ + 0.10.2 ~~~~~~ * ADD #857: Adds task type ID to list_runs diff --git a/openml/__version__.py b/openml/__version__.py index 11a584d41..338948217 100644 --- a/openml/__version__.py +++ b/openml/__version__.py @@ -3,4 +3,4 @@ # License: BSD 3-Clause # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.10.2" +__version__ = "0.11.0dev" From fb1c1d94b2bce8b452581ba912530eb4dc1f3e9f Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Fri, 8 Nov 2019 15:23:39 +0100 Subject: [PATCH 06/48] Do not populate server base URL on startup (#873) * do not populate server base URL on startup * update changelog * fix pep8 --- doc/progress.rst | 3 +++ openml/base.py | 2 +- openml/config.py | 15 ++++++++++++++- openml/runs/run.py | 2 +- openml/study/study.py | 2 +- openml/tasks/task.py | 2 +- 6 files changed, 21 insertions(+), 5 deletions(-) diff --git a/doc/progress.rst b/doc/progress.rst index 84a94c42a..b7d4b4992 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -9,6 +9,9 @@ Changelog 0.11.0 ~~~~~~ +* FIX #873: Fixes an issue which resulted in incorrect URLs when printing OpenML objects after + switching the server + 0.10.2 ~~~~~~ * ADD #857: Adds task type ID to list_runs diff --git a/openml/base.py b/openml/base.py index e02aabb0f..1e98efcca 100644 --- a/openml/base.py +++ b/openml/base.py @@ -36,7 +36,7 @@ def openml_url(self) -> Optional[str]: def url_for_id(cls, id_: int) -> str: """ Return 
the OpenML URL for the object of the class entity with the given id. """ # Sample url for a flow: openml.org/f/123 - return "{}/{}/{}".format(openml.config.server_base_url, cls._entity_letter(), id_) + return "{}/{}/{}".format(openml.config.get_server_base_url(), cls._entity_letter(), id_) @classmethod def _entity_letter(cls) -> str: diff --git a/openml/config.py b/openml/config.py index eee2c7fdb..0f2f6e92b 100644 --- a/openml/config.py +++ b/openml/config.py @@ -58,7 +58,20 @@ def configure_logging(console_output_level: int, file_output_level: int): # Default values are actually added here in the _setup() function which is # called at the end of this module server = str(_defaults['server']) # so mypy knows it is a string -server_base_url = server[:-len('/api/v1/xml')] + + +def get_server_base_url() -> str: + """Return the base URL of the currently configured server. + + Turns ``"https://www.openml.org/api/v1/xml"`` in ``"https://www.openml.org/"`` + + Returns + ======= + str + """ + return server[:-len('/api/v1/xml')] + + apikey = _defaults['apikey'] # The current cache directory (without the server name) cache_directory = str(_defaults['cachedir']) # so mypy knows it is a string diff --git a/openml/runs/run.py b/openml/runs/run.py index 7229cfb00..140347cc4 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -88,7 +88,7 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: "Dataset ID": self.dataset_id, "Dataset URL": openml.datasets.OpenMLDataset.url_for_id(self.dataset_id)} if self.uploader is not None: - fields["Uploader Profile"] = "{}/u/{}".format(openml.config.server_base_url, + fields["Uploader Profile"] = "{}/u/{}".format(openml.config.get_server_base_url(), self.uploader) if self.run_id is not None: fields["Run URL"] = self.openml_url diff --git a/openml/study/study.py b/openml/study/study.py index 955546781..483804e03 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -106,7 +106,7 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: fields["ID"] = self.study_id fields["Study URL"] = self.openml_url if self.creator is not None: - fields["Creator"] = "{}/u/{}".format(openml.config.server_base_url, self.creator) + fields["Creator"] = "{}/u/{}".format(openml.config.get_server_base_url(), self.creator) if self.creation_date is not None: fields["Upload Time"] = self.creation_date.replace('T', ' ') if self.data is not None: diff --git a/openml/tasks/task.py b/openml/tasks/task.py index 3b1c8abe7..0b79c2eca 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -68,7 +68,7 @@ def id(self) -> Optional[int]: def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: """ Collect all information to display in the __repr__ body. """ fields = {"Task Type Description": '{}/tt/{}'.format( - openml.config.server_base_url, self.task_type_id)} # type: Dict[str, Any] + openml.config.get_server_base_url(), self.task_type_id)} # type: Dict[str, Any] if self.task_id is not None: fields["Task ID"] = self.task_id fields["Task URL"] = self.openml_url From c02096b043eb30615002e3e650f5c0dad4cdd958 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Mon, 11 Nov 2019 09:11:39 +0100 Subject: [PATCH 07/48] Add cite me (#874) * Ask users to cite us * improve reference * Remove linebreak from bibtex block. 
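A short illustration of the behaviour introduced by get_server_base_url() in [PATCH 06/48] above: the base URL is derived from the configured server at call time, so printed URLs follow a server switch:

    import openml

    openml.config.server = "https://test.openml.org/api/v1/xml"
    # Derived lazily from openml.config.server instead of being fixed at import.
    print(openml.config.get_server_base_url())  # -> https://test.openml.org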
--- README.md | 30 +++++++++++++++++++++++++++--- doc/index.rst | 22 ++++++++++++++++++++++ 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 63e33155b..732085697 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,14 @@ -[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) +# OpenML-Python A python interface for [OpenML](http://openml.org), an online platform for open science collaboration in machine learning. It can be used to download or upload OpenML data such as datasets and machine learning experiment results. -You can find the documentation on the [openml-python website](https://openml.github.io/openml-python). -If you wish to contribute to the package, please see our [contribution guidelines](https://github.com/openml/openml-python/blob/develop/CONTRIBUTING.md). + +## General + +* [Documentation](https://openml.github.io/openml-python). +* [Contribution guidelines](https://github.com/openml/openml-python/blob/develop/CONTRIBUTING.md). + +[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) Master branch: @@ -16,3 +21,22 @@ Development branch: [![Build Status](https://travis-ci.org/openml/openml-python.svg?branch=develop)](https://travis-ci.org/openml/openml-python) [![Build status](https://ci.appveyor.com/api/projects/status/blna1eip00kdyr25/branch/develop?svg=true)](https://ci.appveyor.com/project/OpenML/openml-python/branch/develop) [![Coverage Status](https://coveralls.io/repos/github/openml/openml-python/badge.svg?branch=develop)](https://coveralls.io/github/openml/openml-python?branch=develop) + +## Citing OpenML-Python + +If you use OpenML-Python in a scientific publication, we would appreciate a reference to the +following paper: + +[Matthias Feurer, Jan N. van Rijn, Arlind Kadra, Pieter Gijsbers, Neeratyoy Mallik, Sahithya Ravi, Andreas Müller, Joaquin Vanschoren, Frank Hutter
+**OpenML-Python: an extensible Python API for OpenML**
+*arXiv:1911.02490 [cs.LG]*](https://arxiv.org/abs/1911.02490) + +Bibtex entry: +```bibtex +@article{feurer-arxiv19a, + author = {Matthias Feurer and Jan N. van Rijn and Arlind Kadra and Pieter Gijsbers and Neeratyoy Mallik and Sahithya Ravi and Andreas Müller and Joaquin Vanschoren and Frank Hutter}, + title = {OpenML-Python: an extensible Python API for OpenML}, + journal = {arXiv:1911.02490}, + year = {2019}, +} +``` diff --git a/doc/index.rst b/doc/index.rst index 8d7cf2243..789979023 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -84,3 +84,25 @@ Contribution to the OpenML package is highly appreciated. The OpenML package currently has a 1/4 position for the development and all help possible is needed to extend and maintain the package, create new examples and improve the usability. Please see the :ref:`contributing` page for more information. + +-------------------- +Citing OpenML-Python +-------------------- + +If you use OpenML-Python in a scientific publication, we would appreciate a +reference to the following paper: + + + `OpenML-Python: an extensible Python API for OpenML + `_, + Feurer *et al.*, arXiv:1911.02490. + + Bibtex entry:: + + @article{feurer-arxiv19a, + author = {Matthias Feurer and Jan N. van Rijn and Arlind Kadra and Pieter Gijsbers and Neeratyoy Mallik and Sahithya Ravi and Andreas Müller and Joaquin Vanschoren and Frank Hutter}, + title = {OpenML-Python: an extensible Python API for OpenML}, + journal = {arXiv:1911.02490}, + year = {2019}, + } + From a1cfd6e56a31fec21635d4c858f04df79f472237 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Mon, 11 Nov 2019 14:47:26 +0100 Subject: [PATCH 08/48] Adding option to print logs during an api call (#833) * Adding option to print logs during an api call * Adding timing to log and changing string interpolation * Improving logging and timing of api calls * PEP8 * PEP8 --- openml/_api_calls.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 5068dc208..888afa18e 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -1,11 +1,11 @@ # License: BSD 3-Clause import time -from typing import Dict +import logging import requests import warnings - import xmltodict +from typing import Dict from . 
import config from .exceptions import (OpenMLServerError, OpenMLServerException, @@ -45,13 +45,22 @@ def _perform_api_call(call, request_method, data=None, file_elements=None): url += call url = url.replace('=', '%3d') - + logging.info('Starting [%s] request for the URL %s', request_method, url) + start = time.time() if file_elements is not None: if request_method != 'post': raise ValueError('request method must be post when file elements ' 'are present') - return _read_url_files(url, data=data, file_elements=file_elements) - return _read_url(url, request_method, data) + response = _read_url_files(url, data=data, file_elements=file_elements) + else: + response = _read_url(url, request_method, data) + logging.info( + '%.7fs taken for [%s] request for the URL %s', + time.time() - start, + request_method, + url, + ) + return response def _file_id_to_url(file_id, filename=None): From a1e2c34b9fff5ef91187116bed82ad2295c705f8 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Tue, 12 Nov 2019 11:25:21 +0100 Subject: [PATCH 09/48] improve sdist handling (#877) * improve sdsit handling * fix changelog * fix pytest installation * install test dependencies extra * fix sdist --- .travis.yml | 2 +- Makefile | 2 +- ci_scripts/install.sh | 19 ++++++++++++++----- ci_scripts/test.sh | 6 ------ doc/progress.rst | 2 ++ setup.py | 7 ++++++- 6 files changed, 24 insertions(+), 14 deletions(-) diff --git a/.travis.yml b/.travis.yml index beaa3b53e..c1c397967 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,7 @@ env: - MODULE=openml matrix: - DISTRIB="conda" PYTHON_VERSION="3.5" SKLEARN_VERSION="0.21.2" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2" TEST_DIST="true" - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" RUN_FLAKE8="true" SKIP_TESTS="true" - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" COVERAGE="true" DOCPUSH="true" - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.2" diff --git a/Makefile b/Makefile index c36acbe9f..165bcea80 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ all: clean inplace test clean: $(PYTHON) setup.py clean - rm -rf dist + rm -rf dist openml.egg-info in: inplace # just a shortcut inplace: diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh index 5c338fe5e..15cb84bca 100644 --- a/ci_scripts/install.sh +++ b/ci_scripts/install.sh @@ -32,15 +32,24 @@ source activate testenv if [[ -v SCIPY_VERSION ]]; then conda install --yes scipy=$SCIPY_VERSION fi - python --version -pip install -e '.[test]' + +if [[ "$TEST_DIST" == "true" ]]; then + pip install twine nbconvert jupyter_client matplotlib pytest pytest-xdist pytest-timeout \ + nbformat oslo.concurrency flaky + python setup.py sdist + # Find file which was modified last as done in https://stackoverflow.com/a/4561987 + dist=`find dist -type f -printf '%T@ %p\n' | sort -n | tail -1 | cut -f2- -d" "` + echo "Installing $dist" + pip install "$dist" + twine check "$dist" +else + pip install -e '.[test]' +fi + python -c "import numpy; print('numpy %s' % numpy.__version__)" python -c "import scipy; print('scipy %s' % scipy.__version__)" -if [[ "$DOCTEST" == "true" ]]; then - pip install sphinx_bootstrap_theme -fi if [[ "$DOCPUSH" == "true" ]]; then conda install --yes gxx_linux-64 gcc_linux-64 swig pip install -e '.[examples,examples_unix]' diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh index 8659a105b..5ffced544 100644 --- a/ci_scripts/test.sh +++ b/ci_scripts/test.sh @@ -15,14 +15,8 @@ 
run_tests() { cwd=`pwd` test_dir=$cwd/tests - doctest_dir=$cwd/doc cd $TEST_DIR - if [[ "$EXAMPLES" == "true" ]]; then - pytest -sv $test_dir/test_examples/ - elif [[ "$DOCTEST" == "true" ]]; then - python -m doctest $doctest_dir/usage.rst - fi if [[ "$COVERAGE" == "true" ]]; then PYTEST_ARGS='--cov=openml' diff --git a/doc/progress.rst b/doc/progress.rst index b7d4b4992..52fdf283d 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -11,6 +11,8 @@ Changelog * FIX #873: Fixes an issue which resulted in incorrect URLs when printing OpenML objects after switching the server +* MAINT #767: Source distribution installation is now unit-tested. +* MAINT #865: OpenML no longer bundles test files in the source distribution. 0.10.2 ~~~~~~ diff --git a/setup.py b/setup.py index 9c9766636..46e4ae8b2 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,12 @@ "Source Code": "https://github.com/openml/openml-python" }, version=version, - packages=setuptools.find_packages(), + # Make sure to remove stale files such as the egg-info before updating this: + # https://stackoverflow.com/a/26547314 + packages=setuptools.find_packages( + include=['openml.*', 'openml'], + exclude=["*.tests", "*.tests.*", "tests.*", "tests"], + ), package_data={'': ['*.txt', '*.md']}, python_requires=">=3.5", install_requires=[ From 69d443f18d52d70e8b730061904b68576a4786b7 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Tue, 12 Nov 2019 13:54:07 +0100 Subject: [PATCH 10/48] add support for MLP HP layer_sizes (#879) --- openml/extensions/sklearn/extension.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index ca6c77458..b3a194756 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -1927,9 +1927,10 @@ def _obtain_arff_trace( param_value is None or param_value is np.ma.masked: # basic string values type = 'STRING' - elif isinstance(param_value, list) and \ + elif isinstance(param_value, (list, tuple)) and \ all(isinstance(i, int) for i in param_value): - # list of integers + # list of integers (usually for selecting features) + # hyperparameter layer_sizes of MLPClassifier type = 'STRING' else: raise TypeError('Unsupported param type in param grid: %s' % key) From d79a98ced2a1bf4d523c823bf389f3c4fea7a8d4 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Thu, 14 Nov 2019 09:46:54 +0100 Subject: [PATCH 11/48] add better error message for too-long URI (#881) * add better error message for too-long URI * improve error handling * improve data download function, fix bugs * stricter API, more private methods * incorporate Pieter's feedback --- openml/_api_calls.py | 135 ++++++++++++++++++++------ openml/datasets/functions.py | 8 +- openml/runs/run.py | 3 +- openml/tasks/task.py | 10 +- openml/utils.py | 51 ---------- tests/test_openml/test_api_calls.py | 12 +++ tests/test_runs/test_run_functions.py | 3 +- tests/test_utils/test_utils.py | 2 +- 8 files changed, 127 insertions(+), 97 deletions(-) create mode 100644 tests/test_openml/test_api_calls.py diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 888afa18e..c357dc3d0 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -1,15 +1,15 @@ # License: BSD 3-Clause import time +import hashlib import logging import requests -import warnings import xmltodict -from typing import Dict +from typing import Dict, Optional from . 
import config from .exceptions import (OpenMLServerError, OpenMLServerException, - OpenMLServerNoResult) + OpenMLServerNoResult, OpenMLHashException) def _perform_api_call(call, request_method, data=None, file_elements=None): @@ -47,20 +47,105 @@ def _perform_api_call(call, request_method, data=None, file_elements=None): url = url.replace('=', '%3d') logging.info('Starting [%s] request for the URL %s', request_method, url) start = time.time() + if file_elements is not None: if request_method != 'post': - raise ValueError('request method must be post when file elements ' - 'are present') - response = _read_url_files(url, data=data, file_elements=file_elements) + raise ValueError('request method must be post when file elements are present') + response = __read_url_files(url, data=data, file_elements=file_elements) else: - response = _read_url(url, request_method, data) + response = __read_url(url, request_method, data) + + __check_response(response, url, file_elements) + logging.info( '%.7fs taken for [%s] request for the URL %s', time.time() - start, request_method, url, ) - return response + return response.text + + +def _download_text_file(source: str, + output_path: Optional[str] = None, + md5_checksum: str = None, + exists_ok: bool = True, + encoding: str = 'utf8', + ) -> Optional[str]: + """ Download the text file at `source` and store it in `output_path`. + + By default, do nothing if a file already exists in `output_path`. + The downloaded file can be checked against an expected md5 checksum. + + Parameters + ---------- + source : str + url of the file to be downloaded + output_path : str, (optional) + full path, including filename, of where the file should be stored. If ``None``, + this function returns the downloaded file as string. + md5_checksum : str, optional (default=None) + If not None, should be a string of hexidecimal digits of the expected digest value. + exists_ok : bool, optional (default=True) + If False, raise an FileExistsError if there already exists a file at `output_path`. + encoding : str, optional (default='utf8') + The encoding with which the file should be stored. + """ + if output_path is not None: + try: + with open(output_path, encoding=encoding): + if exists_ok: + return None + else: + raise FileExistsError + except FileNotFoundError: + pass + + logging.info('Starting [%s] request for the URL %s', 'get', source) + start = time.time() + response = __read_url(source, request_method='get') + __check_response(response, source, None) + downloaded_file = response.text + + if md5_checksum is not None: + md5 = hashlib.md5() + md5.update(downloaded_file.encode('utf-8')) + md5_checksum_download = md5.hexdigest() + if md5_checksum != md5_checksum_download: + raise OpenMLHashException( + 'Checksum {} of downloaded file is unequal to the expected checksum {}.' 
+ .format(md5_checksum_download, md5_checksum)) + + if output_path is None: + logging.info( + '%.7fs taken for [%s] request for the URL %s', + time.time() - start, + 'get', + source, + ) + return downloaded_file + + else: + with open(output_path, "w", encoding=encoding) as fh: + fh.write(downloaded_file) + + logging.info( + '%.7fs taken for [%s] request for the URL %s', + time.time() - start, + 'get', + source, + ) + + del downloaded_file + return None + + +def __check_response(response, url, file_elements): + if response.status_code != 200: + raise __parse_server_exception(response, url, file_elements=file_elements) + elif 'Content-Encoding' not in response.headers or \ + response.headers['Content-Encoding'] != 'gzip': + logging.warning('Received uncompressed content from OpenML for {}.'.format(url)) def _file_id_to_url(file_id, filename=None): @@ -75,7 +160,7 @@ def _file_id_to_url(file_id, filename=None): return url -def _read_url_files(url, data=None, file_elements=None): +def __read_url_files(url, data=None, file_elements=None): """do a post request to url with data and sending file_elements as files""" @@ -85,37 +170,24 @@ def _read_url_files(url, data=None, file_elements=None): file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to # 'gzip,deflate' - response = send_request( + response = __send_request( request_method='post', url=url, data=data, files=file_elements, ) - if response.status_code != 200: - raise _parse_server_exception(response, url, file_elements=file_elements) - if 'Content-Encoding' not in response.headers or \ - response.headers['Content-Encoding'] != 'gzip': - warnings.warn('Received uncompressed content from OpenML for {}.' - .format(url)) - return response.text + return response -def _read_url(url, request_method, data=None): +def __read_url(url, request_method, data=None): data = {} if data is None else data if config.apikey is not None: data['api_key'] = config.apikey - response = send_request(request_method=request_method, url=url, data=data) - if response.status_code != 200: - raise _parse_server_exception(response, url, file_elements=None) - if 'Content-Encoding' not in response.headers or \ - response.headers['Content-Encoding'] != 'gzip': - warnings.warn('Received uncompressed content from OpenML for {}.' - .format(url)) - return response.text + return __send_request(request_method=request_method, url=url, data=data) -def send_request( +def __send_request( request_method, url, data, @@ -149,16 +221,19 @@ def send_request( return response -def _parse_server_exception( +def __parse_server_exception( response: requests.Response, url: str, file_elements: Dict, ) -> OpenMLServerError: - # OpenML has a sophisticated error system - # where information about failures is provided. try to parse this + + if response.status_code == 414: + raise OpenMLServerError('URI too long! ({})'.format(url)) try: server_exception = xmltodict.parse(response.text) except Exception: + # OpenML has a sophisticated error system + # where information about failures is provided. try to parse this raise OpenMLServerError( 'Unexpected server error when calling {}. 
Please contact the developers!\n' 'Status code: {}\n{}'.format(url, response.status_code, response.text)) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index e85c55aa3..657fbc7c6 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -886,7 +886,7 @@ def _get_dataset_arff(description: Union[Dict, OpenMLDataset], output_file_path = os.path.join(cache_directory, "dataset.arff") try: - openml.utils._download_text_file( + openml._api_calls._download_text_file( source=url, output_path=output_file_path, md5_checksum=md5_checksum_fixture @@ -1038,13 +1038,11 @@ def _get_online_dataset_arff(dataset_id): str A string representation of an ARFF file. """ - dataset_xml = openml._api_calls._perform_api_call("data/%d" % dataset_id, - 'get') + dataset_xml = openml._api_calls._perform_api_call("data/%d" % dataset_id, 'get') # build a dict from the xml. # use the url from the dataset description and return the ARFF string - return openml._api_calls._read_url( + return openml._api_calls._download_text_file( xmltodict.parse(dataset_xml)['oml:data_set_description']['oml:url'], - request_method='get' ) diff --git a/openml/runs/run.py b/openml/runs/run.py index 140347cc4..910801971 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -327,8 +327,7 @@ def get_metric_fn(self, sklearn_fn, kwargs=None): predictions_file_url = openml._api_calls._file_id_to_url( self.output_files['predictions'], 'predictions.arff', ) - response = openml._api_calls._read_url(predictions_file_url, - request_method='get') + response = openml._api_calls._download_text_file(predictions_file_url) predictions_arff = arff.loads(response) # TODO: make this a stream reader else: diff --git a/openml/tasks/task.py b/openml/tasks/task.py index 0b79c2eca..72c12bab5 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -116,12 +116,10 @@ def _download_split(self, cache_file: str): pass except (OSError, IOError): split_url = self.estimation_procedure["data_splits_url"] - split_arff = openml._api_calls._read_url(split_url, - request_method='get') - - with io.open(cache_file, "w", encoding='utf8') as fh: - fh.write(split_arff) - del split_arff + openml._api_calls._download_text_file( + source=str(split_url), + output_path=cache_file, + ) def download_split(self) -> OpenMLSplit: """Download the OpenML split for a given task. diff --git a/openml/utils.py b/openml/utils.py index 09a0f6a83..2815f1afd 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -1,7 +1,6 @@ # License: BSD 3-Clause import os -import hashlib import xmltodict import shutil from typing import TYPE_CHECKING, List, Tuple, Union, Type @@ -366,53 +365,3 @@ def _create_lockfiles_dir(): except OSError: pass return dir - - -def _download_text_file(source: str, - output_path: str, - md5_checksum: str = None, - exists_ok: bool = True, - encoding: str = 'utf8', - ) -> None: - """ Download the text file at `source` and store it in `output_path`. - - By default, do nothing if a file already exists in `output_path`. - The downloaded file can be checked against an expected md5 checksum. - - Parameters - ---------- - source : str - url of the file to be downloaded - output_path : str - full path, including filename, of where the file should be stored. - md5_checksum : str, optional (default=None) - If not None, should be a string of hexidecimal digits of the expected digest value. - exists_ok : bool, optional (default=True) - If False, raise an FileExistsError if there already exists a file at `output_path`. 
- encoding : str, optional (default='utf8') - The encoding with which the file should be stored. - """ - try: - with open(output_path, encoding=encoding): - if exists_ok: - return - else: - raise FileExistsError - except FileNotFoundError: - pass - - downloaded_file = openml._api_calls._read_url(source, request_method='get') - - if md5_checksum is not None: - md5 = hashlib.md5() - md5.update(downloaded_file.encode('utf-8')) - md5_checksum_download = md5.hexdigest() - if md5_checksum != md5_checksum_download: - raise openml.exceptions.OpenMLHashException( - 'Checksum {} of downloaded file is unequal to the expected checksum {}.' - .format(md5_checksum_download, md5_checksum)) - - with open(output_path, "w", encoding=encoding) as fh: - fh.write(downloaded_file) - - del downloaded_file diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py new file mode 100644 index 000000000..1748608bb --- /dev/null +++ b/tests/test_openml/test_api_calls.py @@ -0,0 +1,12 @@ +import openml +import openml.testing + + +class TestConfig(openml.testing.TestBase): + + def test_too_long_uri(self): + with self.assertRaisesRegex( + openml.exceptions.OpenMLServerError, + 'URI too long!', + ): + openml.datasets.list_datasets(data_id=list(range(10000))) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 2773bc8d9..fe8aab808 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -119,8 +119,7 @@ def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed): # downloads the predictions of the old task file_id = run.output_files['predictions'] predictions_url = openml._api_calls._file_id_to_url(file_id) - response = openml._api_calls._read_url(predictions_url, - request_method='get') + response = openml._api_calls._download_text_file(predictions_url) predictions = arff.loads(response) run_prime = openml.runs.run_model_on_task( model=model_prime, diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index de2d18981..152dd4dba 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -16,7 +16,7 @@ class OpenMLTaskTest(TestBase): def mocked_perform_api_call(call, request_method): # TODO: JvR: Why is this not a staticmethod? 
url = openml.config.server + '/' + call - return openml._api_calls._read_url(url, request_method=request_method) + return openml._api_calls._download_text_file(url) def test_list_all(self): openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks) From 2b7e740d8c3d6933a56e76c43048c3159bdd0b86 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Mon, 18 Nov 2019 15:51:48 +0100 Subject: [PATCH 12/48] To handle non-actionable steps in sklearn (#866) * Initial changes to handle reproducible example from the issue * Making tentative changes; Need to test deserialization * Fixing deserialization when empty steps in sklearn model * Fixing flake issues, failing test cases * Fixing test cases * Dropping support for 'None' as sklearn estimator * Adding test case for None estimator --- openml/extensions/sklearn/extension.py | 75 +++++++++++-------- .../test_sklearn_extension.py | 75 ++++++++++++++++++- 2 files changed, 114 insertions(+), 36 deletions(-) diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index b3a194756..9720bd853 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -696,10 +696,14 @@ def _serialize_model(self, model: Any) -> OpenMLFlow: # will be part of the name (in brackets) sub_components_names = "" for key in subcomponents: + if isinstance(subcomponents[key], OpenMLFlow): + name = subcomponents[key].name + elif isinstance(subcomponents[key], str): # 'drop', 'passthrough' can be passed + name = subcomponents[key] if key in subcomponents_explicit: - sub_components_names += "," + key + "=" + subcomponents[key].name + sub_components_names += "," + key + "=" + name else: - sub_components_names += "," + subcomponents[key].name + sub_components_names += "," + name if sub_components_names: # slice operation on string in order to get rid of leading comma @@ -771,6 +775,9 @@ def _get_external_version_string( external_versions.add(openml_version) external_versions.add(sklearn_version) for visitee in sub_components.values(): + # 'drop', 'passthrough', None can be passed as estimators + if isinstance(visitee, str): + continue for external_version in visitee.external_version.split(','): external_versions.add(external_version) return ','.join(list(sorted(external_versions))) @@ -783,9 +790,12 @@ def _check_multiple_occurence_of_component_in_flow( to_visit_stack = [] # type: List[OpenMLFlow] to_visit_stack.extend(sub_components.values()) known_sub_components = set() # type: Set[str] + while len(to_visit_stack) > 0: visitee = to_visit_stack.pop() - if visitee.name in known_sub_components: + if isinstance(visitee, str): # 'drop', 'passthrough' can be passed as estimators + known_sub_components.add(visitee) + elif visitee.name in known_sub_components: raise ValueError('Found a second occurence of component %s when ' 'trying to serialize %s.' % (visitee.name, model)) else: @@ -822,7 +832,7 @@ def _extract_information_from_model( def flatten_all(list_): """ Flattens arbitrary depth lists of lists (e.g. [[1,2],[3,[1]]] -> [1,2,3,1]). 
""" for el in list_: - if isinstance(el, (list, tuple)): + if isinstance(el, (list, tuple)) and len(el) > 0: yield from flatten_all(el) else: yield el @@ -852,17 +862,31 @@ def flatten_all(list_): parameter_value = list() # type: List reserved_keywords = set(model.get_params(deep=False).keys()) - for sub_component_tuple in rval: + for i, sub_component_tuple in enumerate(rval): identifier = sub_component_tuple[0] sub_component = sub_component_tuple[1] - sub_component_type = type(sub_component_tuple) + # sub_component_type = type(sub_component_tuple) if not 2 <= len(sub_component_tuple) <= 3: # length 2 is for {VotingClassifier.estimators, # Pipeline.steps, FeatureUnion.transformer_list} # length 3 is for ColumnTransformer msg = 'Length of tuple does not match assumptions' raise ValueError(msg) - if not isinstance(sub_component, (OpenMLFlow, type(None))): + + if isinstance(sub_component, str): + if sub_component != 'drop' and sub_component != 'passthrough': + msg = 'Second item of tuple does not match assumptions. ' \ + 'If string, can be only \'drop\' or \'passthrough\' but' \ + 'got %s' % sub_component + raise ValueError(msg) + else: + pass + elif isinstance(sub_component, type(None)): + msg = 'Cannot serialize objects of None type. Please use a valid ' \ + 'placeholder for None. Note that empty sklearn estimators can be '\ + 'replaced with \'drop\' or \'passthrough\'.' + raise ValueError(msg) + elif not isinstance(sub_component, OpenMLFlow): msg = 'Second item of tuple does not match assumptions. ' \ 'Expected OpenMLFlow, got %s' % type(sub_component) raise TypeError(msg) @@ -875,31 +899,18 @@ def flatten_all(list_): identifier) raise PyOpenMLError(msg) - if sub_component is None: - # In a FeatureUnion it is legal to have a None step - - pv = [identifier, None] - if sub_component_type is tuple: - parameter_value.append(tuple(pv)) - else: - parameter_value.append(pv) - - else: - # Add the component to the list of components, add a - # component reference as a placeholder to the list of - # parameters, which will be replaced by the real component - # when deserializing the parameter - sub_components_explicit.add(identifier) - sub_components[identifier] = sub_component - component_reference = OrderedDict() # type: Dict[str, Union[str, Dict]] - component_reference['oml-python:serialized_object'] = 'component_reference' - cr_value = OrderedDict() # type: Dict[str, Any] - cr_value['key'] = identifier - cr_value['step_name'] = identifier - if len(sub_component_tuple) == 3: - cr_value['argument_1'] = sub_component_tuple[2] - component_reference['value'] = cr_value - parameter_value.append(component_reference) + # when deserializing the parameter + sub_components_explicit.add(identifier) + sub_components[identifier] = sub_component + component_reference = OrderedDict() # type: Dict[str, Union[str, Dict]] + component_reference['oml-python:serialized_object'] = 'component_reference' + cr_value = OrderedDict() # type: Dict[str, Any] + cr_value['key'] = identifier + cr_value['step_name'] = identifier + if len(sub_component_tuple) == 3: + cr_value['argument_1'] = sub_component_tuple[2] + component_reference['value'] = cr_value + parameter_value.append(component_reference) # Here (and in the elif and else branch below) are the only # places where we encode a value as json to make sure that all diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index 6bb6b5383..bce58077c 100644 --- 
a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -30,7 +30,8 @@ import sklearn.preprocessing import sklearn.tree import sklearn.cluster - +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler import openml from openml.extensions.sklearn import SklearnExtension @@ -609,6 +610,8 @@ def test_serialize_column_transformer_pipeline(self): serialization2 = self.extension.model_to_flow(new_model) assert_flows_equal(serialization, serialization2) + @unittest.skipIf(LooseVersion(sklearn.__version__) < "0.20", + reason="Pipeline processing behaviour updated") def test_serialize_feature_union(self): ohe_params = {'sparse': False} if LooseVersion(sklearn.__version__) >= "0.20": @@ -675,16 +678,17 @@ def test_serialize_feature_union(self): self.assertEqual(new_model_params, fu_params) new_model.fit(self.X, self.y) - fu.set_params(scaler=None) + fu.set_params(scaler='drop') serialization = self.extension.model_to_flow(fu) self.assertEqual(serialization.name, 'sklearn.pipeline.FeatureUnion(' - 'ohe=sklearn.preprocessing.{}.OneHotEncoder)' + 'ohe=sklearn.preprocessing.{}.OneHotEncoder,' + 'scaler=drop)' .format(module_name_encoder)) new_model = self.extension.flow_to_model(serialization) self.assertEqual(type(new_model), type(fu)) self.assertIsNot(new_model, fu) - self.assertIs(new_model.transformer_list[1][1], None) + self.assertIs(new_model.transformer_list[1][1], 'drop') def test_serialize_feature_union_switched_names(self): ohe_params = ({'categories': 'auto'} @@ -1778,3 +1782,66 @@ def test_trim_flow_name(self): self.assertEqual("weka.IsolationForest", SklearnExtension.trim_flow_name("weka.IsolationForest")) + + @unittest.skipIf(LooseVersion(sklearn.__version__) < "0.21", + reason="SimpleImputer, ColumnTransformer available only after 0.19 and " + "Pipeline till 0.20 doesn't support indexing and 'passthrough'") + def test_run_on_model_with_empty_steps(self): + from sklearn.compose import ColumnTransformer + # testing 'drop', 'passthrough', None as non-actionable sklearn estimators + dataset = openml.datasets.get_dataset(128) + task = openml.tasks.get_task(59) + + X, y, categorical_ind, feature_names = dataset.get_data( + target=dataset.default_target_attribute, dataset_format='array') + categorical_ind = np.array(categorical_ind) + cat_idx, = np.where(categorical_ind) + cont_idx, = np.where(~categorical_ind) + + clf = make_pipeline( + ColumnTransformer([('cat', make_pipeline(SimpleImputer(strategy='most_frequent'), + OneHotEncoder()), cat_idx.tolist()), + ('cont', make_pipeline(SimpleImputer(strategy='median'), + StandardScaler()), cont_idx.tolist())]) + ) + + clf = sklearn.pipeline.Pipeline([ + ('dummystep', 'passthrough'), # adding 'passthrough' as an estimator + ('prep', clf), + ('classifier', sklearn.svm.SVC(gamma='auto')) + ]) + + # adding 'drop' to a ColumnTransformer + if not categorical_ind.any(): + clf[1][0].set_params(cat='drop') + if not (~categorical_ind).any(): + clf[1][0].set_params(cont='drop') + + # serializing model with non-actionable step + run, flow = openml.runs.run_model_on_task(model=clf, task=task, return_flow=True) + + self.assertEqual(len(flow.components), 3) + self.assertEqual(flow.components['dummystep'], 'passthrough') + self.assertTrue(isinstance(flow.components['classifier'], OpenMLFlow)) + self.assertTrue(isinstance(flow.components['prep'], OpenMLFlow)) + 
self.assertTrue(isinstance(flow.components['prep'].components['columntransformer'], + OpenMLFlow)) + self.assertEqual(flow.components['prep'].components['columntransformer'].components['cat'], + 'drop') + + # de-serializing flow to a model with non-actionable step + model = self.extension.flow_to_model(flow) + model.fit(X, y) + self.assertEqual(type(model), type(clf)) + self.assertNotEqual(model, clf) + self.assertEqual(len(model.named_steps), 3) + self.assertEqual(model.named_steps['dummystep'], 'passthrough') + + def test_sklearn_serialization_with_none_step(self): + msg = 'Cannot serialize objects of None type. Please use a valid ' \ + 'placeholder for None. Note that empty sklearn estimators can be ' \ + 'replaced with \'drop\' or \'passthrough\'.' + clf = sklearn.pipeline.Pipeline([('dummystep', None), + ('classifier', sklearn.svm.SVC(gamma='auto'))]) + with self.assertRaisesRegex(ValueError, msg): + self.extension.model_to_flow(clf) From d5e46febfa8db4893d9461ec73648079d73a39ac Mon Sep 17 00:00:00 2001 From: m7142yosuke Date: Fri, 22 Nov 2019 20:56:23 +0900 Subject: [PATCH 13/48] Add support for using run_model_on_task simply (#888) * Add support for using run_model_on_task simply * Add unit test * fix mypy error --- openml/runs/functions.py | 19 ++++++++++---- tests/test_runs/test_run_functions.py | 38 +++++++++++++++++++-------- 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 9e7321d45..ddaf3b028 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -25,7 +25,7 @@ OpenMLRegressionTask, OpenMLSupervisedTask, OpenMLLearningCurveTask from .run import OpenMLRun from .trace import OpenMLRunTrace -from ..tasks import TaskTypeEnum +from ..tasks import TaskTypeEnum, get_task # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: @@ -38,7 +38,7 @@ def run_model_on_task( model: Any, - task: OpenMLTask, + task: Union[int, str, OpenMLTask], avoid_duplicate_runs: bool = True, flow_tags: List[str] = None, seed: int = None, @@ -54,8 +54,9 @@ def run_model_on_task( A model which has a function fit(X,Y) and predict(X), all supervised estimators of scikit learn follow this definition of a model [1] [1](http://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html) - task : OpenMLTask - Task to perform. This may be a model instead if the first argument is an OpenMLTask. + task : OpenMLTask or int or str + Task to perform or Task id. + This may be a model instead if the first argument is an OpenMLTask. avoid_duplicate_runs : bool, optional (default=True) If True, the run will throw an error if the setup/task combination is already present on the server. This feature requires an internet connection. @@ -84,7 +85,7 @@ def run_model_on_task( # Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019). # When removing this please also remove the method `is_estimator` from the extension # interface as it is only used here (MF, 3-2019) - if isinstance(model, OpenMLTask): + if isinstance(model, (int, str, OpenMLTask)): warnings.warn("The old argument order (task, model) is deprecated and " "will not be supported in the future. 
Please use the " "order (model, task).", DeprecationWarning) @@ -98,6 +99,14 @@ def run_model_on_task( flow = extension.model_to_flow(model) + def get_task_and_type_conversion(task: Union[int, str, OpenMLTask]) -> OpenMLTask: + if isinstance(task, (int, str)): + return get_task(int(task)) + else: + return task + + task = get_task_and_type_conversion(task) + run = run_flow_on_task( task=task, flow=flow, diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index fe8aab808..854061148 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -110,9 +110,9 @@ def _compare_predictions(self, predictions, predictions_prime): return True - def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed): + def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, + create_task_obj): run = openml.runs.get_run(run_id) - task = openml.tasks.get_task(run.task_id) # TODO: assert holdout task @@ -121,12 +121,24 @@ def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed): predictions_url = openml._api_calls._file_id_to_url(file_id) response = openml._api_calls._download_text_file(predictions_url) predictions = arff.loads(response) - run_prime = openml.runs.run_model_on_task( - model=model_prime, - task=task, - avoid_duplicate_runs=False, - seed=seed, - ) + + # if create_task_obj=False, task argument in run_model_on_task is specified task_id + if create_task_obj: + task = openml.tasks.get_task(run.task_id) + run_prime = openml.runs.run_model_on_task( + model=model_prime, + task=task, + avoid_duplicate_runs=False, + seed=seed, + ) + else: + run_prime = openml.runs.run_model_on_task( + model=model_prime, + task=run.task_id, + avoid_duplicate_runs=False, + seed=seed, + ) + predictions_prime = run_prime._generate_arff_dict() self._compare_predictions(predictions, predictions_prime) @@ -425,13 +437,17 @@ def determine_grid_size(param_grid): raise e self._rerun_model_and_compare_predictions(run.run_id, model_prime, - seed) + seed, create_task_obj=True) + self._rerun_model_and_compare_predictions(run.run_id, model_prime, + seed, create_task_obj=False) else: run_downloaded = openml.runs.get_run(run.run_id) sid = run_downloaded.setup_id model_prime = openml.setups.initialize_model(sid) - self._rerun_model_and_compare_predictions(run.run_id, - model_prime, seed) + self._rerun_model_and_compare_predictions(run.run_id, model_prime, + seed, create_task_obj=True) + self._rerun_model_and_compare_predictions(run.run_id, model_prime, + seed, create_task_obj=False) # todo: check if runtime is present self._check_fold_timing_evaluations(run.fold_evaluations, 1, num_folds, From 371911fb9e9e23bbc39d92d88e2dda22fc9136c4 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 27 Nov 2019 17:34:03 +0100 Subject: [PATCH 14/48] Fix typo, use log10 as specified in axis labels. 
(#890) --- examples/30_extended/plot_svm_hyperparameters_tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/30_extended/plot_svm_hyperparameters_tutorial.py b/examples/30_extended/plot_svm_hyperparameters_tutorial.py index ad91d9af9..7ae054a94 100644 --- a/examples/30_extended/plot_svm_hyperparameters_tutorial.py +++ b/examples/30_extended/plot_svm_hyperparameters_tutorial.py @@ -12,7 +12,7 @@ #################################################################################################### # First step - obtaining the data # =============================== -# First, we nood to choose an SVM flow, for example 8353, and a task. Finding the IDs of them are +# First, we need to choose an SVM flow, for example 8353, and a task. Finding the IDs of them are # not part of this tutorial, this could for example be done via the website. # # For this we use the function ``list_evaluations_setup`` which can automatically join @@ -38,7 +38,7 @@ # Next, we cast and transform the hyperparameters of interest (``C`` and ``gamma``) so that we # can nicely plot them. hyperparameters = ['sklearn.svm.classes.SVC(16)_C', 'sklearn.svm.classes.SVC(16)_gamma'] -df[hyperparameters] = df[hyperparameters].astype(float).apply(np.log) +df[hyperparameters] = df[hyperparameters].astype(float).apply(np.log10) #################################################################################################### # Option 1 - plotting via the pandas helper functions From b37b2614ba3cbc081ad99f1aa8c26a38c5823693 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Mon, 13 Jan 2020 11:36:42 +0100 Subject: [PATCH 15/48] Changes proposed in #885. Don't register handlers by default. (#889) * Changes proposed in #885. Don't register handlers by default. * Delay file creation until log emit. Correctly read from config. * Remove loading/storing log level references. * _create_log_handlers now returns early if called a second time * Fix type errors. * Update changelog. * Test remove register file log handler to see if CI works. * Undo last change. test server ssl works agian. * Bump scikit-learn version to 0.22 * Scikit-learn 0.22 does not install properly. * Install scikit-learn through pip instead. --- appveyor.yml | 2 +- doc/progress.rst | 2 ++ openml/config.py | 82 ++++++++++++++++++++++++++++++++---------------- 3 files changed, 58 insertions(+), 28 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 7f0800920..dc4402b67 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -35,7 +35,7 @@ install: # Install the build and runtime dependencies of the project. - "cd C:\\projects\\openml-python" - "pip install .[examples,test]" - - conda install --quiet --yes scikit-learn=0.20.0 + - "pip install scikit-learn==0.21" # Not a .NET project, we build scikit-learn in the install step instead diff --git a/doc/progress.rst b/doc/progress.rst index 52fdf283d..95455f49b 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -11,6 +11,8 @@ Changelog * FIX #873: Fixes an issue which resulted in incorrect URLs when printing OpenML objects after switching the server +* FIX #885: Logger no longer registered by default. Added utility functions to easily register + logging to console and file. * MAINT #767: Source distribution installation is now unit-tested. * MAINT #865: OpenML no longer bundles test files in the source distribution. 
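
(Editorial aside, not part of the patch: a minimal usage sketch of the opt-in logging helpers that the openml/config.py diff below introduces. It assumes the helpers are exposed as openml.config.set_console_log_level / openml.config.set_file_log_level, and that OpenML verbosity levels map 0 -> WARNING, 1 -> INFO, 2 -> DEBUG as in _convert_log_levels.)

    # Illustrative sketch only: after this patch the handlers are created on import
    # but no longer attached, so a user opts in explicitly via openml.config.
    import logging
    import openml

    openml.config.set_console_log_level(1)  # attach the console handler at INFO
    openml.config.set_file_log_level(2)     # attach the rotating file handler at DEBUG

    logging.getLogger("openml").info("logging is now registered explicitly")
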
diff --git a/openml/config.py b/openml/config.py index 0f2f6e92b..4a8017228 100644 --- a/openml/config.py +++ b/openml/config.py @@ -7,47 +7,79 @@ import logging import logging.handlers import os -from typing import cast +from typing import Tuple, cast from io import StringIO import configparser from urllib.parse import urlparse logger = logging.getLogger(__name__) +openml_logger = logging.getLogger('openml') +console_handler = None +file_handler = None -def configure_logging(console_output_level: int, file_output_level: int): - """ Sets the OpenML logger to DEBUG, with attached Stream- and FileHandler. """ - # Verbosity levels as defined (https://github.com/openml/OpenML/wiki/Client-API-Standards) - # don't match Python values directly: - verbosity_map = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG} +def _create_log_handlers(): + """ Creates but does not attach the log handlers. """ + global console_handler, file_handler + if console_handler is not None or file_handler is not None: + logger.debug("Requested to create log handlers, but they are already created.") + return - openml_logger = logging.getLogger('openml') - openml_logger.setLevel(logging.DEBUG) message_format = '[%(levelname)s] [%(asctime)s:%(name)s] %(message)s' output_formatter = logging.Formatter(message_format, datefmt='%H:%M:%S') - console_stream = logging.StreamHandler() - console_stream.setFormatter(output_formatter) - console_stream.setLevel(verbosity_map[console_output_level]) + console_handler = logging.StreamHandler() + console_handler.setFormatter(output_formatter) - one_mb = 2**20 + one_mb = 2 ** 20 log_path = os.path.join(cache_directory, 'openml_python.log') - file_stream = logging.handlers.RotatingFileHandler(log_path, maxBytes=one_mb, backupCount=1) - file_stream.setLevel(verbosity_map[file_output_level]) - file_stream.setFormatter(output_formatter) + file_handler = logging.handlers.RotatingFileHandler( + log_path, maxBytes=one_mb, backupCount=1, delay=True + ) + file_handler.setFormatter(output_formatter) - openml_logger.addHandler(console_stream) - openml_logger.addHandler(file_stream) - return console_stream, file_stream + +def _convert_log_levels(log_level: int) -> Tuple[int, int]: + """ Converts a log level that's either defined by OpenML/Python to both specifications. """ + # OpenML verbosity level don't match Python values directly: + openml_to_python = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG} + python_to_openml = {logging.DEBUG: 2, logging.INFO: 1, logging.WARNING: 0, + logging.CRITICAL: 0, logging.ERROR: 0} + # Because the dictionaries share no keys, we use `get` to convert as necessary: + openml_level = python_to_openml.get(log_level, log_level) + python_level = openml_to_python.get(log_level, log_level) + return openml_level, python_level + + +def _set_level_register_and_store(handler: logging.Handler, log_level: int): + """ Set handler log level, register it if needed, save setting to config file if specified. """ + oml_level, py_level = _convert_log_levels(log_level) + handler.setLevel(py_level) + + if openml_logger.level > py_level or openml_logger.level == logging.NOTSET: + openml_logger.setLevel(py_level) + + if handler not in openml_logger.handlers: + openml_logger.addHandler(handler) + + +def set_console_log_level(console_output_level: int): + """ Set console output to the desired level and register it with openml logger if needed. 
""" + global console_handler + _set_level_register_and_store(cast(logging.Handler, console_handler), console_output_level) + + +def set_file_log_level(file_output_level: int): + """ Set file output to the desired level and register it with openml logger if needed. """ + global file_handler + _set_level_register_and_store(cast(logging.Handler, file_handler), file_output_level) # Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) _defaults = { 'apikey': None, 'server': "https://www.openml.org/api/v1/xml", - 'verbosity': 0, # WARNING - 'file_verbosity': 2, # DEBUG 'cachedir': os.path.expanduser(os.path.join('~', '.openml', 'cache')), 'avoid_duplicate_runs': 'True', 'connection_n_retries': 2, @@ -176,9 +208,7 @@ def _setup(): def _parse_config(): - """Parse the config file, set up defaults. - """ - + """ Parse the config file, set up defaults. """ config = configparser.RawConfigParser(defaults=_defaults) if not os.path.exists(config_file): @@ -189,6 +219,7 @@ def _parse_config(): "create an empty file there." % config_file) try: + # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. # Cheat the ConfigParser module by adding a fake section header config_file_ = StringIO() config_file_.write("[FAKE_SECTION]\n") @@ -255,7 +286,4 @@ def set_cache_directory(cachedir): ] _setup() - -_console_log_level = cast(int, _defaults['verbosity']) -_file_log_level = cast(int, _defaults['file_verbosity']) -console_log, file_log = configure_logging(_console_log_level, _file_log_level) +_create_log_handlers() From 07d429c843cf589d8096db76d520317acf7a99ab Mon Sep 17 00:00:00 2001 From: Sahithya Ravi <44670788+sahithyaravi1493@users.noreply.github.com> Date: Thu, 20 Feb 2020 13:13:31 +0100 Subject: [PATCH 16/48] Feather investigation (#894) * init feather implementation * sparse matrix * test notebook * feather pickle compare * test arrow vs feather * add columns condition * Testing * get_dataset add cache format * add pyarrow * sparse matrix check * pep8 and remove files * return type * fix type annotation * value check * change feather condition * fixes and test * fix errors * testing file * feather new file for attributes * change feather attribute file path * delete testing file * testing changes * delete pkls * fixes * fixes * add comments * change default caching * pip version * review comment fixes * newline * fix if condition * Update install.sh * pandas verison due to sparse data * review #2 * Update appveyor.yml * Update appveyor.yml * rename cache dir --- appveyor.yml | 6 +- ci_scripts/install.sh | 2 +- doc/progress.rst | 1 + openml/datasets/dataset.py | 80 ++++++++++++++----- openml/datasets/functions.py | 18 ++++- setup.py | 5 +- tests/test_datasets/test_dataset_functions.py | 39 +++++++++ 7 files changed, 123 insertions(+), 28 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index dc4402b67..da372a895 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -5,10 +5,10 @@ environment: # CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\scikit-learn-contrib\\run_with_env.cmd" matrix: - - PYTHON: "C:\\Python35-x64" - PYTHON_VERSION: "3.5" + - PYTHON: "C:\\Python3-x64" + PYTHON_VERSION: "3.6" PYTHON_ARCH: "64" - MINICONDA: "C:\\Miniconda35-x64" + MINICONDA: "C:\\Miniconda36-x64" matrix: fast_finish: true diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh index 15cb84bca..93d3e1d77 100644 --- a/ci_scripts/install.sh +++ b/ci_scripts/install.sh @@ -35,7 +35,7 @@ fi python --version if [[ "$TEST_DIST" == "true" ]]; then - pip install 
twine nbconvert jupyter_client matplotlib pytest pytest-xdist pytest-timeout \ + pip install twine nbconvert jupyter_client matplotlib pyarrow pytest pytest-xdist pytest-timeout \ nbformat oslo.concurrency flaky python setup.py sdist # Find file which was modified last as done in https://stackoverflow.com/a/4561987 diff --git a/doc/progress.rst b/doc/progress.rst index 95455f49b..681c85fa1 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -15,6 +15,7 @@ Changelog logging to console and file. * MAINT #767: Source distribution installation is now unit-tested. * MAINT #865: OpenML no longer bundles test files in the source distribution. +* ADD #894: Support caching of datasets using feather format as an option. 0.10.2 ~~~~~~ diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 9f831458b..db4daece4 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -36,6 +36,8 @@ class OpenMLDataset(OpenMLBase): Description of the dataset. format : str Format of the dataset which can be either 'arff' or 'sparse_arff'. + cache_format : str + Format for caching the dataset which can be either 'feather' or 'pickle'. dataset_id : int, optional Id autogenerated by the server. version : int, optional @@ -99,7 +101,8 @@ class OpenMLDataset(OpenMLBase): Serialized arff dataset string. """ def __init__(self, name, description, format=None, - data_format='arff', dataset_id=None, version=None, + data_format='arff', cache_format='pickle', + dataset_id=None, version=None, creator=None, contributor=None, collection_date=None, upload_date=None, language=None, licence=None, url=None, default_target_attribute=None, @@ -127,6 +130,11 @@ def __init__(self, name, description, format=None, self.name = name self.version = int(version) if version is not None else None self.description = description + if cache_format not in ['feather', 'pickle']: + raise ValueError("cache_format must be one of 'feather' or 'pickle. " + "Invalid format specified: {}".format(cache_format)) + + self.cache_format = cache_format if format is None: self.format = data_format else: @@ -180,9 +188,11 @@ def __init__(self, name, description, format=None, self.qualities = _check_qualities(qualities) if data_file is not None: - self.data_pickle_file = self._create_pickle_in_cache(data_file) + self.data_pickle_file, self.data_feather_file,\ + self.feather_attribute_file = self._create_pickle_in_cache(data_file) else: - self.data_pickle_file = None + self.data_pickle_file, self.data_feather_file, \ + self.feather_attribute_file = None, None, None @property def id(self) -> Optional[int]: @@ -396,10 +406,12 @@ def _parse_data_from_arff( return X, categorical, attribute_names - def _create_pickle_in_cache(self, data_file: str) -> str: + def _create_pickle_in_cache(self, data_file: str) -> Tuple[str, str, str]: """ Parse the arff and pickle the result. Update any old pickle objects. """ data_pickle_file = data_file.replace('.arff', '.pkl.py3') - if os.path.exists(data_pickle_file): + data_feather_file = data_file.replace('.arff', '.feather') + feather_attribute_file = data_file.replace('.arff', '.feather.attributes.pkl.py3') + if os.path.exists(data_pickle_file) and self.cache_format == 'pickle': # Load the data to check if the pickle file is outdated (i.e. contains numpy array) with open(data_pickle_file, "rb") as fh: try: @@ -407,7 +419,7 @@ def _create_pickle_in_cache(self, data_file: str) -> str: except EOFError: # The file is likely corrupt, see #780. # We deal with this when loading the data in `_load_data`. 
- return data_pickle_file + return data_pickle_file, data_feather_file, feather_attribute_file # Between v0.8 and v0.9 the format of pickled data changed from # np.ndarray to pd.DataFrame. This breaks some backwards compatibility, @@ -416,32 +428,62 @@ def _create_pickle_in_cache(self, data_file: str) -> str: # pd.DataFrame blob. See also #646. if isinstance(data, pd.DataFrame) or scipy.sparse.issparse(data): logger.debug("Data pickle file already exists and is up to date.") - return data_pickle_file + return data_pickle_file, data_feather_file, feather_attribute_file + elif os.path.exists(data_feather_file) and self.cache_format == 'feather': + # Load the data to check if the pickle file is outdated (i.e. contains numpy array) + try: + data = pd.read_feather(data_feather_file) + except EOFError: + # The file is likely corrupt, see #780. + # We deal with this when loading the data in `_load_data`. + return data_pickle_file, data_feather_file, feather_attribute_file + + logger.debug("Data feather file already exists and is up to date.") + return data_pickle_file, data_feather_file, feather_attribute_file # At this point either the pickle file does not exist, or it had outdated formatting. # We parse the data from arff again and populate the cache with a recent pickle file. X, categorical, attribute_names = self._parse_data_from_arff(data_file) - with open(data_pickle_file, "wb") as fh: - pickle.dump((X, categorical, attribute_names), fh, pickle.HIGHEST_PROTOCOL) - logger.debug("Saved dataset {did}: {name} to file {path}" - .format(did=int(self.dataset_id or -1), - name=self.name, - path=data_pickle_file) - ) + # Feather format does not work for sparse datasets, so we use pickle for sparse datasets - return data_pickle_file + if self.cache_format == "feather" and not scipy.sparse.issparse(X): + logger.info("feather write {}".format(self.name)) + X.to_feather(data_feather_file) + with open(feather_attribute_file, "wb") as fh: + pickle.dump((categorical, attribute_names), fh, pickle.HIGHEST_PROTOCOL) + else: + logger.info("pickle write {}".format(self.name)) + self.cache_format = 'pickle' + with open(data_pickle_file, "wb") as fh: + pickle.dump((X, categorical, attribute_names), fh, pickle.HIGHEST_PROTOCOL) + logger.debug("Saved dataset {did}: {name} to file {path}" + .format(did=int(self.dataset_id or -1), + name=self.name, + path=data_pickle_file) + ) + return data_pickle_file, data_feather_file, feather_attribute_file def _load_data(self): """ Load data from pickle or arff. Download data first if not present on disk. 
""" - if self.data_pickle_file is None: + if (self.cache_format == 'pickle' and self.data_pickle_file is None) or \ + (self.cache_format == 'feather' and self.data_feather_file is None): if self.data_file is None: self._download_data() - self.data_pickle_file = self._create_pickle_in_cache(self.data_file) + self.data_pickle_file, self.data_feather_file, self.feather_attribute_file = \ + self._create_pickle_in_cache(self.data_file) try: - with open(self.data_pickle_file, "rb") as fh: - data, categorical, attribute_names = pickle.load(fh) + if self.cache_format == 'feather': + logger.info("feather load data {}".format(self.name)) + data = pd.read_feather(self.data_feather_file) + + with open(self.feather_attribute_file, "rb") as fh: + categorical, attribute_names = pickle.load(fh) + else: + logger.info("pickle load data {}".format(self.name)) + with open(self.data_pickle_file, "rb") as fh: + data, categorical, attribute_names = pickle.load(fh) except EOFError: logger.warning( "Detected a corrupt cache file loading dataset %d: '%s'. " diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 657fbc7c6..ccf9a4239 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -451,7 +451,8 @@ def get_dataset( dataset_id: Union[int, str], download_data: bool = True, version: int = None, - error_if_multiple: bool = False + error_if_multiple: bool = False, + cache_format: str = 'pickle' ) -> OpenMLDataset: """ Download the OpenML dataset representation, optionally also download actual data file. @@ -479,12 +480,19 @@ def get_dataset( If no version is specified, retrieve the least recent still active version. error_if_multiple : bool, optional (default=False) If ``True`` raise an error if multiple datasets are found with matching criteria. - + cache_format : str, optional (default='pickle') + Format for caching the dataset - may be feather or pickle + Note that the default 'pickle' option may load slower than feather when + no.of.rows is very high. Returns ------- dataset : :class:`openml.OpenMLDataset` The downloaded dataset. """ + if cache_format not in ['feather', 'pickle']: + raise ValueError("cache_format must be one of 'feather' or 'pickle. " + "Invalid format specified: {}".format(cache_format)) + if isinstance(dataset_id, str): try: dataset_id = int(dataset_id) @@ -527,7 +535,7 @@ def get_dataset( did_cache_dir) dataset = _create_dataset_from_description( - description, features, qualities, arff_file + description, features, qualities, arff_file, cache_format ) return dataset @@ -975,6 +983,7 @@ def _create_dataset_from_description( features: Dict, qualities: List, arff_file: str = None, + cache_format: str = 'pickle', ) -> OpenMLDataset: """Create a dataset object from a description dict. @@ -988,6 +997,8 @@ def _create_dataset_from_description( Description of a dataset qualities. arff_file : string, optional Path of dataset ARFF file. + cache_format: string, optional + Caching option for datasets (feather/pickle) Returns ------- @@ -1019,6 +1030,7 @@ def _create_dataset_from_description( update_comment=description.get("oml:update_comment"), md5_checksum=description.get("oml:md5_checksum"), data_file=arff_file, + cache_format=cache_format, features=features, qualities=qualities, ) diff --git a/setup.py b/setup.py index 46e4ae8b2..61f286874 100644 --- a/setup.py +++ b/setup.py @@ -49,9 +49,9 @@ 'requests', 'scikit-learn>=0.18', 'python-dateutil', # Installed through pandas anyway. 
- 'pandas>=0.19.2', + 'pandas>=0.19.2, <1.0.0', 'scipy>=0.13.3', - 'numpy>=1.6.2' + 'numpy>=1.6.2', ], extras_require={ 'test': [ @@ -64,6 +64,7 @@ 'nbformat', 'oslo.concurrency', 'flaky', + 'pyarrow' ], 'examples': [ 'matplotlib', diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 2f1a820aa..5e07cbe04 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -1316,3 +1316,42 @@ def test_list_qualities(self): qualities = openml.datasets.list_qualities() self.assertEqual(isinstance(qualities, list), True) self.assertEqual(all([isinstance(q, str) for q in qualities]), True) + + def test_get_dataset_cache_format_pickle(self): + dataset = openml.datasets.get_dataset(1) + self.assertEqual(type(dataset), OpenMLDataset) + self.assertEqual(dataset.name, 'anneal') + self.assertGreater(len(dataset.features), 1) + self.assertGreater(len(dataset.qualities), 4) + + X, y, categorical, attribute_names = dataset.get_data() + self.assertIsInstance(X, pd.DataFrame) + self.assertEqual(X.shape, (898, 39)) + self.assertEqual(len(categorical), X.shape[1]) + self.assertEqual(len(attribute_names), X.shape[1]) + + def test_get_dataset_cache_format_feather(self): + + dataset = openml.datasets.get_dataset(128, cache_format='feather') + + # Check if dataset is written to cache directory using feather + cache_dir = openml.config.get_cache_directory() + cache_dir_for_id = os.path.join(cache_dir, 'datasets', '128') + feather_file = os.path.join(cache_dir_for_id, 'dataset.feather') + pickle_file = os.path.join(cache_dir_for_id, 'dataset.feather.attributes.pkl.py3') + data = pd.read_feather(feather_file) + self.assertTrue(os.path.isfile(feather_file), msg='Feather file is missing') + self.assertTrue(os.path.isfile(pickle_file), msg='Attributes pickle file is missing') + self.assertEqual(data.shape, (150, 5)) + + # Check if get_data is able to retrieve feather data + self.assertEqual(type(dataset), OpenMLDataset) + self.assertEqual(dataset.name, 'iris') + self.assertGreater(len(dataset.features), 1) + self.assertGreater(len(dataset.qualities), 4) + + X, y, categorical, attribute_names = dataset.get_data() + self.assertIsInstance(X, pd.DataFrame) + self.assertEqual(X.shape, (150, 5)) + self.assertEqual(len(categorical), X.shape[1]) + self.assertEqual(len(attribute_names), X.shape[1]) From 4b9b8737c2a4e1187f155642c935deb2753fd14d Mon Sep 17 00:00:00 2001 From: Rong-Inspur <56406231+Rong-Inspur@users.noreply.github.com> Date: Thu, 27 Feb 2020 09:07:42 -0800 Subject: [PATCH 17/48] Remove __version__ from __all__ in openml\__init__.py (#903) * remove __version__ from __all__ in init * Add comment for flake8 test --- openml/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openml/__init__.py b/openml/__init__.py index f71c32e40..aef8a2aec 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -46,7 +46,7 @@ from .setups import OpenMLSetup, OpenMLParameter -from .__version__ import __version__ +from .__version__ import __version__ # noqa: F401 def populate_cache(task_ids=None, dataset_ids=None, flow_ids=None, @@ -114,7 +114,6 @@ def populate_cache(task_ids=None, dataset_ids=None, flow_ids=None, 'study', 'utils', '_api_calls', - '__version__', ] # Load the scikit-learn extension by default From 249abc901c34bc87c4b283e42e754308a5a2d629 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Fri, 6 Mar 2020 14:01:52 +0100 Subject: [PATCH 18/48] Removing support for deprecated pandas 
SparseDataFrame (#897) * Removing support for pandas SparseDataFrame * Fixing rebase loss * Reiterating with Matthias' changes * Rolling back setup * Fixing PEP8 * Changing check to detect sparse dataframes * Fixing edge case to handle server side arff issue * Removing stray comment * Failing test case fix * Removing stray comment --- .travis.yml | 1 - doc/progress.rst | 4 +++- examples/30_extended/create_upload_tutorial.py | 8 ++++---- examples/30_extended/datasets_tutorial.py | 2 +- openml/datasets/dataset.py | 6 ++---- openml/datasets/functions.py | 9 ++++----- setup.py | 9 ++++----- tests/test_datasets/test_dataset.py | 4 +++- tests/test_datasets/test_dataset_functions.py | 17 +++++++---------- 9 files changed, 28 insertions(+), 32 deletions(-) diff --git a/.travis.yml b/.travis.yml index c1c397967..dcfda6d37 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,7 +15,6 @@ env: - TEST_DIR=/tmp/test_dir/ - MODULE=openml matrix: - - DISTRIB="conda" PYTHON_VERSION="3.5" SKLEARN_VERSION="0.21.2" - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2" TEST_DIST="true" - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" RUN_FLAKE8="true" SKIP_TESTS="true" - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" COVERAGE="true" DOCPUSH="true" diff --git a/doc/progress.rst b/doc/progress.rst index 681c85fa1..976c5c750 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -10,11 +10,13 @@ Changelog ~~~~~~ * FIX #873: Fixes an issue which resulted in incorrect URLs when printing OpenML objects after - switching the server + switching the server. * FIX #885: Logger no longer registered by default. Added utility functions to easily register logging to console and file. * MAINT #767: Source distribution installation is now unit-tested. +* MAINT #836: OpenML supports only pandas version 1.0.0 or above. * MAINT #865: OpenML no longer bundles test files in the source distribution. +* MAINT #897: Dropping support for Python 3.5. * ADD #894: Support caching of datasets using feather format as an option. 0.10.2 diff --git a/examples/30_extended/create_upload_tutorial.py b/examples/30_extended/create_upload_tutorial.py index 7c3af4b9f..28687109b 100644 --- a/examples/30_extended/create_upload_tutorial.py +++ b/examples/30_extended/create_upload_tutorial.py @@ -283,15 +283,15 @@ ############################################################################ -# Dataset is a pandas sparse dataframe -# ==================================== +# Dataset is a pandas dataframe with sparse columns +# ================================================= sparse_data = coo_matrix(( - [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], ([0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1]) )) column_names = ['input1', 'input2', 'y'] -df = pd.SparseDataFrame(sparse_data, columns=column_names) +df = pd.DataFrame.sparse.from_spmatrix(sparse_data, columns=column_names) print(df.info()) xor_dataset = create_dataset( diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index 4728008b4..4b0bbc651 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -68,7 +68,7 @@ # Get the actual data. # # The dataset can be returned in 2 possible formats: as a NumPy array, a SciPy -# sparse matrix, or as a Pandas DataFrame (or SparseDataFrame). The format is +# sparse matrix, or as a Pandas DataFrame. 
The format is # controlled with the parameter ``dataset_format`` which can be either 'array' # (default) or 'dataframe'. Let's first build our dataset from a NumPy array # and manually create a dataframe. diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index db4daece4..942067f8f 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -551,9 +551,7 @@ def _encode_if_category(column): ) elif array_format == "dataframe": if scipy.sparse.issparse(data): - return pd.SparseDataFrame(data, columns=attribute_names) - else: - return data + return pd.DataFrame.sparse.from_spmatrix(data, columns=attribute_names) else: data_type = "sparse-data" if scipy.sparse.issparse(data) else "non-sparse data" logger.warning( @@ -602,7 +600,7 @@ def get_data( dataset_format : string (default='dataframe') The format of returned dataset. If ``array``, the returned dataset will be a NumPy array or a SciPy sparse matrix. - If ``dataframe``, the returned dataset will be a Pandas DataFrame or SparseDataFrame. + If ``dataframe``, the returned dataset will be a Pandas DataFrame. Returns ------- diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index ccf9a4239..26f52a724 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -672,7 +672,7 @@ def create_dataset(name, description, creator, contributor, class:`openml.OpenMLDataset` Dataset description.""" - if isinstance(data, (pd.DataFrame, pd.SparseDataFrame)): + if isinstance(data, pd.DataFrame): # infer the row id from the index of the dataset if row_id_attribute is None: row_id_attribute = data.index.name @@ -684,8 +684,7 @@ def create_dataset(name, description, creator, contributor, if attributes == 'auto' or isinstance(attributes, dict): if not hasattr(data, "columns"): raise ValueError("Automatically inferring attributes requires " - "a pandas DataFrame or SparseDataFrame. " - "A {!r} was given instead.".format(data)) + "a pandas DataFrame. A {!r} was given instead.".format(data)) # infer the type of data for each column of the DataFrame attributes_ = attributes_arff_from_df(data) if isinstance(attributes, dict): @@ -708,8 +707,8 @@ def create_dataset(name, description, creator, contributor, ) if hasattr(data, "columns"): - if isinstance(data, pd.SparseDataFrame): - data = data.to_coo() + if all(isinstance(dtype, pd.SparseDtype) for dtype in data.dtypes): + data = data.sparse.to_coo() # liac-arff only support COO matrices with sorted rows row_idx_sorted = np.argsort(data.row) data.row = data.row[row_idx_sorted] diff --git a/setup.py b/setup.py index 61f286874..c55888b19 100644 --- a/setup.py +++ b/setup.py @@ -9,9 +9,9 @@ with open("openml/__version__.py") as fh: version = fh.readlines()[-1].split()[-1].strip("\"'") -if sys.version_info < (3, 5): +if sys.version_info < (3, 6): raise ValueError( - 'Unsupported Python version {}.{}.{} found. OpenML requires Python 3.5 or higher.' + 'Unsupported Python version {}.{}.{} found. OpenML requires Python 3.6 or higher.' .format(sys.version_info.major, sys.version_info.minor, sys.version_info.micro) ) @@ -42,14 +42,14 @@ exclude=["*.tests", "*.tests.*", "tests.*", "tests"], ), package_data={'': ['*.txt', '*.md']}, - python_requires=">=3.5", + python_requires=">=3.6", install_requires=[ 'liac-arff>=2.4.0', 'xmltodict', 'requests', 'scikit-learn>=0.18', 'python-dateutil', # Installed through pandas anyway. 
- 'pandas>=0.19.2, <1.0.0', + 'pandas>=1.0.0', 'scipy>=0.13.3', 'numpy>=1.6.2', ], @@ -92,6 +92,5 @@ 'Operating System :: Unix', 'Operating System :: MacOS', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7']) diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index f40dc5015..986dca4c1 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -286,7 +286,9 @@ def test_get_sparse_dataset(self): def test_get_sparse_dataframe(self): rval, *_ = self.sparse_dataset.get_data() - self.assertTrue(isinstance(rval, pd.SparseDataFrame)) + self.assertIsInstance(rval, pd.DataFrame) + np.testing.assert_array_equal( + [pd.SparseDtype(np.float32, fill_value=0.0)] * len(rval.dtypes), rval.dtypes) self.assertEqual((600, 20001), rval.shape) def test_get_sparse_dataset_with_rowid(self): diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 5e07cbe04..9c01c57e7 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -561,12 +561,9 @@ def test_attributes_arff_from_df(self): ('string', 'STRING'), ('category', ['A', 'B']), ('boolean', ['True', 'False'])]) - # SparseDataFrame case - df = pd.SparseDataFrame([[1, 1.0], - [2, 2.0], - [0, 0]], - columns=['integer', 'floating'], - default_fill_value=0) + # DataFrame with Sparse columns case + df = pd.DataFrame({"integer": pd.arrays.SparseArray([1, 2, 0], fill_value=0), + "floating": pd.arrays.SparseArray([1.0, 2.0, 0], fill_value=0.0)}) df['integer'] = df['integer'].astype(np.int64) attributes = attributes_arff_from_df(df) self.assertEqual(attributes, [('integer', 'INTEGER'), @@ -925,15 +922,15 @@ def test_create_dataset_pandas(self): "Uploaded ARFF does not match original one" ) - # Check that SparseDataFrame are supported properly + # Check that DataFrame with Sparse columns are supported properly sparse_data = scipy.sparse.coo_matrix(( - [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], ([0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1]) )) column_names = ['input1', 'input2', 'y'] - df = pd.SparseDataFrame(sparse_data, columns=column_names) + df = pd.DataFrame.sparse.from_spmatrix(sparse_data, columns=column_names) # meta-information - description = 'Synthetic dataset created from a Pandas SparseDataFrame' + description = 'Synthetic dataset created from a Pandas DataFrame with Sparse columns' dataset = openml.datasets.functions.create_dataset( name=name, description=description, From df864c2da2ad217a453a39295fcd659c861f6070 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Thu, 18 Jun 2020 14:43:11 +0200 Subject: [PATCH 19/48] Fixing documentation typo (#914) * Fixing typos * Rewording --- examples/30_extended/create_upload_tutorial.py | 5 +++-- openml/study/functions.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/30_extended/create_upload_tutorial.py b/examples/30_extended/create_upload_tutorial.py index 28687109b..92e1a4e3e 100644 --- a/examples/30_extended/create_upload_tutorial.py +++ b/examples/30_extended/create_upload_tutorial.py @@ -217,8 +217,9 @@ print(df.info()) ############################################################################ -# We enforce the column 'outlook', 'windy', and 'play' to be a categorical -# dtype while the column 'rnd_str' is kept as a string column. 
Then, we can +# We enforce the column 'outlook' and 'play' to be a categorical +# dtype while the column 'windy' is kept as a boolean column. 'temperature' +# and 'humidity' are kept as numeric columns. Then, we can # call :func:`create_dataset` by passing the dataframe and fixing the parameter # ``attributes`` to ``'auto'``. diff --git a/openml/study/functions.py b/openml/study/functions.py index 35889c68d..015b5c19a 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -386,7 +386,7 @@ def detach_from_suite(suite_id: int, task_ids: List[int]) -> int: OpenML id of the study task_ids : list (int) - List of entities to link to the collection + List of entities to unlink from the collection Returns ------- @@ -404,7 +404,7 @@ def detach_from_study(study_id: int, run_ids: List[int]) -> int: OpenML id of the study run_ids : list (int) - List of entities to link to the collection + List of entities to unlink from the collection Returns ------- From 5a31f8e47317a5ef01427172d1c49ac03ccab1cb Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Wed, 1 Jul 2020 16:35:35 +0200 Subject: [PATCH 20/48] Sphinx issue fix (#923) * Sphinx issue fix * Removing comment --- doc/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 03a2ec0db..aba5ab049 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -343,7 +343,7 @@ # Sphinx-gallery configuration. sphinx_gallery_conf = { # disable mini galleries clustered by the used functions - 'backreferences_dir': False, + 'backreferences_dir': None, # path to the examples 'examples_dirs': '../examples', # path where to save gallery generated examples @@ -355,4 +355,4 @@ def setup(app): - app.add_stylesheet("codehighlightstyle.css") + app.add_css_file("codehighlightstyle.css") From 861600bc1677af694550ca11497fe4d3df60de6d Mon Sep 17 00:00:00 2001 From: Joaquin Vanschoren Date: Wed, 1 Jul 2020 16:49:05 +0200 Subject: [PATCH 21/48] More robust handling of openml_url (#921) I ran into issues when the openml server config is not exactly 'https://www.openml.org/api/v1/xml', e.g. I had 'https://www.openml.org/api/v1'. I only noticed when getting a bad dataset url. This edit makes the API more robust against how exactly the server URL is set in the config. --- openml/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/config.py b/openml/config.py index 4a8017228..8c4de1431 100644 --- a/openml/config.py +++ b/openml/config.py @@ -101,7 +101,7 @@ def get_server_base_url() -> str: ======= str """ - return server[:-len('/api/v1/xml')] + return server.split("/api")[0] apikey = _defaults['apikey'] From 8f99ff6a05d8701098e776826ae78c36e8194782 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 2 Jul 2020 16:03:32 +0200 Subject: [PATCH 22/48] [WIP] Add 781 (#922) * Add Flake8 configuration Uses the configuration from ci_scripts * Add mypy configuration file Based on the ci_scripts parameters. * Pre-commit mypy flake8, add flake8 excludes Any venv folder does not need flake8. The example directory got flake8 warnings so I assumed it should be excluded. * Add Black to pre-commit Add ignore E203 as Black will observe PEPs specification for white space around a colon it is next to an expression. * Set max line length to 100 * Blacken code There are a few places where big indentation is introduced that may warrant refactoring so it looks better. I did not refactor anything yet, but did exlude three (?) lists (of ids) to not be formatted. 
* Add unit tests to flake8 and mypy pre-commit * Use pre-commit for flake8, mypy and black checks This ensures it runs with the same versions and settings as developers. * Update docs, add 'test' dependencies Add two other developer dependencies not strictly required for unit tests, but required for development. I think the overlap between people who want to execute unit tests and perform commits is (close to) 100% anyway. * Uninstall pytest-cov on appveyor ci It seems to cause an error on import due to a missing sqlite3 dll. As we don't check coverage anyway, hopefully just uninstalling is sufficient. * Add -y to uninstall * Sphinx issue fix (#923) * Sphinx issue fix * Removing comment * More robust handling of openml_url (#921) I ran into issues when the openml server config is not exactly 'https://www.openml.org/api/v1/xml', e.g. I had 'https://www.openml.org/api/v1'. I only noticed when getting a bad dataset url. This edit makes the API more robust against how exactly the server URL is set in the config. * format for black artifacts * Add Flake8 configuration Uses the configuration from ci_scripts * Add mypy configuration file Based on the ci_scripts parameters. * Pre-commit mypy flake8, add flake8 excludes Any venv folder does not need flake8. The example directory got flake8 warnings so I assumed it should be excluded. * Add Black to pre-commit Add ignore E203 as Black will observe PEPs specification for white space around a colon it is next to an expression. * Set max line length to 100 * Blacken code There are a few places where big indentation is introduced that may warrant refactoring so it looks better. I did not refactor anything yet, but did exlude three (?) lists (of ids) to not be formatted. * Add unit tests to flake8 and mypy pre-commit * Use pre-commit for flake8, mypy and black checks This ensures it runs with the same versions and settings as developers. * Update docs, add 'test' dependencies Add two other developer dependencies not strictly required for unit tests, but required for development. I think the overlap between people who want to execute unit tests and perform commits is (close to) 100% anyway. * Uninstall pytest-cov on appveyor ci It seems to cause an error on import due to a missing sqlite3 dll. As we don't check coverage anyway, hopefully just uninstalling is sufficient. 
* Add -y to uninstall * format for black artifacts Co-authored-by: Neeratyoy Mallik Co-authored-by: Joaquin Vanschoren --- .flake8 | 10 + .pre-commit-config.yaml | 24 + CONTRIBUTING.md | 42 +- appveyor.yml | 2 + ci_scripts/flake8_diff.sh | 9 - ci_scripts/install.sh | 3 +- ci_scripts/test.sh | 2 +- doc/conf.py | 126 +- examples/20_basic/simple_datasets_tutorial.py | 13 +- .../simple_flows_and_runs_tutorial.py | 3 +- examples/30_extended/configure_logging.py | 6 +- .../30_extended/create_upload_tutorial.py | 158 +- examples/30_extended/datasets_tutorial.py | 38 +- .../30_extended/fetch_evaluations_tutorial.py | 60 +- .../30_extended/flows_and_runs_tutorial.py | 67 +- .../plot_svm_hyperparameters_tutorial.py | 24 +- examples/30_extended/run_setup_tutorial.py | 26 +- examples/30_extended/study_tutorial.py | 20 +- examples/30_extended/suites_tutorial.py | 10 +- .../task_manual_iteration_tutorial.py | 61 +- examples/30_extended/tasks_tutorial.py | 27 +- .../40_paper/2015_neurips_feurer_example.py | 6 +- examples/40_paper/2018_ida_strang_example.py | 47 +- examples/40_paper/2018_kdd_rijn_example.py | 83 +- .../40_paper/2018_neurips_perrone_example.py | 108 +- mypy.ini | 6 + openml/__init__.py | 59 +- openml/_api_calls.py | 140 +- openml/base.py | 27 +- openml/config.py | 83 +- openml/datasets/__init__.py | 22 +- openml/datasets/data_feature.py | 31 +- openml/datasets/dataset.py | 419 ++--- openml/datasets/functions.py | 390 +++-- openml/evaluations/__init__.py | 8 +- openml/evaluations/evaluation.py | 71 +- openml/evaluations/functions.py | 258 ++-- openml/exceptions.py | 9 +- openml/extensions/__init__.py | 8 +- openml/extensions/extension_interface.py | 25 +- openml/extensions/functions.py | 21 +- openml/extensions/sklearn/__init__.py | 2 +- openml/extensions/sklearn/extension.py | 798 +++++----- openml/flows/__init__.py | 12 +- openml/flows/flow.py | 277 ++-- openml/flows/functions.py | 237 +-- openml/runs/__init__.py | 24 +- openml/runs/functions.py | 422 +++--- openml/runs/run.py | 411 ++--- openml/runs/trace.py | 212 ++- openml/setups/__init__.py | 10 +- openml/setups/functions.py | 223 +-- openml/setups/setup.py | 66 +- openml/study/__init__.py | 32 +- openml/study/functions.py | 236 +-- openml/study/study.py | 52 +- openml/tasks/__init__.py | 24 +- openml/tasks/functions.py | 276 ++-- openml/tasks/split.py | 79 +- openml/tasks/task.py | 269 ++-- openml/testing.py | 87 +- openml/utils.py | 126 +- setup.py | 160 +- tests/__init__.py | 2 +- tests/conftest.py | 60 +- tests/test_datasets/test_dataset.py | 141 +- tests/test_datasets/test_dataset_functions.py | 959 ++++++------ .../test_evaluation_functions.py | 100 +- .../test_evaluations_example.py | 19 +- tests/test_extensions/test_functions.py | 19 +- .../test_sklearn_extension.py | 1348 +++++++++-------- tests/test_flows/test_flow.py | 271 ++-- tests/test_flows/test_flow_functions.py | 281 ++-- tests/test_openml/test_api_calls.py | 4 +- tests/test_openml/test_config.py | 9 +- tests/test_openml/test_openml.py | 22 +- tests/test_runs/test_run.py | 134 +- tests/test_runs/test_run_functions.py | 789 +++++----- tests/test_runs/test_trace.py | 61 +- tests/test_setups/__init__.py | 2 +- tests/test_setups/test_setup_functions.py | 38 +- tests/test_study/test_study_examples.py | 23 +- tests/test_study/test_study_functions.py | 79 +- tests/test_tasks/__init__.py | 4 +- tests/test_tasks/test_classification_task.py | 7 +- tests/test_tasks/test_clustering_task.py | 11 +- tests/test_tasks/test_learning_curve_task.py | 7 +- tests/test_tasks/test_split.py 
| 27 +- tests/test_tasks/test_supervised_task.py | 5 +- tests/test_tasks/test_task.py | 35 +- tests/test_tasks/test_task_functions.py | 110 +- tests/test_tasks/test_task_methods.py | 11 +- tests/test_utils/test_utils.py | 14 +- 93 files changed, 5751 insertions(+), 5428 deletions(-) create mode 100644 .flake8 create mode 100644 .pre-commit-config.yaml delete mode 100755 ci_scripts/flake8_diff.sh create mode 100644 mypy.ini diff --git a/.flake8 b/.flake8 new file mode 100644 index 000000000..c0fe5e06f --- /dev/null +++ b/.flake8 @@ -0,0 +1,10 @@ +[flake8] +max-line-length = 100 +show-source = True +select = C,E,F,W,B +ignore = E203, E402, W503 +per-file-ignores = + *__init__.py:F401 +exclude = + venv + examples diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..75e53f0dd --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,24 @@ +repos: + - repo: https://github.com/psf/black + rev: 19.10b0 + hooks: + - id: black + args: [--line-length=100] + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.761 + hooks: + - id: mypy + name: mypy openml + files: openml/* + - id: mypy + name: mypy tests + files: tests/* + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.3 + hooks: + - id: flake8 + name: flake8 openml + files: openml/* + - id: flake8 + name: flake8 tests + files: tests/* diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7a4da2e1e..42ce4f9f8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -109,29 +109,37 @@ following rules before you submit a pull request: - If any source file is being added to the repository, please add the BSD 3-Clause license to it. -You can also check for common programming errors with the following -tools: - -- Code with good unittest **coverage** (at least 80%), check with: - +First install openml with its test dependencies by running ```bash - $ pip install pytest pytest-cov - $ pytest --cov=. path/to/tests_for_package + $ pip install -e .[test] ``` - -- No style warnings, check with: - +from the repository folder. +This will install dependencies to run unit tests, as well as [pre-commit](https://pre-commit.com/). +To run the unit tests, and check their code coverage, run: ```bash - $ pip install flake8 - $ flake8 --ignore E402,W503 --show-source --max-line-length 100 + $ pytest --cov=. path/to/tests_for_package ``` - -- No mypy (typing) issues, check with: - +Make sure your code has good unittest **coverage** (at least 80%). + +Pre-commit is used for various style checking and code formatting. +Before each commit, it will automatically run: + - [black](https://black.readthedocs.io/en/stable/) a code formatter. + This will automatically format your code. + Make sure to take a second look after any formatting takes place, + if the resulting code is very bloated, consider a (small) refactor. + *note*: If Black reformats your code, the commit will automatically be aborted. + Make sure to add the formatted files (back) to your commit after checking them. + - [mypy](https://mypy.readthedocs.io/en/stable/) a static type checker. + In particular, make sure each function you work on has type hints. + - [flake8](https://flake8.pycqa.org/en/latest/index.html) style guide enforcement. + Almost all of the black-formatted code should automatically pass this check, + but make sure to make adjustments if it does fail. 
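As a small illustration of the mypy point above, a hypothetical helper (not code
from the OpenML package) showing the kind of annotations the hook expects:

```python
from typing import List, Optional


def select_task_ids(task_ids: List[int], limit: Optional[int] = None) -> List[int]:
    """Return at most ``limit`` task ids, keeping their original order."""
    return task_ids if limit is None else task_ids[:limit]
```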
+ +If you want to run the pre-commit tests without doing a commit, run: ```bash - $ pip install mypy - $ mypy openml --ignore-missing-imports --follow-imports skip + $ pre-commit run --all-files ``` +Make sure to do this at least once before your first commit to check your setup works. Filing bugs ----------- diff --git a/appveyor.yml b/appveyor.yml index da372a895..151a5e3f7 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -36,6 +36,8 @@ install: - "cd C:\\projects\\openml-python" - "pip install .[examples,test]" - "pip install scikit-learn==0.21" + # Uninstall coverage, as it leads to an error on appveyor + - "pip uninstall -y pytest-cov" # Not a .NET project, we build scikit-learn in the install step instead diff --git a/ci_scripts/flake8_diff.sh b/ci_scripts/flake8_diff.sh deleted file mode 100755 index 1e32f2c7d..000000000 --- a/ci_scripts/flake8_diff.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -# License: BSD 3-Clause - -# Update /CONTRIBUTING.md if these commands change. -# The reason for not advocating using this script directly is that it -# might not work out of the box on Windows. -flake8 --ignore E402,W503 --show-source --max-line-length 100 $options -mypy openml --ignore-missing-imports --follow-imports skip diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh index 93d3e1d77..67cd1bb38 100644 --- a/ci_scripts/install.sh +++ b/ci_scripts/install.sh @@ -58,7 +58,8 @@ if [[ "$COVERAGE" == "true" ]]; then pip install codecov pytest-cov fi if [[ "$RUN_FLAKE8" == "true" ]]; then - pip install flake8 mypy + pip install pre-commit + pre-commit install fi # Install scikit-learn last to make sure the openml package installation works diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh index 5ffced544..0a1f94df6 100644 --- a/ci_scripts/test.sh +++ b/ci_scripts/test.sh @@ -28,7 +28,7 @@ run_tests() { } if [[ "$RUN_FLAKE8" == "true" ]]; then - source ci_scripts/flake8_diff.sh + pre-commit run --all-files fi if [[ "$SKIP_TESTS" != "true" ]]; then diff --git a/doc/conf.py b/doc/conf.py index aba5ab049..9c4606143 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -23,8 +23,8 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. # sys.path.insert(0, os.path.abspath('.')# ) -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..')) +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) # -- General configuration ------------------------------------------------ @@ -35,38 +35,38 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.doctest', - 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', - 'sphinx.ext.ifconfig', - 'sphinx.ext.autosectionlabel', - 'sphinx_gallery.gen_gallery', - 'numpydoc' + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.doctest", + "sphinx.ext.coverage", + "sphinx.ext.mathjax", + "sphinx.ext.ifconfig", + "sphinx.ext.autosectionlabel", + "sphinx_gallery.gen_gallery", + "numpydoc", ] autosummary_generate = True numpydoc_show_class_members = False -autodoc_default_flags = ['members', 'inherited-members'] +autodoc_default_flags = ["members", "inherited-members"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. 
-source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'OpenML' -copyright = ( - u'2014-{}, the OpenML-Python team.'.format(time.strftime("%Y,%m,%d,%H,%M,%S").split(',')[0]) +project = u"OpenML" +copyright = u"2014-{}, the OpenML-Python team.".format( + time.strftime("%Y,%m,%d,%H,%M,%S").split(",")[0] ) # The version info for the project you're documenting, acts as replacement for @@ -90,7 +90,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build', '_templates', '_static'] +exclude_patterns = ["_build", "_templates", "_static"] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -108,7 +108,7 @@ # show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] @@ -121,39 +121,32 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'bootstrap' +html_theme = "bootstrap" html_theme_options = { # Navigation bar title. (Default: ``project`` value) - 'navbar_title': "OpenML", - + "navbar_title": "OpenML", # Tab name for entire site. (Default: "Site") # 'navbar_site_name': "Site", - # A list of tuples containting pages to link to. The value should # be in the form [(name, page), ..] - 'navbar_links': [ - ('Start', 'index'), - ('User Guide', 'usage'), - ('API', 'api'), - ('Examples', 'examples/index'), - ('Contributing', 'contributing'), - ('Changelog', 'progress'), + "navbar_links": [ + ("Start", "index"), + ("User Guide", "usage"), + ("API", "api"), + ("Examples", "examples/index"), + ("Contributing", "contributing"), + ("Changelog", "progress"), ], - # Render the next and previous page links in navbar. (Default: true) - 'navbar_sidebarrel': False, - + "navbar_sidebarrel": False, # Render the current pages TOC in the navbar. (Default: true) - 'navbar_pagenav': False, - + "navbar_pagenav": False, # Tab name for the current pages TOC. (Default: "Page") - 'navbar_pagenav_name': "On this page", - + "navbar_pagenav_name": "On this page", # Global TOC depth for "site" navbar tab. (Default: 1) # Switching to -1 shows all levels. - 'globaltoc_depth': 1, - + "globaltoc_depth": 1, # Include hidden TOCs in Site navbar? # # Note: If this is "false", you cannot have mixed ``:hidden:`` and @@ -161,29 +154,24 @@ # will break. # # Values: "true" (default) or "false" - 'globaltoc_includehidden': "false", - + "globaltoc_includehidden": "false", # HTML navbar class (Default: "navbar") to attach to
element. # For black navbar, do "navbar navbar-inverse" - 'navbar_class': "navbar", - + "navbar_class": "navbar", # Fix navigation bar to top of page? # Values: "true" (default) or "false" - 'navbar_fixed_top': "true", - + "navbar_fixed_top": "true", # Location of link to source. # Options are "nav" (default), "footer" or anything else to exclude. - 'source_link_position': "None", - + "source_link_position": "None", # Bootswatch (http://bootswatch.com/) theme. # # Options are nothing with "" (default) or the name of a valid theme # such as "amelia" or "cosmo". - 'bootswatch_theme': "flatly", - + "bootswatch_theme": "flatly", # Choose Bootstrap version. # Values: "3" (default) or "2" (in quotes) - 'bootstrap_version': "3", + "bootstrap_version": "3", } # Add any paths that contain custom themes here, relative to this directory. @@ -224,7 +212,7 @@ # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -html_sidebars = {'**': ['localtoc.html']} +html_sidebars = {"**": ["localtoc.html"]} # Additional templates that should be rendered to pages, maps page names to # template names. @@ -257,7 +245,7 @@ # html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'OpenMLdoc' +htmlhelp_basename = "OpenMLdoc" # -- Options for LaTeX output --------------------------------------------- @@ -265,10 +253,8 @@ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # 'preamble': '', } @@ -276,8 +262,9 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). -latex_documents = [('index', 'OpenML.tex', u'OpenML Documentation', - u'Matthias Feurer', 'manual'), ] +latex_documents = [ + ("index", "OpenML.tex", u"OpenML Documentation", u"Matthias Feurer", "manual"), +] # The name of an image file (relative to this directory) to place at the top of # the title page. @@ -304,10 +291,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'openml', u'OpenML Documentation', - [u'Matthias Feurer'], 1) -] +man_pages = [("index", "openml", u"OpenML Documentation", [u"Matthias Feurer"], 1)] # If true, show URL addresses after external links. # man_show_urls = False @@ -319,9 +303,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'OpenML', u'OpenML Documentation', - u'Matthias Feurer', 'OpenML', 'One line description of project.', - 'Miscellaneous'), + ( + "index", + "OpenML", + u"OpenML Documentation", + u"Matthias Feurer", + "OpenML", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. @@ -343,13 +333,13 @@ # Sphinx-gallery configuration. 
sphinx_gallery_conf = { # disable mini galleries clustered by the used functions - 'backreferences_dir': None, + "backreferences_dir": None, # path to the examples - 'examples_dirs': '../examples', + "examples_dirs": "../examples", # path where to save gallery generated examples - 'gallery_dirs': 'examples', + "gallery_dirs": "examples", # compile execute examples in the examples dir - 'filename_pattern': '.*example.py$|.*tutorial.py$', + "filename_pattern": ".*example.py$|.*tutorial.py$", # TODO: fix back/forward references for the examples. } diff --git a/examples/20_basic/simple_datasets_tutorial.py b/examples/20_basic/simple_datasets_tutorial.py index bb90aedcc..c525a3ef9 100644 --- a/examples/20_basic/simple_datasets_tutorial.py +++ b/examples/20_basic/simple_datasets_tutorial.py @@ -14,11 +14,12 @@ # License: BSD 3-Clause import openml + ############################################################################ # List datasets # ============= -datasets_df = openml.datasets.list_datasets(output_format='dataframe') +datasets_df = openml.datasets.list_datasets(output_format="dataframe") print(datasets_df.head(n=10)) ############################################################################ @@ -29,8 +30,10 @@ dataset = openml.datasets.get_dataset(61) # Print a summary -print(f"This is dataset '{dataset.name}', the target feature is " - f"'{dataset.default_target_attribute}'") +print( + f"This is dataset '{dataset.name}', the target feature is " + f"'{dataset.default_target_attribute}'" +) print(f"URL: {dataset.url}") print(dataset.description[:500]) @@ -45,8 +48,7 @@ # attribute_names - the names of the features for the examples (X) and # target feature (y) X, y, categorical_indicator, attribute_names = dataset.get_data( - dataset_format='dataframe', - target=dataset.default_target_attribute + dataset_format="dataframe", target=dataset.default_target_attribute ) ############################################################################ # Visualize the dataset @@ -55,6 +57,7 @@ import pandas as pd import seaborn as sns import matplotlib.pyplot as plt + sns.set_style("darkgrid") diff --git a/examples/20_basic/simple_flows_and_runs_tutorial.py b/examples/20_basic/simple_flows_and_runs_tutorial.py index 14c5c7761..e88add911 100644 --- a/examples/20_basic/simple_flows_and_runs_tutorial.py +++ b/examples/20_basic/simple_flows_and_runs_tutorial.py @@ -23,8 +23,7 @@ # NOTE: We are using dataset 20 from the test server: https://test.openml.org/d/20 dataset = openml.datasets.get_dataset(20) X, y, categorical_indicator, attribute_names = dataset.get_data( - dataset_format='array', - target=dataset.default_target_attribute + dataset_format="array", target=dataset.default_target_attribute ) clf = neighbors.KNeighborsClassifier(n_neighbors=3) clf.fit(X, y) diff --git a/examples/30_extended/configure_logging.py b/examples/30_extended/configure_logging.py index 9b14fffd6..a600b0632 100644 --- a/examples/30_extended/configure_logging.py +++ b/examples/30_extended/configure_logging.py @@ -25,7 +25,8 @@ # License: BSD 3-Clause import openml -openml.datasets.get_dataset('iris') + +openml.datasets.get_dataset("iris") # With default configuration, the above example will show no output to console. 
# However, in your cache directory you should find a file named 'openml_python.log', @@ -37,9 +38,10 @@ # The processed log levels can be configured programmatically: import logging + openml.config.console_log.setLevel(logging.DEBUG) openml.config.file_log.setLevel(logging.WARNING) -openml.datasets.get_dataset('iris') +openml.datasets.get_dataset("iris") # Now the log level that was previously written to file should also be shown in the console. # The message is now no longer written to file as the `file_log` was set to level `WARNING`. diff --git a/examples/30_extended/create_upload_tutorial.py b/examples/30_extended/create_upload_tutorial.py index 92e1a4e3e..f0ea00016 100644 --- a/examples/30_extended/create_upload_tutorial.py +++ b/examples/30_extended/create_upload_tutorial.py @@ -44,7 +44,7 @@ # via the API. diabetes = sklearn.datasets.load_diabetes() -name = 'Diabetes(scikit-learn)' +name = "Diabetes(scikit-learn)" X = diabetes.data y = diabetes.target attribute_names = diabetes.feature_names @@ -59,18 +59,15 @@ data = np.concatenate((X, y.reshape((-1, 1))), axis=1) attribute_names = list(attribute_names) -attributes = [ - (attribute_name, 'REAL') for attribute_name in attribute_names -] + [('class', 'INTEGER')] +attributes = [(attribute_name, "REAL") for attribute_name in attribute_names] + [ + ("class", "INTEGER") +] citation = ( "Bradley Efron, Trevor Hastie, Iain Johnstone and " "Robert Tibshirani (2004) (Least Angle Regression) " "Annals of Statistics (with discussion), 407-499" ) -paper_url = ( - 'http://web.stanford.edu/~hastie/Papers/' - 'LARS/LeastAngle_2002.pdf' -) +paper_url = "http://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf" ############################################################################ # Create the dataset object @@ -88,19 +85,18 @@ # Textual description of the dataset. description=description, # The person who created the dataset. - creator="Bradley Efron, Trevor Hastie, " - "Iain Johnstone and Robert Tibshirani", + creator="Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani", # People who contributed to the current version of the dataset. contributor=None, # The date the data was originally collected, given by the uploader. - collection_date='09-01-2012', + collection_date="09-01-2012", # Language in which the data is represented. # Starts with 1 upper case letter, rest lower case, e.g. 'English'. - language='English', + language="English", # License under which the data is/will be distributed. - licence='BSD (from scikit-learn)', + licence="BSD (from scikit-learn)", # Name of the target. Can also have multiple values (comma-separated). - default_target_attribute='class', + default_target_attribute="class", # The attribute that represents the row-id column, if present in the # dataset. row_id_attribute=None, @@ -113,10 +109,8 @@ attributes=attributes, data=data, # A version label which is provided by the user. 
- version_label='test', - original_data_url=( - 'http://www4.stat.ncsu.edu/~boos/var.select/diabetes.html' - ), + version_label="test", + original_data_url="http://www4.stat.ncsu.edu/~boos/var.select/diabetes.html", paper_url=paper_url, ) @@ -135,62 +129,62 @@ # http://storm.cis.fordham.edu/~gweiss/data-mining/datasets.html data = [ - ['sunny', 85, 85, 'FALSE', 'no'], - ['sunny', 80, 90, 'TRUE', 'no'], - ['overcast', 83, 86, 'FALSE', 'yes'], - ['rainy', 70, 96, 'FALSE', 'yes'], - ['rainy', 68, 80, 'FALSE', 'yes'], - ['rainy', 65, 70, 'TRUE', 'no'], - ['overcast', 64, 65, 'TRUE', 'yes'], - ['sunny', 72, 95, 'FALSE', 'no'], - ['sunny', 69, 70, 'FALSE', 'yes'], - ['rainy', 75, 80, 'FALSE', 'yes'], - ['sunny', 75, 70, 'TRUE', 'yes'], - ['overcast', 72, 90, 'TRUE', 'yes'], - ['overcast', 81, 75, 'FALSE', 'yes'], - ['rainy', 71, 91, 'TRUE', 'no'], + ["sunny", 85, 85, "FALSE", "no"], + ["sunny", 80, 90, "TRUE", "no"], + ["overcast", 83, 86, "FALSE", "yes"], + ["rainy", 70, 96, "FALSE", "yes"], + ["rainy", 68, 80, "FALSE", "yes"], + ["rainy", 65, 70, "TRUE", "no"], + ["overcast", 64, 65, "TRUE", "yes"], + ["sunny", 72, 95, "FALSE", "no"], + ["sunny", 69, 70, "FALSE", "yes"], + ["rainy", 75, 80, "FALSE", "yes"], + ["sunny", 75, 70, "TRUE", "yes"], + ["overcast", 72, 90, "TRUE", "yes"], + ["overcast", 81, 75, "FALSE", "yes"], + ["rainy", 71, 91, "TRUE", "no"], ] attribute_names = [ - ('outlook', ['sunny', 'overcast', 'rainy']), - ('temperature', 'REAL'), - ('humidity', 'REAL'), - ('windy', ['TRUE', 'FALSE']), - ('play', ['yes', 'no']), + ("outlook", ["sunny", "overcast", "rainy"]), + ("temperature", "REAL"), + ("humidity", "REAL"), + ("windy", ["TRUE", "FALSE"]), + ("play", ["yes", "no"]), ] description = ( - 'The weather problem is a tiny dataset that we will use repeatedly' - ' to illustrate machine learning methods. Entirely fictitious, it ' - 'supposedly concerns the conditions that are suitable for playing ' - 'some unspecified game. In general, instances in a dataset are ' - 'characterized by the values of features, or attributes, that measure ' - 'different aspects of the instance. In this case there are four ' - 'attributes: outlook, temperature, humidity, and windy. ' - 'The outcome is whether to play or not.' + "The weather problem is a tiny dataset that we will use repeatedly" + " to illustrate machine learning methods. Entirely fictitious, it " + "supposedly concerns the conditions that are suitable for playing " + "some unspecified game. In general, instances in a dataset are " + "characterized by the values of features, or attributes, that measure " + "different aspects of the instance. In this case there are four " + "attributes: outlook, temperature, humidity, and windy. " + "The outcome is whether to play or not." ) citation = ( - 'I. H. Witten, E. Frank, M. A. Hall, and ITPro,' - 'Data mining practical machine learning tools and techniques, ' - 'third edition. Burlington, Mass.: Morgan Kaufmann Publishers, 2011' + "I. H. Witten, E. Frank, M. A. Hall, and ITPro," + "Data mining practical machine learning tools and techniques, " + "third edition. Burlington, Mass.: Morgan Kaufmann Publishers, 2011" ) weather_dataset = create_dataset( name="Weather", description=description, - creator='I. H. Witten, E. Frank, M. A. Hall, and ITPro', + creator="I. H. Witten, E. Frank, M. A. 
Hall, and ITPro", contributor=None, - collection_date='01-01-2011', - language='English', + collection_date="01-01-2011", + language="English", licence=None, - default_target_attribute='play', + default_target_attribute="play", row_id_attribute=None, ignore_attribute=None, citation=citation, attributes=attribute_names, data=data, - version_label='example', + version_label="example", ) ############################################################################ @@ -211,9 +205,9 @@ df = pd.DataFrame(data, columns=[col_name for col_name, _ in attribute_names]) # enforce the categorical column to have a categorical dtype -df['outlook'] = df['outlook'].astype('category') -df['windy'] = df['windy'].astype('bool') -df['play'] = df['play'].astype('category') +df["outlook"] = df["outlook"].astype("category") +df["windy"] = df["windy"].astype("bool") +df["play"] = df["play"].astype("category") print(df.info()) ############################################################################ @@ -226,18 +220,18 @@ weather_dataset = create_dataset( name="Weather", description=description, - creator='I. H. Witten, E. Frank, M. A. Hall, and ITPro', + creator="I. H. Witten, E. Frank, M. A. Hall, and ITPro", contributor=None, - collection_date='01-01-2011', - language='English', + collection_date="01-01-2011", + language="English", licence=None, - default_target_attribute='play', + default_target_attribute="play", row_id_attribute=None, ignore_attribute=None, citation=citation, - attributes='auto', + attributes="auto", data=df, - version_label='example', + version_label="example", ) ############################################################################ @@ -249,32 +243,31 @@ # Dataset is a sparse matrix # ========================== -sparse_data = coo_matrix(( - [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - ([0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1]) -)) +sparse_data = coo_matrix( + ([0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], ([0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1])) +) column_names = [ - ('input1', 'REAL'), - ('input2', 'REAL'), - ('y', 'REAL'), + ("input1", "REAL"), + ("input2", "REAL"), + ("y", "REAL"), ] xor_dataset = create_dataset( name="XOR", - description='Dataset representing the XOR operation', + description="Dataset representing the XOR operation", creator=None, contributor=None, collection_date=None, - language='English', + language="English", licence=None, - default_target_attribute='y', + default_target_attribute="y", row_id_attribute=None, ignore_attribute=None, citation=None, attributes=column_names, data=sparse_data, - version_label='example', + version_label="example", ) ############################################################################ @@ -287,29 +280,28 @@ # Dataset is a pandas dataframe with sparse columns # ================================================= -sparse_data = coo_matrix(( - [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], - ([0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1]) -)) -column_names = ['input1', 'input2', 'y'] +sparse_data = coo_matrix( + ([1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], ([0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1])) +) +column_names = ["input1", "input2", "y"] df = pd.DataFrame.sparse.from_spmatrix(sparse_data, columns=column_names) print(df.info()) xor_dataset = create_dataset( name="XOR", - description='Dataset representing the XOR operation', + description="Dataset representing the XOR operation", creator=None, contributor=None, collection_date=None, - language='English', + language="English", licence=None, - default_target_attribute='y', + 
default_target_attribute="y", row_id_attribute=None, ignore_attribute=None, citation=None, - attributes='auto', + attributes="auto", data=df, - version_label='example', + version_label="example", ) ############################################################################ diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index 4b0bbc651..d7971d0f1 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -24,17 +24,14 @@ openml_list = openml.datasets.list_datasets() # returns a dict # Show a nice table with some key data properties -datalist = pd.DataFrame.from_dict(openml_list, orient='index') -datalist = datalist[[ - 'did', 'name', 'NumberOfInstances', - 'NumberOfFeatures', 'NumberOfClasses' -]] +datalist = pd.DataFrame.from_dict(openml_list, orient="index") +datalist = datalist[["did", "name", "NumberOfInstances", "NumberOfFeatures", "NumberOfClasses"]] print(f"First 10 of {len(datalist)} datasets...") datalist.head(n=10) # The same can be done with lesser lines of code -openml_df = openml.datasets.list_datasets(output_format='dataframe') +openml_df = openml.datasets.list_datasets(output_format="dataframe") openml_df.head(n=10) ############################################################################ @@ -44,12 +41,11 @@ # * Find datasets with more than 10000 examples. # * Find a dataset called 'eeg_eye_state'. # * Find all datasets with more than 50 classes. -datalist[datalist.NumberOfInstances > 10000 - ].sort_values(['NumberOfInstances']).head(n=20) +datalist[datalist.NumberOfInstances > 10000].sort_values(["NumberOfInstances"]).head(n=20) ############################################################################ datalist.query('name == "eeg-eye-state"') ############################################################################ -datalist.query('NumberOfClasses > 50') +datalist.query("NumberOfClasses > 50") ############################################################################ # Download datasets @@ -59,8 +55,10 @@ dataset = openml.datasets.get_dataset(1471) # Print a summary -print(f"This is dataset '{dataset.name}', the target feature is " - f"'{dataset.default_target_attribute}'") +print( + f"This is dataset '{dataset.name}', the target feature is " + f"'{dataset.default_target_attribute}'" +) print(f"URL: {dataset.url}") print(dataset.description[:500]) @@ -73,19 +71,17 @@ # (default) or 'dataframe'. Let's first build our dataset from a NumPy array # and manually create a dataframe. X, y, categorical_indicator, attribute_names = dataset.get_data( - dataset_format='array', - target=dataset.default_target_attribute + dataset_format="array", target=dataset.default_target_attribute ) eeg = pd.DataFrame(X, columns=attribute_names) -eeg['class'] = y +eeg["class"] = y print(eeg[:10]) ############################################################################ # Instead of manually creating the dataframe, you can already request a # dataframe with the correct dtypes. 
X, y, categorical_indicator, attribute_names = dataset.get_data( - target=dataset.default_target_attribute, - dataset_format='dataframe' + target=dataset.default_target_attribute, dataset_format="dataframe" ) print(X.head()) print(X.info()) @@ -105,10 +101,10 @@ eegs = eeg.sample(n=1000) _ = pd.plotting.scatter_matrix( eegs.iloc[:100, :4], - c=eegs[:100]['class'], + c=eegs[:100]["class"], figsize=(10, 10), - marker='o', - hist_kwds={'bins': 20}, - alpha=.8, - cmap='plasma' + marker="o", + hist_kwds={"bins": 20}, + alpha=0.8, + cmap="plasma", ) diff --git a/examples/30_extended/fetch_evaluations_tutorial.py b/examples/30_extended/fetch_evaluations_tutorial.py index b1c7b9a3d..de636e074 100644 --- a/examples/30_extended/fetch_evaluations_tutorial.py +++ b/examples/30_extended/fetch_evaluations_tutorial.py @@ -32,12 +32,14 @@ # Required filters can be applied to retrieve results from runs as required. # We shall retrieve a small set (only 10 entries) to test the listing function for evaluations -openml.evaluations.list_evaluations(function='predictive_accuracy', size=10, - output_format='dataframe') +openml.evaluations.list_evaluations( + function="predictive_accuracy", size=10, output_format="dataframe" +) # Using other evaluation metrics, 'precision' in this case -evals = openml.evaluations.list_evaluations(function='precision', size=10, - output_format='dataframe') +evals = openml.evaluations.list_evaluations( + function="precision", size=10, output_format="dataframe" +) # Querying the returned results for precision above 0.98 print(evals[evals.value > 0.98]) @@ -48,7 +50,7 @@ # Over here we shall briefly take a look at the details of the task. # We will start by displaying a simple *supervised classification* task: -task_id = 167140 # https://www.openml.org/t/167140 +task_id = 167140 # https://www.openml.org/t/167140 task = openml.tasks.get_task(task_id) print(task) @@ -59,13 +61,14 @@ # we displayed previously. # Note that we now filter the evaluations based on another parameter 'task'. 
-metric = 'predictive_accuracy' -evals = openml.evaluations.list_evaluations(function=metric, task=[task_id], - output_format='dataframe') +metric = "predictive_accuracy" +evals = openml.evaluations.list_evaluations( + function=metric, task=[task_id], output_format="dataframe" +) # Displaying the first 10 rows print(evals.head(n=10)) # Sorting the evaluations in decreasing order of the metric chosen -evals = evals.sort_values(by='value', ascending=False) +evals = evals.sort_values(by="value", ascending=False) print("\nDisplaying head of sorted dataframe: ") print(evals.head()) @@ -79,19 +82,18 @@ from matplotlib import pyplot as plt -def plot_cdf(values, metric='predictive_accuracy'): +def plot_cdf(values, metric="predictive_accuracy"): max_val = max(values) - n, bins, patches = plt.hist(values, density=True, histtype='step', - cumulative=True, linewidth=3) + n, bins, patches = plt.hist(values, density=True, histtype="step", cumulative=True, linewidth=3) patches[0].set_xy(patches[0].get_xy()[:-1]) plt.xlim(max(0, min(values) - 0.1), 1) - plt.title('CDF') + plt.title("CDF") plt.xlabel(metric) - plt.ylabel('Likelihood') - plt.grid(b=True, which='major', linestyle='-') + plt.ylabel("Likelihood") + plt.grid(b=True, which="major", linestyle="-") plt.minorticks_on() - plt.grid(b=True, which='minor', linestyle='--') - plt.axvline(max_val, linestyle='--', color='gray') + plt.grid(b=True, which="minor", linestyle="--") + plt.axvline(max_val, linestyle="--", color="gray") plt.text(max_val, 0, "%.3f" % max_val, fontsize=9) plt.show() @@ -111,7 +113,7 @@ def plot_cdf(values, metric='predictive_accuracy'): import pandas as pd -def plot_flow_compare(evaluations, top_n=10, metric='predictive_accuracy'): +def plot_flow_compare(evaluations, top_n=10, metric="predictive_accuracy"): # Collecting the top 10 performing unique flow_id flow_ids = evaluations.flow_id.unique()[:top_n] @@ -123,18 +125,18 @@ def plot_flow_compare(evaluations, top_n=10, metric='predictive_accuracy'): df = pd.concat([df, flow_values], ignore_index=True, axis=1) fig, axs = plt.subplots() df.boxplot() - axs.set_title('Boxplot comparing ' + metric + ' for different flows') + axs.set_title("Boxplot comparing " + metric + " for different flows") axs.set_ylabel(metric) - axs.set_xlabel('Flow ID') + axs.set_xlabel("Flow ID") axs.set_xticklabels(flow_ids) - axs.grid(which='major', linestyle='-', linewidth='0.5', color='gray', axis='y') + axs.grid(which="major", linestyle="-", linewidth="0.5", color="gray", axis="y") axs.minorticks_on() - axs.grid(which='minor', linestyle='--', linewidth='0.5', color='gray', axis='y') + axs.grid(which="minor", linestyle="--", linewidth="0.5", color="gray", axis="y") # Counting the number of entries for each flow in the data frame # which gives the number of runs for each flow flow_freq = list(df.count(axis=0, numeric_only=True)) for i in range(len(flow_ids)): - axs.text(i + 1.05, np.nanmin(df.values), str(flow_freq[i]) + '\nrun(s)', fontsize=7) + axs.text(i + 1.05, np.nanmin(df.values), str(flow_freq[i]) + "\nrun(s)", fontsize=7) plt.show() @@ -159,8 +161,9 @@ def plot_flow_compare(evaluations, top_n=10, metric='predictive_accuracy'): # List evaluations in descending order based on predictive_accuracy with # hyperparameters -evals_setups = openml.evaluations.list_evaluations_setups(function='predictive_accuracy', task=[31], - size=100, sort_order='desc') +evals_setups = openml.evaluations.list_evaluations_setups( + function="predictive_accuracy", task=[31], size=100, sort_order="desc" +) "" 
print(evals_setups.head()) @@ -169,10 +172,9 @@ def plot_flow_compare(evaluations, top_n=10, metric='predictive_accuracy'): # Return evaluations for flow_id in descending order based on predictive_accuracy # with hyperparameters. parameters_in_separate_columns returns parameters in # separate columns -evals_setups = openml.evaluations.list_evaluations_setups(function='predictive_accuracy', - flow=[6767], - size=100, - parameters_in_separate_columns=True) +evals_setups = openml.evaluations.list_evaluations_setups( + function="predictive_accuracy", flow=[6767], size=100, parameters_in_separate_columns=True +) "" print(evals_setups.head(10)) diff --git a/examples/30_extended/flows_and_runs_tutorial.py b/examples/30_extended/flows_and_runs_tutorial.py index b307ad260..76eb2f219 100644 --- a/examples/30_extended/flows_and_runs_tutorial.py +++ b/examples/30_extended/flows_and_runs_tutorial.py @@ -24,8 +24,7 @@ # NOTE: We are using dataset 68 from the test server: https://test.openml.org/d/68 dataset = openml.datasets.get_dataset(68) X, y, categorical_indicator, attribute_names = dataset.get_data( - dataset_format='array', - target=dataset.default_target_attribute + dataset_format="array", target=dataset.default_target_attribute ) clf = neighbors.KNeighborsClassifier(n_neighbors=1) clf.fit(X, y) @@ -36,12 +35,12 @@ # * e.g. categorical features -> do feature encoding dataset = openml.datasets.get_dataset(17) X, y, categorical_indicator, attribute_names = dataset.get_data( - dataset_format='array', - target=dataset.default_target_attribute + dataset_format="array", target=dataset.default_target_attribute ) print(f"Categorical features: {categorical_indicator}") transformer = compose.ColumnTransformer( - [('one_hot_encoder', preprocessing.OneHotEncoder(categories='auto'), categorical_indicator)]) + [("one_hot_encoder", preprocessing.OneHotEncoder(categories="auto"), categorical_indicator)] +) X = transformer.fit_transform(X) clf.fit(X, y) @@ -86,29 +85,37 @@ task = openml.tasks.get_task(1) features = task.get_dataset().features nominal_feature_indices = [ - i for i in range(len(features)) - if features[i].name != task.target_name and features[i].data_type == 'nominal' + i + for i in range(len(features)) + if features[i].name != task.target_name and features[i].data_type == "nominal" ] -pipe = pipeline.Pipeline(steps=[ - ( - 'Preprocessing', - compose.ColumnTransformer([ - ('Nominal', pipeline.Pipeline( +pipe = pipeline.Pipeline( + steps=[ + ( + "Preprocessing", + compose.ColumnTransformer( [ - ('Imputer', impute.SimpleImputer(strategy='most_frequent')), ( - 'Encoder', - preprocessing.OneHotEncoder( - sparse=False, handle_unknown='ignore', - ) + "Nominal", + pipeline.Pipeline( + [ + ("Imputer", impute.SimpleImputer(strategy="most_frequent")), + ( + "Encoder", + preprocessing.OneHotEncoder( + sparse=False, handle_unknown="ignore", + ), + ), + ] + ), + nominal_feature_indices, ), - ]), - nominal_feature_indices, - ), - ]), - ), - ('Classifier', ensemble.RandomForestClassifier(n_estimators=10)) -]) + ] + ), + ), + ("Classifier", ensemble.RandomForestClassifier(n_estimators=10)), + ] +) run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False) myrun = run.publish() @@ -125,17 +132,13 @@ task = openml.tasks.get_task(6) # The following lines can then be executed offline: -run = openml.runs.run_model_on_task( - pipe, - task, - avoid_duplicate_runs=False, - upload_flow=False) +run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False, upload_flow=False) # The run may be 
stored offline, and the flow will be stored along with it: -run.to_filesystem(directory='myrun') +run.to_filesystem(directory="myrun") # They may be loaded and uploaded at a later time -run = openml.runs.OpenMLRun.from_filesystem(directory='myrun') +run = openml.runs.OpenMLRun.from_filesystem(directory="myrun") run.publish() # Publishing the run will automatically upload the related flow if @@ -177,7 +180,7 @@ # task_id:`52950 `_, 100k instances, missing values. # Easy benchmarking: -for task_id in [115, ]: # Add further tasks. Disclaimer: they might take some time +for task_id in [115]: # Add further tasks. Disclaimer: they might take some time task = openml.tasks.get_task(task_id) data = openml.datasets.get_dataset(task.dataset_id) clf = neighbors.KNeighborsClassifier(n_neighbors=5) diff --git a/examples/30_extended/plot_svm_hyperparameters_tutorial.py b/examples/30_extended/plot_svm_hyperparameters_tutorial.py index 7ae054a94..aac84bcd4 100644 --- a/examples/30_extended/plot_svm_hyperparameters_tutorial.py +++ b/examples/30_extended/plot_svm_hyperparameters_tutorial.py @@ -19,10 +19,10 @@ # evaluations conducted by the server with the hyperparameter settings extracted from the # uploaded runs (called *setup*). df = openml.evaluations.list_evaluations_setups( - function='predictive_accuracy', + function="predictive_accuracy", flow=[8353], task=[6], - output_format='dataframe', + output_format="dataframe", # Using this flag incorporates the hyperparameters into the returned dataframe. Otherwise, # the dataframe would contain a field ``paramaters`` containing an unparsed dictionary. parameters_in_separate_columns=True, @@ -37,7 +37,7 @@ #################################################################################################### # Next, we cast and transform the hyperparameters of interest (``C`` and ``gamma``) so that we # can nicely plot them. 
-hyperparameters = ['sklearn.svm.classes.SVC(16)_C', 'sklearn.svm.classes.SVC(16)_gamma'] +hyperparameters = ["sklearn.svm.classes.SVC(16)_C", "sklearn.svm.classes.SVC(16)_gamma"] df[hyperparameters] = df[hyperparameters].astype(float).apply(np.log10) #################################################################################################### @@ -45,12 +45,12 @@ # =================================================== # df.plot.hexbin( - x='sklearn.svm.classes.SVC(16)_C', - y='sklearn.svm.classes.SVC(16)_gamma', - C='value', + x="sklearn.svm.classes.SVC(16)_C", + y="sklearn.svm.classes.SVC(16)_gamma", + C="value", reduce_C_function=np.mean, gridsize=25, - title='SVM performance landscape', + title="SVM performance landscape", ) #################################################################################################### @@ -61,12 +61,12 @@ fig, ax = plt.subplots() -C = df['sklearn.svm.classes.SVC(16)_C'] -gamma = df['sklearn.svm.classes.SVC(16)_gamma'] -score = df['value'] +C = df["sklearn.svm.classes.SVC(16)_C"] +gamma = df["sklearn.svm.classes.SVC(16)_gamma"] +score = df["value"] # Plotting all evaluations: -ax.plot(C, gamma, 'ko', ms=1) +ax.plot(C, gamma, "ko", ms=1) # Create a contour plot cntr = ax.tricontourf(C, gamma, score, levels=12, cmap="RdBu_r") # Adjusting the colorbar @@ -78,4 +78,4 @@ xlabel="C (log10)", ylabel="gamma (log10)", ) -ax.set_title('SVM performance landscape') +ax.set_title("SVM performance landscape") diff --git a/examples/30_extended/run_setup_tutorial.py b/examples/30_extended/run_setup_tutorial.py index 071cc51b1..be438e728 100644 --- a/examples/30_extended/run_setup_tutorial.py +++ b/examples/30_extended/run_setup_tutorial.py @@ -53,8 +53,7 @@ # many potential hyperparameters. Of course, the model can be as complex and as # easy as you want it to be model_original = sklearn.pipeline.make_pipeline( - sklearn.impute.SimpleImputer(), - sklearn.ensemble.RandomForestClassifier() + sklearn.impute.SimpleImputer(), sklearn.ensemble.RandomForestClassifier() ) @@ -63,20 +62,17 @@ # the purpose of this tutorial we set them to some specific values that might # or might not be optimal hyperparameters_original = { - 'simpleimputer__strategy': 'median', - 'randomforestclassifier__criterion': 'entropy', - 'randomforestclassifier__max_features': 0.2, - 'randomforestclassifier__min_samples_leaf': 1, - 'randomforestclassifier__n_estimators': 16, - 'randomforestclassifier__random_state': 42, + "simpleimputer__strategy": "median", + "randomforestclassifier__criterion": "entropy", + "randomforestclassifier__max_features": 0.2, + "randomforestclassifier__min_samples_leaf": 1, + "randomforestclassifier__n_estimators": 16, + "randomforestclassifier__random_state": 42, } model_original.set_params(**hyperparameters_original) # solve the task and upload the result (this implicitly creates the flow) -run = openml.runs.run_model_on_task( - model_original, - task, - avoid_duplicate_runs=False) +run = openml.runs.run_model_on_task(model_original, task, avoid_duplicate_runs=False) run_original = run.publish() # this implicitly uploads the flow ############################################################################### @@ -93,8 +89,7 @@ # it will automatically have all the hyperparameters set # and run the task again -run_duplicate = openml.runs.run_model_on_task( - model_duplicate, task, avoid_duplicate_runs=False) +run_duplicate = openml.runs.run_model_on_task(model_duplicate, task, avoid_duplicate_runs=False) 
############################################################################### @@ -102,8 +97,7 @@ ############################################################################### # the run has stored all predictions in the field data content -np.testing.assert_array_equal(run_original.data_content, - run_duplicate.data_content) +np.testing.assert_array_equal(run_original.data_content, run_duplicate.data_content) ############################################################################### diff --git a/examples/30_extended/study_tutorial.py b/examples/30_extended/study_tutorial.py index 9a9729a5c..b9202d7ce 100644 --- a/examples/30_extended/study_tutorial.py +++ b/examples/30_extended/study_tutorial.py @@ -39,7 +39,7 @@ # * Default gives ``dict``, but we'll use ``dataframe`` to obtain an # easier-to-work-with data structure -studies = openml.study.list_studies(output_format='dataframe', status='all') +studies = openml.study.list_studies(output_format="dataframe", status="all") print(studies.head(n=10)) @@ -64,9 +64,7 @@ # And we can use the evaluation listing functionality to learn more about # the evaluations available for the conducted runs: evaluations = openml.evaluations.list_evaluations( - function='predictive_accuracy', - output_format='dataframe', - study=study.study_id, + function="predictive_accuracy", output_format="dataframe", study=study.study_id, ) print(evaluations.head()) @@ -81,10 +79,12 @@ openml.config.start_using_configuration_for_example() # Very simple classifier which ignores the feature type -clf = sklearn.pipeline.Pipeline(steps=[ - ('imputer', sklearn.impute.SimpleImputer()), - ('estimator', sklearn.tree.DecisionTreeClassifier(max_depth=5)), -]) +clf = sklearn.pipeline.Pipeline( + steps=[ + ("imputer", sklearn.impute.SimpleImputer()), + ("estimator", sklearn.tree.DecisionTreeClassifier(max_depth=5)), + ] +) suite = openml.study.get_suite(1) # We'll create a study with one run on three random datasets each @@ -101,8 +101,8 @@ alias = uuid.uuid4().hex new_study = openml.study.create_study( - name='Test-Study', - description='Test study for the Python tutorial on studies', + name="Test-Study", + description="Test study for the Python tutorial on studies", run_ids=run_ids, alias=alias, benchmark_suite=suite.study_id, diff --git a/examples/30_extended/suites_tutorial.py b/examples/30_extended/suites_tutorial.py index b41e08e74..f583b6957 100644 --- a/examples/30_extended/suites_tutorial.py +++ b/examples/30_extended/suites_tutorial.py @@ -35,7 +35,7 @@ # * Default gives ``dict``, but we'll use ``dataframe`` to obtain an # easier-to-work-with data structure -suites = openml.study.list_suites(output_format='dataframe', status='all') +suites = openml.study.list_suites(output_format="dataframe", status="all") print(suites.head(n=10)) ############################################################################ @@ -57,12 +57,12 @@ ############################################################################ # And we can use the task listing functionality to learn more about them: -tasks = openml.tasks.list_tasks(output_format='dataframe') +tasks = openml.tasks.list_tasks(output_format="dataframe") # Using ``@`` in `pd.DataFrame.query < # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.query.html>`_ # accesses variables outside of the current dataframe. 
-tasks = tasks.query('tid in @suite.tasks') +tasks = tasks.query("tid in @suite.tasks") print(tasks.describe().transpose()) ############################################################################ @@ -86,8 +86,8 @@ alias = uuid.uuid4().hex new_suite = openml.study.create_benchmark_suite( - name='Test-Suite', - description='Test suite for the Python tutorial on benchmark suites', + name="Test-Suite", + description="Test suite for the Python tutorial on benchmark suites", task_ids=task_ids_for_suite, alias=alias, ) diff --git a/examples/30_extended/task_manual_iteration_tutorial.py b/examples/30_extended/task_manual_iteration_tutorial.py index 7ec824e38..c879e9fea 100644 --- a/examples/30_extended/task_manual_iteration_tutorial.py +++ b/examples/30_extended/task_manual_iteration_tutorial.py @@ -43,7 +43,7 @@ # single repeat, a single fold and a single sample size: print( - 'Task {}: number of repeats: {}, number of folds: {}, number of samples {}.'.format( + "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( task_id, n_repeats, n_folds, n_samples, ) ) @@ -53,11 +53,7 @@ # samples (indexing is zero-based). Usually, one would loop over all repeats, folds and sample # sizes, but we can neglect this here as there is only a single repetition. -train_indices, test_indices = task.get_train_test_split_indices( - repeat=0, - fold=0, - sample=0, -) +train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0,) print(train_indices.shape, train_indices.dtype) print(test_indices.shape, test_indices.dtype) @@ -72,7 +68,7 @@ y_test = y[test_indices] print( - 'X_train.shape: {}, y_train.shape: {}, X_test.shape: {}, y_test.shape: {}'.format( + "X_train.shape: {}, y_train.shape: {}, X_test.shape: {}, y_test.shape: {}".format( X_train.shape, y_train.shape, X_test.shape, y_test.shape, ) ) @@ -84,7 +80,7 @@ task = openml.tasks.get_task(task_id) n_repeats, n_folds, n_samples = task.get_split_dimensions() print( - 'Task {}: number of repeats: {}, number of folds: {}, number of samples {}.'.format( + "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( task_id, n_repeats, n_folds, n_samples, ) ) @@ -95,9 +91,7 @@ for fold_idx in range(n_folds): for sample_idx in range(n_samples): train_indices, test_indices = task.get_train_test_split_indices( - repeat=repeat_idx, - fold=fold_idx, - sample=sample_idx, + repeat=repeat_idx, fold=fold_idx, sample=sample_idx, ) X_train = X.loc[train_indices] y_train = y[train_indices] @@ -105,9 +99,14 @@ y_test = y[test_indices] print( - 'Repeat #{}, fold #{}, samples {}: X_train.shape: {}, ' - 'y_train.shape {}, X_test.shape {}, y_test.shape {}'.format( - repeat_idx, fold_idx, sample_idx, X_train.shape, y_train.shape, X_test.shape, + "Repeat #{}, fold #{}, samples {}: X_train.shape: {}, " + "y_train.shape {}, X_test.shape {}, y_test.shape {}".format( + repeat_idx, + fold_idx, + sample_idx, + X_train.shape, + y_train.shape, + X_test.shape, y_test.shape, ) ) @@ -119,7 +118,7 @@ task = openml.tasks.get_task(task_id) n_repeats, n_folds, n_samples = task.get_split_dimensions() print( - 'Task {}: number of repeats: {}, number of folds: {}, number of samples {}.'.format( + "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( task_id, n_repeats, n_folds, n_samples, ) ) @@ -130,9 +129,7 @@ for fold_idx in range(n_folds): for sample_idx in range(n_samples): train_indices, test_indices = task.get_train_test_split_indices( - repeat=repeat_idx, - fold=fold_idx, - 
sample=sample_idx, + repeat=repeat_idx, fold=fold_idx, sample=sample_idx, ) X_train = X.loc[train_indices] y_train = y[train_indices] @@ -140,9 +137,14 @@ y_test = y[test_indices] print( - 'Repeat #{}, fold #{}, samples {}: X_train.shape: {}, ' - 'y_train.shape {}, X_test.shape {}, y_test.shape {}'.format( - repeat_idx, fold_idx, sample_idx, X_train.shape, y_train.shape, X_test.shape, + "Repeat #{}, fold #{}, samples {}: X_train.shape: {}, " + "y_train.shape {}, X_test.shape {}, y_test.shape {}".format( + repeat_idx, + fold_idx, + sample_idx, + X_train.shape, + y_train.shape, + X_test.shape, y_test.shape, ) ) @@ -154,7 +156,7 @@ task = openml.tasks.get_task(task_id) n_repeats, n_folds, n_samples = task.get_split_dimensions() print( - 'Task {}: number of repeats: {}, number of folds: {}, number of samples {}.'.format( + "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( task_id, n_repeats, n_folds, n_samples, ) ) @@ -165,9 +167,7 @@ for fold_idx in range(n_folds): for sample_idx in range(n_samples): train_indices, test_indices = task.get_train_test_split_indices( - repeat=repeat_idx, - fold=fold_idx, - sample=sample_idx, + repeat=repeat_idx, fold=fold_idx, sample=sample_idx, ) X_train = X.loc[train_indices] y_train = y[train_indices] @@ -175,9 +175,14 @@ y_test = y[test_indices] print( - 'Repeat #{}, fold #{}, samples {}: X_train.shape: {}, ' - 'y_train.shape {}, X_test.shape {}, y_test.shape {}'.format( - repeat_idx, fold_idx, sample_idx, X_train.shape, y_train.shape, X_test.shape, + "Repeat #{}, fold #{}, samples {}: X_train.shape: {}, " + "y_train.shape {}, X_test.shape {}, y_test.shape {}".format( + repeat_idx, + fold_idx, + sample_idx, + X_train.shape, + y_train.shape, + X_test.shape, y_test.shape, ) ) diff --git a/examples/30_extended/tasks_tutorial.py b/examples/30_extended/tasks_tutorial.py index e12c6f653..4befe1a07 100644 --- a/examples/30_extended/tasks_tutorial.py +++ b/examples/30_extended/tasks_tutorial.py @@ -38,21 +38,21 @@ # `pandas dataframe `_ # to have better visualization capabilities and easier access: -tasks = pd.DataFrame.from_dict(tasks, orient='index') +tasks = pd.DataFrame.from_dict(tasks, orient="index") print(tasks.columns) print(f"First 5 of {len(tasks)} tasks:") print(tasks.head()) # As conversion to a pandas dataframe is a common task, we have added this functionality to the # OpenML-Python library which can be used by passing ``output_format='dataframe'``: -tasks_df = openml.tasks.list_tasks(task_type_id=1, output_format='dataframe') +tasks_df = openml.tasks.list_tasks(task_type_id=1, output_format="dataframe") print(tasks_df.head()) ############################################################################ # We can filter the list of tasks to only contain datasets with more than # 500 samples, but less than 1000 samples: -filtered_tasks = tasks.query('NumberOfInstances > 500 and NumberOfInstances < 1000') +filtered_tasks = tasks.query("NumberOfInstances > 500 and NumberOfInstances < 1000") print(list(filtered_tasks.index)) ############################################################################ @@ -77,21 +77,21 @@ # # Similar to listing tasks by task type, we can list tasks by tags: -tasks = openml.tasks.list_tasks(tag='OpenML100', output_format='dataframe') +tasks = openml.tasks.list_tasks(tag="OpenML100", output_format="dataframe") print(f"First 5 of {len(tasks)} tasks:") print(tasks.head()) ############################################################################ # Furthermore, we can list tasks based on the 
dataset id: -tasks = openml.tasks.list_tasks(data_id=1471, output_format='dataframe') +tasks = openml.tasks.list_tasks(data_id=1471, output_format="dataframe") print(f"First 5 of {len(tasks)} tasks:") print(tasks.head()) ############################################################################ # In addition, a size limit and an offset can be applied both separately and simultaneously: -tasks = openml.tasks.list_tasks(size=10, offset=50, output_format='dataframe') +tasks = openml.tasks.list_tasks(size=10, offset=50, output_format="dataframe") print(tasks) ############################################################################ @@ -107,7 +107,7 @@ # Finally, it is also possible to list all tasks on OpenML with: ############################################################################ -tasks = openml.tasks.list_tasks(output_format='dataframe') +tasks = openml.tasks.list_tasks(output_format="dataframe") print(len(tasks)) ############################################################################ @@ -192,16 +192,19 @@ dataset_id=128, target_name="class", evaluation_measure="predictive_accuracy", - estimation_procedure_id=1) + estimation_procedure_id=1, + ) my_task.publish() except openml.exceptions.OpenMLServerException as e: # Error code for 'task already exists' if e.code == 614: # Lookup task - tasks = openml.tasks.list_tasks(data_id=128, output_format='dataframe') - tasks = tasks.query('task_type == "Supervised Classification" ' - 'and estimation_procedure == "10-fold Crossvalidation" ' - 'and evaluation_measures == "predictive_accuracy"') + tasks = openml.tasks.list_tasks(data_id=128, output_format="dataframe") + tasks = tasks.query( + 'task_type == "Supervised Classification" ' + 'and estimation_procedure == "10-fold Crossvalidation" ' + 'and evaluation_measures == "predictive_accuracy"' + ) task_id = tasks.loc[:, "tid"].values[0] print("Task already exists. Task ID is", task_id) diff --git a/examples/40_paper/2015_neurips_feurer_example.py b/examples/40_paper/2015_neurips_feurer_example.py index 58b242add..c68189784 100644 --- a/examples/40_paper/2015_neurips_feurer_example.py +++ b/examples/40_paper/2015_neurips_feurer_example.py @@ -24,6 +24,7 @@ #################################################################################################### # List of dataset IDs given in the supplementary material of Feurer et al.: # https://papers.nips.cc/paper/5872-efficient-and-robust-automated-machine-learning-supplemental.zip +# fmt: off dataset_ids = [ 3, 6, 12, 14, 16, 18, 21, 22, 23, 24, 26, 28, 30, 31, 32, 36, 38, 44, 46, 57, 60, 179, 180, 181, 182, 184, 185, 273, 293, 300, 351, 354, 357, 389, @@ -36,6 +37,7 @@ 1056, 1067, 1068, 1069, 1111, 1112, 1114, 1116, 1119, 1120, 1128, 1130, 1134, 1138, 1139, 1142, 1146, 1161, 1166, ] +# fmt: on #################################################################################################### # The dataset IDs could be used directly to load the dataset and split the data into a training set @@ -57,8 +59,8 @@ # datasets can be found in the `online docs `_. tasks = openml.tasks.list_tasks( task_type_id=openml.tasks.TaskTypeEnum.SUPERVISED_CLASSIFICATION, - status='all', - output_format='dataframe', + status="all", + output_format="dataframe", ) # Query only those with holdout as the resampling startegy. 
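Since the listing above is returned as a pandas dataframe, the Feurer et al. dataset ids can be matched against it directly. A minimal hedged sketch, assuming the `tasks` dataframe and `dataset_ids` list defined in this example and that the listing exposes the dataset id under the `did` column:

# Hedged sketch, not part of the patch: restrict the task listing to the
# datasets used by Feurer et al. and inspect the resampling strategies present.
feurer_tasks = tasks[tasks["did"].isin(dataset_ids)]
print(f"{len(feurer_tasks)} tasks are defined on the {len(dataset_ids)} datasets")
print(feurer_tasks["estimation_procedure"].value_counts().head())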
diff --git a/examples/40_paper/2018_ida_strang_example.py b/examples/40_paper/2018_ida_strang_example.py index 3f9bcc49e..74c6fde5f 100644 --- a/examples/40_paper/2018_ida_strang_example.py +++ b/examples/40_paper/2018_ida_strang_example.py @@ -39,41 +39,42 @@ # for comparing svms: flow_ids = [7754, 7756] # for comparing nns: flow_ids = [7722, 7729] # for comparing dts: flow_ids = [7725], differentiate on hyper-parameter value -classifier_family = 'SVM' +classifier_family = "SVM" flow_ids = [7754, 7756] -measure = 'predictive_accuracy' -meta_features = ['NumberOfInstances', 'NumberOfFeatures'] -class_values = ['non-linear better', 'linear better', 'equal'] +measure = "predictive_accuracy" +meta_features = ["NumberOfInstances", "NumberOfFeatures"] +class_values = ["non-linear better", "linear better", "equal"] # Downloads all evaluation records related to this study evaluations = openml.evaluations.list_evaluations( - measure, flow=flow_ids, study=study_id, output_format='dataframe') + measure, flow=flow_ids, study=study_id, output_format="dataframe" +) # gives us a table with columns data_id, flow1_value, flow2_value -evaluations = evaluations.pivot( - index='data_id', columns='flow_id', values='value').dropna() +evaluations = evaluations.pivot(index="data_id", columns="flow_id", values="value").dropna() # downloads all data qualities (for scatter plot) data_qualities = openml.datasets.list_datasets( - data_id=list(evaluations.index.values), output_format='dataframe') + data_id=list(evaluations.index.values), output_format="dataframe" +) # removes irrelevant data qualities data_qualities = data_qualities[meta_features] # makes a join between evaluation table and data qualities table, # now we have columns data_id, flow1_value, flow2_value, meta_feature_1, # meta_feature_2 -evaluations = evaluations.join(data_qualities, how='inner') +evaluations = evaluations.join(data_qualities, how="inner") # adds column that indicates the difference between the two classifiers -evaluations['diff'] = evaluations[flow_ids[0]] - evaluations[flow_ids[1]] +evaluations["diff"] = evaluations[flow_ids[0]] - evaluations[flow_ids[1]] ############################################################################## # makes the s-plot fig_splot, ax_splot = plt.subplots() -ax_splot.plot(range(len(evaluations)), sorted(evaluations['diff'])) +ax_splot.plot(range(len(evaluations)), sorted(evaluations["diff"])) ax_splot.set_title(classifier_family) -ax_splot.set_xlabel('Dataset (sorted)') -ax_splot.set_ylabel('difference between linear and non-linear classifier') -ax_splot.grid(linestyle='--', axis='y') +ax_splot.set_xlabel("Dataset (sorted)") +ax_splot.set_ylabel("difference between linear and non-linear classifier") +ax_splot.grid(linestyle="--", axis="y") plt.show() @@ -81,6 +82,7 @@ # adds column that indicates the difference between the two classifiers, # needed for the scatter plot + def determine_class(val_lin, val_nonlin): if val_lin < val_nonlin: return class_values[0] @@ -90,22 +92,21 @@ def determine_class(val_lin, val_nonlin): return class_values[2] -evaluations['class'] = evaluations.apply( - lambda row: determine_class(row[flow_ids[0]], row[flow_ids[1]]), axis=1) +evaluations["class"] = evaluations.apply( + lambda row: determine_class(row[flow_ids[0]], row[flow_ids[1]]), axis=1 +) # does the plotting and formatting fig_scatter, ax_scatter = plt.subplots() for class_val in class_values: - df_class = evaluations[evaluations['class'] == class_val] - plt.scatter(df_class[meta_features[0]], - 
df_class[meta_features[1]], - label=class_val) + df_class = evaluations[evaluations["class"] == class_val] + plt.scatter(df_class[meta_features[0]], df_class[meta_features[1]], label=class_val) ax_scatter.set_title(classifier_family) ax_scatter.set_xlabel(meta_features[0]) ax_scatter.set_ylabel(meta_features[1]) ax_scatter.legend() -ax_scatter.set_xscale('log') -ax_scatter.set_yscale('log') +ax_scatter.set_xscale("log") +ax_scatter.set_yscale("log") plt.show() ############################################################################## @@ -113,7 +114,7 @@ def determine_class(val_lin, val_nonlin): # two algorithms on various axis (not in the paper) fig_diagplot, ax_diagplot = plt.subplots() -ax_diagplot.grid(linestyle='--') +ax_diagplot.grid(linestyle="--") ax_diagplot.plot([0, 1], ls="-", color="black") ax_diagplot.plot([0.2, 1.2], ls="--", color="black") ax_diagplot.plot([-0.2, 0.8], ls="--", color="black") diff --git a/examples/40_paper/2018_kdd_rijn_example.py b/examples/40_paper/2018_kdd_rijn_example.py index ae2a0672e..e5d998e35 100644 --- a/examples/40_paper/2018_kdd_rijn_example.py +++ b/examples/40_paper/2018_kdd_rijn_example.py @@ -20,8 +20,10 @@ import sys -if sys.platform == 'win32': # noqa - print('The pyrfr library (requirement of fanova) can currently not be installed on Windows systems') +if sys.platform == "win32": # noqa + print( + "The pyrfr library (requirement of fanova) can currently not be installed on Windows systems" + ) exit() import json @@ -65,12 +67,10 @@ # this, please see: # https://github.com/janvanrijn/openml-pimp/blob/d0a14f3eb480f2a90008889f00041bdccc7b9265/examples/plot/plot_fanova_aggregates.py # noqa F401 -suite = openml.study.get_suite('OpenML100') +suite = openml.study.get_suite("OpenML100") flow_id = 7707 -parameter_filters = { - 'sklearn.svm.classes.SVC(17)_kernel': 'sigmoid' -} -evaluation_measure = 'predictive_accuracy' +parameter_filters = {"sklearn.svm.classes.SVC(17)_kernel": "sigmoid"} +evaluation_measure = "predictive_accuracy" limit_per_task = 500 limit_nr_tasks = 15 n_trees = 16 @@ -81,13 +81,20 @@ for idx, task_id in enumerate(suite.tasks): if limit_nr_tasks is not None and idx >= limit_nr_tasks: continue - print('Starting with task %d (%d/%d)' - % (task_id, idx+1, len(suite.tasks) if limit_nr_tasks is None else limit_nr_tasks)) + print( + "Starting with task %d (%d/%d)" + % (task_id, idx + 1, len(suite.tasks) if limit_nr_tasks is None else limit_nr_tasks) + ) # note that we explicitly only include tasks from the benchmark suite that was specified (as per the for-loop) evals = openml.evaluations.list_evaluations_setups( - evaluation_measure, flow=[flow_id], task=[task_id], size=limit_per_task, output_format='dataframe') - - performance_column = 'value' + evaluation_measure, + flow=[flow_id], + task=[task_id], + size=limit_per_task, + output_format="dataframe", + ) + + performance_column = "value" # make a DataFrame consisting of all hyperparameters (which is a dict in setup['parameters']) and the performance # value (in setup['value']). The following line looks a bit complicated, but combines 2 tasks: a) combine # hyperparameters and performance data in a single dict, b) cast hyperparameter values to the appropriate format @@ -95,40 +102,58 @@ # scikit-learn setups (and even there some legacy setups might violate this requirement). It will work for the # setups that belong to the flows embedded in this example though. 
try: - setups_evals = pd.DataFrame([dict(**{name: json.loads(value) for name, value in setup['parameters'].items()}, - **{performance_column: setup[performance_column]}) - for _, setup in evals.iterrows()]) + setups_evals = pd.DataFrame( + [ + dict( + **{name: json.loads(value) for name, value in setup["parameters"].items()}, + **{performance_column: setup[performance_column]} + ) + for _, setup in evals.iterrows() + ] + ) except json.decoder.JSONDecodeError as e: - print('Task %d error: %s' % (task_id, e)) + print("Task %d error: %s" % (task_id, e)) continue # apply our filters, to have only the setups that comply to the hyperparameters we want for filter_key, filter_value in parameter_filters.items(): setups_evals = setups_evals[setups_evals[filter_key] == filter_value] # in this simplified example, we only display numerical and float hyperparameters. For categorical hyperparameters, # the fanova library needs to be informed by using a configspace object. - setups_evals = setups_evals.select_dtypes(include=['int64', 'float64']) + setups_evals = setups_evals.select_dtypes(include=["int64", "float64"]) # drop rows with unique values. These are by definition not an interesting hyperparameter, e.g., ``axis``, # ``verbose``. - setups_evals = setups_evals[[c for c in list(setups_evals) - if len(setups_evals[c].unique()) > 1 or c == performance_column]] + setups_evals = setups_evals[ + [ + c + for c in list(setups_evals) + if len(setups_evals[c].unique()) > 1 or c == performance_column + ] + ] # We are done with processing ``setups_evals``. Note that we still might have some irrelevant hyperparameters, e.g., # ``random_state``. We have dropped some relevant hyperparameters, i.e., several categoricals. Let's check it out: # determine x values to pass to fanova library - parameter_names = [pname for pname in setups_evals.columns.to_numpy() if pname != performance_column] + parameter_names = [ + pname for pname in setups_evals.columns.to_numpy() if pname != performance_column + ] evaluator = fanova.fanova.fANOVA( - X=setups_evals[parameter_names].to_numpy(), Y=setups_evals[performance_column].to_numpy(), n_trees=n_trees) + X=setups_evals[parameter_names].to_numpy(), + Y=setups_evals[performance_column].to_numpy(), + n_trees=n_trees, + ) for idx, pname in enumerate(parameter_names): try: - fanova_results.append({ - 'hyperparameter': pname.split(".")[-1], - 'fanova': evaluator.quantify_importance([idx])[(idx,)]['individual importance'] - }) + fanova_results.append( + { + "hyperparameter": pname.split(".")[-1], + "fanova": evaluator.quantify_importance([idx])[(idx,)]["individual importance"], + } + ) except RuntimeError as e: # functional ANOVA sometimes crashes with a RuntimeError, e.g., on tasks where the performance is constant # for all configurations (there is no variance). We will skip these tasks (like the authors did in the # paper). - print('Task %d error: %s' % (task_id, e)) + print("Task %d error: %s" % (task_id, e)) continue # transform ``fanova_results`` from a list of dicts into a DataFrame @@ -140,9 +165,9 @@ # ``Orange`` dependency (``pip install Orange3``). 
For the complete example, # the reader is referred to the more elaborate script (referred to earlier) fig, ax = plt.subplots() -sns.boxplot(x='hyperparameter', y='fanova', data=fanova_results, ax=ax) -ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right') -ax.set_ylabel('Variance Contribution') +sns.boxplot(x="hyperparameter", y="fanova", data=fanova_results, ax=ax) +ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right") +ax.set_ylabel("Variance Contribution") ax.set_xlabel(None) plt.tight_layout() plt.show() diff --git a/examples/40_paper/2018_neurips_perrone_example.py b/examples/40_paper/2018_neurips_perrone_example.py index 2127bdfe4..8639e0a3a 100644 --- a/examples/40_paper/2018_neurips_perrone_example.py +++ b/examples/40_paper/2018_neurips_perrone_example.py @@ -38,16 +38,14 @@ from sklearn.preprocessing import OneHotEncoder from sklearn.ensemble import RandomForestRegressor -flow_type = 'svm' # this example will use the smaller svm flow evaluations +flow_type = "svm" # this example will use the smaller svm flow evaluations ############################################################################ # The subsequent functions are defined to fetch tasks, flows, evaluations and preprocess them into # a tabular format that can be used to build models. -def fetch_evaluations(run_full=False, - flow_type='svm', - metric='area_under_roc_curve'): - ''' +def fetch_evaluations(run_full=False, flow_type="svm", metric="area_under_roc_curve"): + """ Fetch a list of evaluations based on the flows and tasks used in the experiments. Parameters @@ -65,17 +63,18 @@ def fetch_evaluations(run_full=False, eval_df : dataframe task_ids : list flow_id : int - ''' + """ # Collecting task IDs as used by the experiments from the paper - if flow_type == 'svm' and run_full: + # fmt: off + if flow_type == "svm" and run_full: task_ids = [ 10101, 145878, 146064, 14951, 34537, 3485, 3492, 3493, 3494, 37, 3889, 3891, 3899, 3902, 3903, 3913, 3918, 3950, 9889, 9914, 9946, 9952, 9967, 9971, 9976, 9978, 9980, 9983, ] - elif flow_type == 'svm' and not run_full: + elif flow_type == "svm" and not run_full: task_ids = [9983, 3485, 3902, 3903, 145878] - elif flow_type == 'xgboost' and run_full: + elif flow_type == "xgboost" and run_full: task_ids = [ 10093, 10101, 125923, 145847, 145857, 145862, 145872, 145878, 145953, 145972, 145976, 145979, 146064, 14951, 31, 3485, @@ -84,25 +83,27 @@ def fetch_evaluations(run_full=False, ] else: # flow_type == 'xgboost' and not run_full: task_ids = [3903, 37, 3485, 49, 3913] + # fmt: on # Fetching the relevant flow - flow_id = 5891 if flow_type == 'svm' else 6767 + flow_id = 5891 if flow_type == "svm" else 6767 # Fetching evaluations - eval_df = openml.evaluations.list_evaluations_setups(function=metric, - task=task_ids, - flow=[flow_id], - uploader=[2702], - output_format='dataframe', - parameters_in_separate_columns=True) + eval_df = openml.evaluations.list_evaluations_setups( + function=metric, + task=task_ids, + flow=[flow_id], + uploader=[2702], + output_format="dataframe", + parameters_in_separate_columns=True, + ) return eval_df, task_ids, flow_id -def create_table_from_evaluations(eval_df, - flow_type='svm', - run_count=np.iinfo(np.int64).max, - task_ids=None): - ''' +def create_table_from_evaluations( + eval_df, flow_type="svm", run_count=np.iinfo(np.int64).max, task_ids=None +): + """ Create a tabular data with its ground truth from a dataframe of evaluations. Optionally, can filter out records based on task ids. 
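Between the two reformatted helpers, a short hedged usage sketch of `fetch_evaluations` as defined above; it assumes network access to the OpenML server and uses the reduced SVM task set.

# Hedged sketch, not part of the patch: fetch the reduced SVM evaluation set and
# list the hyperparameter columns that parameters_in_separate_columns=True
# spreads into the dataframe (column names end in cost/degree/gamma/kernel,
# matching the selection done in create_table_from_evaluations below).
eval_df, task_ids, flow_id = fetch_evaluations(run_full=False, flow_type="svm")
print(eval_df.shape)
print([c for c in eval_df.columns if c.split("_")[-1] in ("cost", "degree", "gamma", "kernel")])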
@@ -121,29 +122,36 @@ def create_table_from_evaluations(eval_df, ------- eval_table : dataframe values : list - ''' + """ if task_ids is not None: - eval_df = eval_df[eval_df['task_id'].isin(task_ids)] - if flow_type == 'svm': - colnames = ['cost', 'degree', 'gamma', 'kernel'] + eval_df = eval_df[eval_df["task_id"].isin(task_ids)] + if flow_type == "svm": + colnames = ["cost", "degree", "gamma", "kernel"] else: colnames = [ - 'alpha', 'booster', 'colsample_bylevel', 'colsample_bytree', - 'eta', 'lambda', 'max_depth', 'min_child_weight', 'nrounds', - 'subsample', + "alpha", + "booster", + "colsample_bylevel", + "colsample_bytree", + "eta", + "lambda", + "max_depth", + "min_child_weight", + "nrounds", + "subsample", ] eval_df = eval_df.sample(frac=1) # shuffling rows eval_df = eval_df.iloc[:run_count, :] - eval_df.columns = [column.split('_')[-1] for column in eval_df.columns] + eval_df.columns = [column.split("_")[-1] for column in eval_df.columns] eval_table = eval_df.loc[:, colnames] - value = eval_df.loc[:, 'value'] + value = eval_df.loc[:, "value"] return eval_table, value -def list_categorical_attributes(flow_type='svm'): - if flow_type == 'svm': - return ['kernel'] - return ['booster'] +def list_categorical_attributes(flow_type="svm"): + if flow_type == "svm": + return ["kernel"] + return ["booster"] ############################################################################# @@ -170,21 +178,21 @@ def list_categorical_attributes(flow_type='svm'): num_cols = list(set(X.columns) - set(cat_cols)) # Missing value imputers -cat_imputer = SimpleImputer(missing_values=np.nan, strategy='constant', fill_value='None') -num_imputer = SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=-1) +cat_imputer = SimpleImputer(missing_values=np.nan, strategy="constant", fill_value="None") +num_imputer = SimpleImputer(missing_values=np.nan, strategy="constant", fill_value=-1) # Creating the one-hot encoder -enc = OneHotEncoder(handle_unknown='ignore') +enc = OneHotEncoder(handle_unknown="ignore") # Pipeline to handle categorical column transformations -cat_transforms = Pipeline(steps=[('impute', cat_imputer), ('encode', enc)]) +cat_transforms = Pipeline(steps=[("impute", cat_imputer), ("encode", enc)]) # Combining column transformers -ct = ColumnTransformer([('cat', cat_transforms, cat_cols), ('num', num_imputer, num_cols)]) +ct = ColumnTransformer([("cat", cat_transforms, cat_cols), ("num", num_imputer, num_cols)]) # Creating the full pipeline with the surrogate model clf = RandomForestRegressor(n_estimators=50) -model = Pipeline(steps=[('preprocess', ct), ('surrogate', clf)]) +model = Pipeline(steps=[("preprocess", ct), ("surrogate", clf)]) ############################################################################# @@ -197,7 +205,7 @@ def list_categorical_attributes(flow_type='svm'): # Selecting a task for the surrogate task_id = task_ids[-1] print("Task ID : ", task_id) -X, y = create_table_from_evaluations(eval_df, task_ids=[task_id], flow_type='svm') +X, y = create_table_from_evaluations(eval_df, task_ids=[task_id], flow_type="svm") model.fit(X, y) y_pred = model.predict(X) @@ -217,11 +225,13 @@ def list_categorical_attributes(flow_type='svm'): # Sampling random configurations def random_sample_configurations(num_samples=100): - colnames = ['cost', 'degree', 'gamma', 'kernel'] - ranges = [(0.000986, 998.492437), - (2.0, 5.0), - (0.000988, 913.373845), - (['linear', 'polynomial', 'radial', 'sigmoid'])] + colnames = ["cost", "degree", "gamma", "kernel"] + ranges = [ + (0.000986, 
998.492437), + (2.0, 5.0), + (0.000988, 913.373845), + (["linear", "polynomial", "radial", "sigmoid"]), + ] X = pd.DataFrame(np.nan, index=range(num_samples), columns=colnames) for i in range(len(colnames)): if len(ranges[i]) == 2: @@ -245,6 +255,6 @@ def random_sample_configurations(num_samples=100): # plotting the regret curve plt.plot(regret) -plt.title('AUC regret for Random Search on surrogate') -plt.xlabel('Numbe of function evaluations') -plt.ylabel('Regret') +plt.title("AUC regret for Random Search on surrogate") +plt.xlabel("Numbe of function evaluations") +plt.ylabel("Regret") diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 000000000..7f3f8cefb --- /dev/null +++ b/mypy.ini @@ -0,0 +1,6 @@ +[mypy] +# Reports any config lines that are not recognized +warn_unused_configs=True + +ignore_missing_imports=True +follow_imports=skip diff --git a/openml/__init__.py b/openml/__init__.py index aef8a2aec..621703332 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -49,8 +49,7 @@ from .__version__ import __version__ # noqa: F401 -def populate_cache(task_ids=None, dataset_ids=None, flow_ids=None, - run_ids=None): +def populate_cache(task_ids=None, dataset_ids=None, flow_ids=None, run_ids=None): """ Populate a cache for offline and parallel usage of the OpenML connector. @@ -86,34 +85,34 @@ def populate_cache(task_ids=None, dataset_ids=None, flow_ids=None, __all__ = [ - 'OpenMLDataset', - 'OpenMLDataFeature', - 'OpenMLRun', - 'OpenMLSplit', - 'OpenMLEvaluation', - 'OpenMLSetup', - 'OpenMLParameter', - 'OpenMLTask', - 'OpenMLSupervisedTask', - 'OpenMLClusteringTask', - 'OpenMLLearningCurveTask', - 'OpenMLRegressionTask', - 'OpenMLClassificationTask', - 'OpenMLFlow', - 'OpenMLStudy', - 'OpenMLBenchmarkSuite', - 'datasets', - 'evaluations', - 'exceptions', - 'extensions', - 'config', - 'runs', - 'flows', - 'tasks', - 'setups', - 'study', - 'utils', - '_api_calls', + "OpenMLDataset", + "OpenMLDataFeature", + "OpenMLRun", + "OpenMLSplit", + "OpenMLEvaluation", + "OpenMLSetup", + "OpenMLParameter", + "OpenMLTask", + "OpenMLSupervisedTask", + "OpenMLClusteringTask", + "OpenMLLearningCurveTask", + "OpenMLRegressionTask", + "OpenMLClassificationTask", + "OpenMLFlow", + "OpenMLStudy", + "OpenMLBenchmarkSuite", + "datasets", + "evaluations", + "exceptions", + "extensions", + "config", + "runs", + "flows", + "tasks", + "setups", + "study", + "utils", + "_api_calls", ] # Load the scikit-learn extension by default diff --git a/openml/_api_calls.py b/openml/_api_calls.py index c357dc3d0..57599b912 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -8,8 +8,12 @@ from typing import Dict, Optional from . 
import config -from .exceptions import (OpenMLServerError, OpenMLServerException, - OpenMLServerNoResult, OpenMLHashException) +from .exceptions import ( + OpenMLServerError, + OpenMLServerException, + OpenMLServerNoResult, + OpenMLHashException, +) def _perform_api_call(call, request_method, data=None, file_elements=None): @@ -44,13 +48,13 @@ def _perform_api_call(call, request_method, data=None, file_elements=None): url += "/" url += call - url = url.replace('=', '%3d') - logging.info('Starting [%s] request for the URL %s', request_method, url) + url = url.replace("=", "%3d") + logging.info("Starting [%s] request for the URL %s", request_method, url) start = time.time() if file_elements is not None: - if request_method != 'post': - raise ValueError('request method must be post when file elements are present') + if request_method != "post": + raise ValueError("request method must be post when file elements are present") response = __read_url_files(url, data=data, file_elements=file_elements) else: response = __read_url(url, request_method, data) @@ -58,20 +62,18 @@ def _perform_api_call(call, request_method, data=None, file_elements=None): __check_response(response, url, file_elements) logging.info( - '%.7fs taken for [%s] request for the URL %s', - time.time() - start, - request_method, - url, + "%.7fs taken for [%s] request for the URL %s", time.time() - start, request_method, url, ) return response.text -def _download_text_file(source: str, - output_path: Optional[str] = None, - md5_checksum: str = None, - exists_ok: bool = True, - encoding: str = 'utf8', - ) -> Optional[str]: +def _download_text_file( + source: str, + output_path: Optional[str] = None, + md5_checksum: str = None, + exists_ok: bool = True, + encoding: str = "utf8", +) -> Optional[str]: """ Download the text file at `source` and store it in `output_path`. By default, do nothing if a file already exists in `output_path`. @@ -101,27 +103,26 @@ def _download_text_file(source: str, except FileNotFoundError: pass - logging.info('Starting [%s] request for the URL %s', 'get', source) + logging.info("Starting [%s] request for the URL %s", "get", source) start = time.time() - response = __read_url(source, request_method='get') + response = __read_url(source, request_method="get") __check_response(response, source, None) downloaded_file = response.text if md5_checksum is not None: md5 = hashlib.md5() - md5.update(downloaded_file.encode('utf-8')) + md5.update(downloaded_file.encode("utf-8")) md5_checksum_download = md5.hexdigest() if md5_checksum != md5_checksum_download: raise OpenMLHashException( - 'Checksum {} of downloaded file is unequal to the expected checksum {}.' 
- .format(md5_checksum_download, md5_checksum)) + "Checksum {} of downloaded file is unequal to the expected checksum {}.".format( + md5_checksum_download, md5_checksum + ) + ) if output_path is None: logging.info( - '%.7fs taken for [%s] request for the URL %s', - time.time() - start, - 'get', - source, + "%.7fs taken for [%s] request for the URL %s", time.time() - start, "get", source, ) return downloaded_file @@ -130,10 +131,7 @@ def _download_text_file(source: str, fh.write(downloaded_file) logging.info( - '%.7fs taken for [%s] request for the URL %s', - time.time() - start, - 'get', - source, + "%.7fs taken for [%s] request for the URL %s", time.time() - start, "get", source, ) del downloaded_file @@ -143,9 +141,10 @@ def _download_text_file(source: str, def __check_response(response, url, file_elements): if response.status_code != 200: raise __parse_server_exception(response, url, file_elements=file_elements) - elif 'Content-Encoding' not in response.headers or \ - response.headers['Content-Encoding'] != 'gzip': - logging.warning('Received uncompressed content from OpenML for {}.'.format(url)) + elif ( + "Content-Encoding" not in response.headers or response.headers["Content-Encoding"] != "gzip" + ): + logging.warning("Received uncompressed content from OpenML for {}.".format(url)) def _file_id_to_url(file_id, filename=None): @@ -153,10 +152,10 @@ def _file_id_to_url(file_id, filename=None): Presents the URL how to download a given file id filename is optional """ - openml_url = config.server.split('/api/') - url = openml_url[0] + '/data/download/%s' % file_id + openml_url = config.server.split("/api/") + url = openml_url[0] + "/data/download/%s" % file_id if filename is not None: - url += '/' + filename + url += "/" + filename return url @@ -165,33 +164,25 @@ def __read_url_files(url, data=None, file_elements=None): and sending file_elements as files""" data = {} if data is None else data - data['api_key'] = config.apikey + data["api_key"] = config.apikey if file_elements is None: file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to # 'gzip,deflate' - response = __send_request( - request_method='post', - url=url, - data=data, - files=file_elements, - ) + response = __send_request(request_method="post", url=url, data=data, files=file_elements,) return response def __read_url(url, request_method, data=None): data = {} if data is None else data if config.apikey is not None: - data['api_key'] = config.apikey + data["api_key"] = config.apikey return __send_request(request_method=request_method, url=url, data=data) def __send_request( - request_method, - url, - data, - files=None, + request_method, url, data, files=None, ): n_retries = config.connection_n_retries response = None @@ -199,73 +190,60 @@ def __send_request( # Start at one to have a non-zero multiplier for the sleep for i in range(1, n_retries + 1): try: - if request_method == 'get': + if request_method == "get": response = session.get(url, params=data) - elif request_method == 'delete': + elif request_method == "delete": response = session.delete(url, params=data) - elif request_method == 'post': + elif request_method == "post": response = session.post(url, data=data, files=files) else: raise NotImplementedError() break - except ( - requests.exceptions.ConnectionError, - requests.exceptions.SSLError, - ) as e: + except (requests.exceptions.ConnectionError, requests.exceptions.SSLError,) as e: if i == n_retries: raise e else: time.sleep(0.1 * i) if response is None: - raise ValueError('This 
should never happen!') + raise ValueError("This should never happen!") return response def __parse_server_exception( - response: requests.Response, - url: str, - file_elements: Dict, + response: requests.Response, url: str, file_elements: Dict, ) -> OpenMLServerError: if response.status_code == 414: - raise OpenMLServerError('URI too long! ({})'.format(url)) + raise OpenMLServerError("URI too long! ({})".format(url)) try: server_exception = xmltodict.parse(response.text) except Exception: # OpenML has a sophisticated error system # where information about failures is provided. try to parse this raise OpenMLServerError( - 'Unexpected server error when calling {}. Please contact the developers!\n' - 'Status code: {}\n{}'.format(url, response.status_code, response.text)) + "Unexpected server error when calling {}. Please contact the developers!\n" + "Status code: {}\n{}".format(url, response.status_code, response.text) + ) - server_error = server_exception['oml:error'] - code = int(server_error['oml:code']) - message = server_error['oml:message'] - additional_information = server_error.get('oml:additional_information') + server_error = server_exception["oml:error"] + code = int(server_error["oml:code"]) + message = server_error["oml:message"] + additional_information = server_error.get("oml:additional_information") if code in [372, 512, 500, 482, 542, 674]: if additional_information: - full_message = '{} - {}'.format(message, additional_information) + full_message = "{} - {}".format(message, additional_information) else: full_message = message # 512 for runs, 372 for datasets, 500 for flows # 482 for tasks, 542 for evaluations, 674 for setups - return OpenMLServerNoResult( - code=code, - message=full_message, - ) + return OpenMLServerNoResult(code=code, message=full_message,) # 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow) - if code in [163] and file_elements is not None and 'description' in file_elements: + if code in [163] and file_elements is not None and "description" in file_elements: # file_elements['description'] is the XML file description of the flow - full_message = '\n{}\n{} - {}'.format( - file_elements['description'], - message, - additional_information, + full_message = "\n{}\n{} - {}".format( + file_elements["description"], message, additional_information, ) else: - full_message = '{} - {}'.format(message, additional_information) - return OpenMLServerException( - code=code, - message=full_message, - url=url - ) + full_message = "{} - {}".format(message, additional_information) + return OpenMLServerException(code=code, message=full_message, url=url) diff --git a/openml/base.py b/openml/base.py index 1e98efcca..1b6e5ccc7 100644 --- a/openml/base.py +++ b/openml/base.py @@ -43,7 +43,7 @@ def _entity_letter(cls) -> str: """ Return the letter which represents the entity type in urls, e.g. 'f' for flow.""" # We take advantage of the class naming convention (OpenMLX), # which holds for all entities except studies and tasks, which overwrite this method. - return cls.__name__.lower()[len('OpenML'):][0] + return cls.__name__.lower()[len("OpenML") :][0] @abstractmethod def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: @@ -68,18 +68,19 @@ def _apply_repr_template(self, body_fields: List[Tuple[str, str]]) -> str: A list of (name, value) pairs to display in the body of the __repr__. """ # We add spaces between capitals, e.g. 
ClassificationTask -> Classification Task - name_with_spaces = re.sub(r"(\w)([A-Z])", r"\1 \2", - self.__class__.__name__[len('OpenML'):]) - header_text = 'OpenML {}'.format(name_with_spaces) - header = '{}\n{}\n'.format(header_text, '=' * len(header_text)) + name_with_spaces = re.sub( + r"(\w)([A-Z])", r"\1 \2", self.__class__.__name__[len("OpenML") :] + ) + header_text = "OpenML {}".format(name_with_spaces) + header = "{}\n{}\n".format(header_text, "=" * len(header_text)) longest_field_name_length = max(len(name) for name, value in body_fields) field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length) - body = '\n'.join(field_line_format.format(name, value) for name, value in body_fields) + body = "\n".join(field_line_format.format(name, value) for name, value in body_fields) return header + body @abstractmethod - def _to_dict(self) -> 'OrderedDict[str, OrderedDict]': + def _to_dict(self) -> "OrderedDict[str, OrderedDict]": """ Creates a dictionary representation of self. Uses OrderedDict to ensure consistent ordering when converting to xml. @@ -103,7 +104,7 @@ def _to_xml(self) -> str: # A task may not be uploaded with the xml encoding specification: # - encoding_specification, xml_body = xml_representation.split('\n', 1) + encoding_specification, xml_body = xml_representation.split("\n", 1) return xml_body def _get_file_elements(self) -> Dict: @@ -119,15 +120,15 @@ def _parse_publish_response(self, xml_response: Dict): """ Parse the id from the xml_response and assign it to self. """ pass - def publish(self) -> 'OpenMLBase': + def publish(self) -> "OpenMLBase": file_elements = self._get_file_elements() - if 'description' not in file_elements: - file_elements['description'] = self._to_xml() + if "description" not in file_elements: + file_elements["description"] = self._to_xml() - call = '{}/'.format(_get_rest_api_type_alias(self)) + call = "{}/".format(_get_rest_api_type_alias(self)) response_text = openml._api_calls._perform_api_call( - call, 'post', file_elements=file_elements + call, "post", file_elements=file_elements ) xml_response = xmltodict.parse(response_text) diff --git a/openml/config.py b/openml/config.py index 8c4de1431..296b71663 100644 --- a/openml/config.py +++ b/openml/config.py @@ -14,7 +14,7 @@ from urllib.parse import urlparse logger = logging.getLogger(__name__) -openml_logger = logging.getLogger('openml') +openml_logger = logging.getLogger("openml") console_handler = None file_handler = None @@ -26,14 +26,14 @@ def _create_log_handlers(): logger.debug("Requested to create log handlers, but they are already created.") return - message_format = '[%(levelname)s] [%(asctime)s:%(name)s] %(message)s' - output_formatter = logging.Formatter(message_format, datefmt='%H:%M:%S') + message_format = "[%(levelname)s] [%(asctime)s:%(name)s] %(message)s" + output_formatter = logging.Formatter(message_format, datefmt="%H:%M:%S") console_handler = logging.StreamHandler() console_handler.setFormatter(output_formatter) one_mb = 2 ** 20 - log_path = os.path.join(cache_directory, 'openml_python.log') + log_path = os.path.join(cache_directory, "openml_python.log") file_handler = logging.handlers.RotatingFileHandler( log_path, maxBytes=one_mb, backupCount=1, delay=True ) @@ -44,8 +44,13 @@ def _convert_log_levels(log_level: int) -> Tuple[int, int]: """ Converts a log level that's either defined by OpenML/Python to both specifications. 
""" # OpenML verbosity level don't match Python values directly: openml_to_python = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG} - python_to_openml = {logging.DEBUG: 2, logging.INFO: 1, logging.WARNING: 0, - logging.CRITICAL: 0, logging.ERROR: 0} + python_to_openml = { + logging.DEBUG: 2, + logging.INFO: 1, + logging.WARNING: 0, + logging.CRITICAL: 0, + logging.ERROR: 0, + } # Because the dictionaries share no keys, we use `get` to convert as necessary: openml_level = python_to_openml.get(log_level, log_level) python_level = openml_to_python.get(log_level, log_level) @@ -78,18 +83,18 @@ def set_file_log_level(file_output_level: int): # Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) _defaults = { - 'apikey': None, - 'server': "https://www.openml.org/api/v1/xml", - 'cachedir': os.path.expanduser(os.path.join('~', '.openml', 'cache')), - 'avoid_duplicate_runs': 'True', - 'connection_n_retries': 2, + "apikey": None, + "server": "https://www.openml.org/api/v1/xml", + "cachedir": os.path.expanduser(os.path.join("~", ".openml", "cache")), + "avoid_duplicate_runs": "True", + "connection_n_retries": 2, } -config_file = os.path.expanduser(os.path.join('~', '.openml', 'config')) +config_file = os.path.expanduser(os.path.join("~", ".openml", "config")) # Default values are actually added here in the _setup() function which is # called at the end of this module -server = str(_defaults['server']) # so mypy knows it is a string +server = str(_defaults["server"]) # so mypy knows it is a string def get_server_base_url() -> str: @@ -104,17 +109,18 @@ def get_server_base_url() -> str: return server.split("/api")[0] -apikey = _defaults['apikey'] +apikey = _defaults["apikey"] # The current cache directory (without the server name) -cache_directory = str(_defaults['cachedir']) # so mypy knows it is a string -avoid_duplicate_runs = True if _defaults['avoid_duplicate_runs'] == 'True' else False +cache_directory = str(_defaults["cachedir"]) # so mypy knows it is a string +avoid_duplicate_runs = True if _defaults["avoid_duplicate_runs"] == "True" else False # Number of retries if the connection breaks -connection_n_retries = _defaults['connection_n_retries'] +connection_n_retries = _defaults["connection_n_retries"] class ConfigurationForExamples: """ Allows easy switching to and from a test configuration, used for examples. """ + _last_used_server = None _last_used_key = None _start_last_called = False @@ -150,8 +156,10 @@ def stop_using_configuration_for_example(cls): if not cls._start_last_called: # We don't want to allow this because it will (likely) result in the `server` and # `apikey` variables being set to None. - raise RuntimeError("`stop_use_example_configuration` called without a saved config." - "`start_use_example_configuration` must be called first.") + raise RuntimeError( + "`stop_use_example_configuration` called without a saved config." + "`start_use_example_configuration` must be called first." 
+ ) global server global apikey @@ -178,16 +186,16 @@ def _setup(): # read config file, create cache directory try: - os.mkdir(os.path.expanduser(os.path.join('~', '.openml'))) + os.mkdir(os.path.expanduser(os.path.join("~", ".openml"))) except FileExistsError: # For other errors, we want to propagate the error as openml does not work without cache pass config = _parse_config() - apikey = config.get('FAKE_SECTION', 'apikey') - server = config.get('FAKE_SECTION', 'server') + apikey = config.get("FAKE_SECTION", "apikey") + server = config.get("FAKE_SECTION", "server") - short_cache_dir = config.get('FAKE_SECTION', 'cachedir') + short_cache_dir = config.get("FAKE_SECTION", "cachedir") cache_directory = os.path.expanduser(short_cache_dir) # create the cache subdirectory @@ -197,13 +205,12 @@ def _setup(): # For other errors, we want to propagate the error as openml does not work without cache pass - avoid_duplicate_runs = config.getboolean('FAKE_SECTION', - 'avoid_duplicate_runs') - connection_n_retries = config.get('FAKE_SECTION', 'connection_n_retries') + avoid_duplicate_runs = config.getboolean("FAKE_SECTION", "avoid_duplicate_runs") + connection_n_retries = config.get("FAKE_SECTION", "connection_n_retries") if connection_n_retries > 20: raise ValueError( - 'A higher number of retries than 20 is not allowed to keep the ' - 'server load reasonable' + "A higher number of retries than 20 is not allowed to keep the " + "server load reasonable" ) @@ -215,8 +222,10 @@ def _parse_config(): # Create an empty config file if there was none so far fh = open(config_file, "w") fh.close() - logger.info("Could not find a configuration file at %s. Going to " - "create an empty file there." % config_file) + logger.info( + "Could not find a configuration file at %s. Going to " + "create an empty file there." % config_file + ) try: # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. 
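For orientation on the settings parsed here, a hedged sketch of overriding them at runtime instead of through the ~/.openml/config file; all values shown are placeholders, not recommendations.

# Hedged sketch, not part of the patch: the same settings that _parse_config()
# reads from the config file can be set on the module directly.
import openml

openml.config.apikey = "YOURKEY"  # placeholder key
openml.config.server = "https://test.openml.org/api/v1/xml"  # example/test server
openml.config.set_cache_directory("/tmp/openml_cache")  # placeholder path
openml.config.connection_n_retries = 5  # the file-based value is capped at 20 in _setup()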
@@ -243,7 +252,7 @@ def get_cache_directory(): """ url_suffix = urlparse(server).netloc - reversed_url_suffix = os.sep.join(url_suffix.split('.')[::-1]) + reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1]) if not cache_directory: _cachedir = _defaults(cache_directory) else: @@ -274,15 +283,13 @@ def set_cache_directory(cachedir): start_using_configuration_for_example = ( ConfigurationForExamples.start_using_configuration_for_example ) -stop_using_configuration_for_example = ( - ConfigurationForExamples.stop_using_configuration_for_example -) +stop_using_configuration_for_example = ConfigurationForExamples.stop_using_configuration_for_example __all__ = [ - 'get_cache_directory', - 'set_cache_directory', - 'start_using_configuration_for_example', - 'stop_using_configuration_for_example', + "get_cache_directory", + "set_cache_directory", + "start_using_configuration_for_example", + "stop_using_configuration_for_example", ] _setup() diff --git a/openml/datasets/__init__.py b/openml/datasets/__init__.py index 9783494af..f380a1676 100644 --- a/openml/datasets/__init__.py +++ b/openml/datasets/__init__.py @@ -8,20 +8,20 @@ get_datasets, list_datasets, status_update, - list_qualities + list_qualities, ) from .dataset import OpenMLDataset from .data_feature import OpenMLDataFeature __all__ = [ - 'attributes_arff_from_df', - 'check_datasets_active', - 'create_dataset', - 'get_dataset', - 'get_datasets', - 'list_datasets', - 'OpenMLDataset', - 'OpenMLDataFeature', - 'status_update', - 'list_qualities' + "attributes_arff_from_df", + "check_datasets_active", + "create_dataset", + "get_dataset", + "get_datasets", + "list_datasets", + "OpenMLDataset", + "OpenMLDataFeature", + "status_update", + "list_qualities", ] diff --git a/openml/datasets/data_feature.py b/openml/datasets/data_feature.py index dfb1aa112..eb727b000 100644 --- a/openml/datasets/data_feature.py +++ b/openml/datasets/data_feature.py @@ -17,27 +17,32 @@ class OpenMLDataFeature(object): list of the possible values, in case of nominal attribute number_missing_values : int """ - LEGAL_DATA_TYPES = ['nominal', 'numeric', 'string', 'date'] - def __init__(self, index, name, data_type, nominal_values, - number_missing_values): + LEGAL_DATA_TYPES = ["nominal", "numeric", "string", "date"] + + def __init__(self, index, name, data_type, nominal_values, number_missing_values): if type(index) != int: - raise ValueError('Index is of wrong datatype') + raise ValueError("Index is of wrong datatype") if data_type not in self.LEGAL_DATA_TYPES: - raise ValueError('data type should be in %s, found: %s' % - (str(self.LEGAL_DATA_TYPES), data_type)) - if data_type == 'nominal': + raise ValueError( + "data type should be in %s, found: %s" % (str(self.LEGAL_DATA_TYPES), data_type) + ) + if data_type == "nominal": if nominal_values is None: - raise TypeError('Dataset features require attribute `nominal_values` for nominal ' - 'feature type.') + raise TypeError( + "Dataset features require attribute `nominal_values` for nominal " + "feature type." 
+ ) elif not isinstance(nominal_values, list): - raise TypeError('Argument `nominal_values` is of wrong datatype, should be list, ' - 'but is {}'.format(type(nominal_values))) + raise TypeError( + "Argument `nominal_values` is of wrong datatype, should be list, " + "but is {}".format(type(nominal_values)) + ) else: if nominal_values is not None: - raise TypeError('Argument `nominal_values` must be None for non-nominal feature.') + raise TypeError("Argument `nominal_values` must be None for non-nominal feature.") if type(number_missing_values) != int: - raise ValueError('number_missing_values is of wrong datatype') + raise ValueError("number_missing_values is of wrong datatype") self.index = index self.name = str(name) diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 942067f8f..3b159f12a 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -100,27 +100,46 @@ class OpenMLDataset(OpenMLBase): dataset: string, optional Serialized arff dataset string. """ - def __init__(self, name, description, format=None, - data_format='arff', cache_format='pickle', - dataset_id=None, version=None, - creator=None, contributor=None, collection_date=None, - upload_date=None, language=None, licence=None, - url=None, default_target_attribute=None, - row_id_attribute=None, ignore_attribute=None, - version_label=None, citation=None, tag=None, - visibility=None, original_data_url=None, - paper_url=None, update_comment=None, - md5_checksum=None, data_file=None, features=None, - qualities=None, dataset=None): + + def __init__( + self, + name, + description, + format=None, + data_format="arff", + cache_format="pickle", + dataset_id=None, + version=None, + creator=None, + contributor=None, + collection_date=None, + upload_date=None, + language=None, + licence=None, + url=None, + default_target_attribute=None, + row_id_attribute=None, + ignore_attribute=None, + version_label=None, + citation=None, + tag=None, + visibility=None, + original_data_url=None, + paper_url=None, + update_comment=None, + md5_checksum=None, + data_file=None, + features=None, + qualities=None, + dataset=None, + ): if dataset_id is None: if description and not re.match("^[\x00-\x7F]*$", description): # not basiclatin (XSD complains) - raise ValueError("Invalid symbols in description: {}".format( - description)) + raise ValueError("Invalid symbols in description: {}".format(description)) if citation and not re.match("^[\x00-\x7F]*$", citation): # not basiclatin (XSD complains) - raise ValueError("Invalid symbols in citation: {}".format( - citation)) + raise ValueError("Invalid symbols in citation: {}".format(citation)) if not re.match("^[a-zA-Z0-9_\\-\\.\\(\\),]+$", name): # regex given by server in error message raise ValueError("Invalid symbols in name: {}".format(name)) @@ -130,17 +149,22 @@ def __init__(self, name, description, format=None, self.name = name self.version = int(version) if version is not None else None self.description = description - if cache_format not in ['feather', 'pickle']: - raise ValueError("cache_format must be one of 'feather' or 'pickle. " - "Invalid format specified: {}".format(cache_format)) + if cache_format not in ["feather", "pickle"]: + raise ValueError( + "cache_format must be one of 'feather' or 'pickle. " + "Invalid format specified: {}".format(cache_format) + ) self.cache_format = cache_format if format is None: self.format = data_format else: - warn("The format parameter in the init will be deprecated " - "in the future." 
- "Please use data_format instead", DeprecationWarning) + warn( + "The format parameter in the init will be deprecated " + "in the future." + "Please use data_format instead", + DeprecationWarning, + ) self.format = format self.creator = creator self.contributor = contributor @@ -156,8 +180,7 @@ def __init__(self, name, description, format=None, elif isinstance(ignore_attribute, list) or ignore_attribute is None: self.ignore_attribute = ignore_attribute else: - raise ValueError('Wrong data type for ignore_attribute. ' - 'Should be list.') + raise ValueError("Wrong data type for ignore_attribute. " "Should be list.") self.version_label = version_label self.citation = citation self.tag = tag @@ -173,26 +196,33 @@ def __init__(self, name, description, format=None, if features is not None: self.features = {} - for idx, xmlfeature in enumerate(features['oml:feature']): - nr_missing = xmlfeature.get('oml:number_of_missing_values', 0) - feature = OpenMLDataFeature(int(xmlfeature['oml:index']), - xmlfeature['oml:name'], - xmlfeature['oml:data_type'], - xmlfeature.get('oml:nominal_value'), - int(nr_missing)) + for idx, xmlfeature in enumerate(features["oml:feature"]): + nr_missing = xmlfeature.get("oml:number_of_missing_values", 0) + feature = OpenMLDataFeature( + int(xmlfeature["oml:index"]), + xmlfeature["oml:name"], + xmlfeature["oml:data_type"], + xmlfeature.get("oml:nominal_value"), + int(nr_missing), + ) if idx != feature.index: - raise ValueError('Data features not provided ' - 'in right order') + raise ValueError("Data features not provided " "in right order") self.features[feature.index] = feature self.qualities = _check_qualities(qualities) if data_file is not None: - self.data_pickle_file, self.data_feather_file,\ - self.feather_attribute_file = self._create_pickle_in_cache(data_file) + ( + self.data_pickle_file, + self.data_feather_file, + self.feather_attribute_file, + ) = self._create_pickle_in_cache(data_file) else: - self.data_pickle_file, self.data_feather_file, \ - self.feather_attribute_file = None, None, None + self.data_pickle_file, self.data_feather_file, self.feather_attribute_file = ( + None, + None, + None, + ) @property def id(self) -> Optional[int]: @@ -200,25 +230,37 @@ def id(self) -> Optional[int]: def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: """ Collect all information to display in the __repr__ body. 
""" - fields = {"Name": self.name, - "Version": self.version, - "Format": self.format, - "Licence": self.licence, - "Download URL": self.url, - "Data file": self.data_file, - "Pickle file": self.data_pickle_file, - "# of features": len(self.features) - if self.features is not None else None} + fields = { + "Name": self.name, + "Version": self.version, + "Format": self.format, + "Licence": self.licence, + "Download URL": self.url, + "Data file": self.data_file, + "Pickle file": self.data_pickle_file, + "# of features": len(self.features) if self.features is not None else None, + } if self.upload_date is not None: - fields["Upload Date"] = self.upload_date.replace('T', ' ') + fields["Upload Date"] = self.upload_date.replace("T", " ") if self.dataset_id is not None: fields["OpenML URL"] = self.openml_url - if self.qualities is not None and self.qualities['NumberOfInstances'] is not None: - fields["# of instances"] = int(self.qualities['NumberOfInstances']) + if self.qualities is not None and self.qualities["NumberOfInstances"] is not None: + fields["# of instances"] = int(self.qualities["NumberOfInstances"]) # determines the order in which the information will be printed - order = ["Name", "Version", "Format", "Upload Date", "Licence", "Download URL", - "OpenML URL", "Data File", "Pickle File", "# of features", "# of instances"] + order = [ + "Name", + "Version", + "Format", + "Upload Date", + "Licence", + "Download URL", + "OpenML URL", + "Data File", + "Pickle File", + "# of features", + "# of instances", + ] return [(key, fields[key]) for key in order if key in fields] def __eq__(self, other): @@ -227,12 +269,12 @@ def __eq__(self, other): return False server_fields = { - 'dataset_id', - 'version', - 'upload_date', - 'url', - 'dataset', - 'data_file', + "dataset_id", + "version", + "upload_date", + "url", + "dataset", + "data_file", } # check that the keys are identical @@ -242,13 +284,13 @@ def __eq__(self, other): return False # check that values of the common keys are identical - return all(self.__dict__[key] == other.__dict__[key] - for key in self_keys) + return all(self.__dict__[key] == other.__dict__[key] for key in self_keys) def _download_data(self) -> None: """ Download ARFF data file to standard cache directory. Set `self.data_file`. """ # import required here to avoid circular import. from .functions import _get_dataset_arff + self.data_file = _get_dataset_arff(self) def _get_arff(self, format: str) -> Dict: @@ -277,36 +319,37 @@ def _get_arff(self, format: str) -> Dict: import struct filename = self.data_file - bits = (8 * struct.calcsize("P")) + bits = 8 * struct.calcsize("P") # Files can be considered too large on a 32-bit system, # if it exceeds 120mb (slightly more than covtype dataset size) # This number is somewhat arbitrary. if bits != 64 and os.path.getsize(filename) > 120000000: - raise NotImplementedError("File {} too big for {}-bit system ({} bytes)." 
- .format(filename, os.path.getsize(filename), bits)) + raise NotImplementedError( + "File {} too big for {}-bit system ({} bytes).".format( + filename, os.path.getsize(filename), bits + ) + ) - if format.lower() == 'arff': + if format.lower() == "arff": return_type = arff.DENSE - elif format.lower() == 'sparse_arff': + elif format.lower() == "sparse_arff": return_type = arff.COO else: - raise ValueError('Unknown data format {}'.format(format)) + raise ValueError("Unknown data format {}".format(format)) def decode_arff(fh): decoder = arff.ArffDecoder() - return decoder.decode(fh, encode_nominal=True, - return_type=return_type) + return decoder.decode(fh, encode_nominal=True, return_type=return_type) if filename[-3:] == ".gz": with gzip.open(filename) as fh: return decode_arff(fh) else: - with io.open(filename, encoding='utf8') as fh: + with io.open(filename, encoding="utf8") as fh: return decode_arff(fh) def _parse_data_from_arff( - self, - arff_file_path: str + self, arff_file_path: str ) -> Tuple[Union[pd.DataFrame, scipy.sparse.csr_matrix], List[bool], List[str]]: """ Parse all required data from arff file. @@ -325,79 +368,72 @@ def _parse_data_from_arff( try: data = self._get_arff(self.format) except OSError as e: - logger.critical("Please check that the data file {} is " - "there and can be read.".format(arff_file_path)) + logger.critical( + "Please check that the data file {} is " + "there and can be read.".format(arff_file_path) + ) raise e ARFF_DTYPES_TO_PD_DTYPE = { - 'INTEGER': 'integer', - 'REAL': 'floating', - 'NUMERIC': 'floating', - 'STRING': 'string' + "INTEGER": "integer", + "REAL": "floating", + "NUMERIC": "floating", + "STRING": "string", } attribute_dtype = {} attribute_names = [] categories_names = {} categorical = [] - for i, (name, type_) in enumerate(data['attributes']): + for i, (name, type_) in enumerate(data["attributes"]): # if the feature is nominal and the a sparse matrix is # requested, the categories need to be numeric - if (isinstance(type_, list) - and self.format.lower() == 'sparse_arff'): + if isinstance(type_, list) and self.format.lower() == "sparse_arff": try: # checks if the strings which should be the class labels # can be encoded into integers pd.factorize(type_)[0] except ValueError: raise ValueError( - "Categorical data needs to be numeric when " - "using sparse ARFF." + "Categorical data needs to be numeric when " "using sparse ARFF." ) # string can only be supported with pandas DataFrame - elif (type_ == 'STRING' - and self.format.lower() == 'sparse_arff'): - raise ValueError( - "Dataset containing strings is not supported " - "with sparse ARFF." 
- ) + elif type_ == "STRING" and self.format.lower() == "sparse_arff": + raise ValueError("Dataset containing strings is not supported " "with sparse ARFF.") # infer the dtype from the ARFF header if isinstance(type_, list): categorical.append(True) categories_names[name] = type_ if len(type_) == 2: - type_norm = [cat.lower().capitalize() - for cat in type_] - if set(['True', 'False']) == set(type_norm): + type_norm = [cat.lower().capitalize() for cat in type_] + if set(["True", "False"]) == set(type_norm): categories_names[name] = [ - True if cat == 'True' else False - for cat in type_norm + True if cat == "True" else False for cat in type_norm ] - attribute_dtype[name] = 'boolean' + attribute_dtype[name] = "boolean" else: - attribute_dtype[name] = 'categorical' + attribute_dtype[name] = "categorical" else: - attribute_dtype[name] = 'categorical' + attribute_dtype[name] = "categorical" else: categorical.append(False) attribute_dtype[name] = ARFF_DTYPES_TO_PD_DTYPE[type_] attribute_names.append(name) - if self.format.lower() == 'sparse_arff': - X = data['data'] + if self.format.lower() == "sparse_arff": + X = data["data"] X_shape = (max(X[1]) + 1, max(X[2]) + 1) - X = scipy.sparse.coo_matrix( - (X[0], (X[1], X[2])), shape=X_shape, dtype=np.float32) + X = scipy.sparse.coo_matrix((X[0], (X[1], X[2])), shape=X_shape, dtype=np.float32) X = X.tocsr() - elif self.format.lower() == 'arff': - X = pd.DataFrame(data['data'], columns=attribute_names) + elif self.format.lower() == "arff": + X = pd.DataFrame(data["data"], columns=attribute_names) col = [] for column_name in X.columns: - if attribute_dtype[column_name] in ('categorical', - 'boolean'): - col.append(self._unpack_categories( - X[column_name], categories_names[column_name])) + if attribute_dtype[column_name] in ("categorical", "boolean"): + col.append( + self._unpack_categories(X[column_name], categories_names[column_name]) + ) else: col.append(X[column_name]) X = pd.concat(col, axis=1) @@ -408,10 +444,10 @@ def _parse_data_from_arff( def _create_pickle_in_cache(self, data_file: str) -> Tuple[str, str, str]: """ Parse the arff and pickle the result. Update any old pickle objects. """ - data_pickle_file = data_file.replace('.arff', '.pkl.py3') - data_feather_file = data_file.replace('.arff', '.feather') - feather_attribute_file = data_file.replace('.arff', '.feather.attributes.pkl.py3') - if os.path.exists(data_pickle_file) and self.cache_format == 'pickle': + data_pickle_file = data_file.replace(".arff", ".pkl.py3") + data_feather_file = data_file.replace(".arff", ".feather") + feather_attribute_file = data_file.replace(".arff", ".feather.attributes.pkl.py3") + if os.path.exists(data_pickle_file) and self.cache_format == "pickle": # Load the data to check if the pickle file is outdated (i.e. contains numpy array) with open(data_pickle_file, "rb") as fh: try: @@ -429,7 +465,7 @@ def _create_pickle_in_cache(self, data_file: str) -> Tuple[str, str, str]: if isinstance(data, pd.DataFrame) or scipy.sparse.issparse(data): logger.debug("Data pickle file already exists and is up to date.") return data_pickle_file, data_feather_file, feather_attribute_file - elif os.path.exists(data_feather_file) and self.cache_format == 'feather': + elif os.path.exists(data_feather_file) and self.cache_format == "feather": # Load the data to check if the pickle file is outdated (i.e. 
contains numpy array) try: data = pd.read_feather(data_feather_file) @@ -454,27 +490,31 @@ def _create_pickle_in_cache(self, data_file: str) -> Tuple[str, str, str]: pickle.dump((categorical, attribute_names), fh, pickle.HIGHEST_PROTOCOL) else: logger.info("pickle write {}".format(self.name)) - self.cache_format = 'pickle' + self.cache_format = "pickle" with open(data_pickle_file, "wb") as fh: pickle.dump((X, categorical, attribute_names), fh, pickle.HIGHEST_PROTOCOL) - logger.debug("Saved dataset {did}: {name} to file {path}" - .format(did=int(self.dataset_id or -1), - name=self.name, - path=data_pickle_file) - ) + logger.debug( + "Saved dataset {did}: {name} to file {path}".format( + did=int(self.dataset_id or -1), name=self.name, path=data_pickle_file + ) + ) return data_pickle_file, data_feather_file, feather_attribute_file def _load_data(self): """ Load data from pickle or arff. Download data first if not present on disk. """ - if (self.cache_format == 'pickle' and self.data_pickle_file is None) or \ - (self.cache_format == 'feather' and self.data_feather_file is None): + if (self.cache_format == "pickle" and self.data_pickle_file is None) or ( + self.cache_format == "feather" and self.data_feather_file is None + ): if self.data_file is None: self._download_data() - self.data_pickle_file, self.data_feather_file, self.feather_attribute_file = \ - self._create_pickle_in_cache(self.data_file) + ( + self.data_pickle_file, + self.data_feather_file, + self.feather_attribute_file, + ) = self._create_pickle_in_cache(self.data_file) try: - if self.cache_format == 'feather': + if self.cache_format == "feather": logger.info("feather load data {}".format(self.name)) data = pd.read_feather(self.data_feather_file) @@ -495,8 +535,10 @@ def _load_data(self): ) data, categorical, attribute_names = self._parse_data_from_arff(self.data_file) except FileNotFoundError: - raise ValueError("Cannot find a pickle file for dataset {} at " - "location {} ".format(self.name, self.data_pickle_file)) + raise ValueError( + "Cannot find a pickle file for dataset {} at " + "location {} ".format(self.name, self.data_pickle_file) + ) return data, categorical, attribute_names @@ -529,11 +571,12 @@ def _convert_array_format(data, array_format, attribute_names): # We encode the categories such that they are integer to be able # to make a conversion to numeric for backward compatibility def _encode_if_category(column): - if column.dtype.name == 'category': + if column.dtype.name == "category": column = column.cat.codes.astype(np.float32) mask_nan = column == -1 column[mask_nan] = np.nan return column + if data.ndim == 2: columns = { column_name: _encode_if_category(data.loc[:, column_name]) @@ -546,7 +589,7 @@ def _encode_if_category(column): return np.asarray(data, dtype=np.float32) except ValueError: raise PyOpenMLError( - 'PyOpenML cannot handle string when returning numpy' + "PyOpenML cannot handle string when returning numpy" ' arrays. Use dataset_format="dataframe".' 
) elif array_format == "dataframe": @@ -574,16 +617,16 @@ def _unpack_categories(series, categories): return pd.Series(raw_cat, index=series.index, name=series.name) def get_data( - self, - target: Optional[Union[List[str], str]] = None, - include_row_id: bool = False, - include_ignore_attribute: bool = False, - dataset_format: str = "dataframe", + self, + target: Optional[Union[List[str], str]] = None, + include_row_id: bool = False, + include_ignore_attribute: bool = False, + dataset_format: str = "dataframe", ) -> Tuple[ - Union[np.ndarray, pd.DataFrame, scipy.sparse.csr_matrix], - Optional[Union[np.ndarray, pd.DataFrame]], - List[bool], - List[str] + Union[np.ndarray, pd.DataFrame, scipy.sparse.csr_matrix], + Optional[Union[np.ndarray, pd.DataFrame]], + List[bool], + List[str], ]: """ Returns dataset content as dataframes or sparse matrices. @@ -629,64 +672,57 @@ def get_data( to_exclude.extend(self.ignore_attribute) if len(to_exclude) > 0: - logger.info("Going to remove the following attributes:" - " %s" % to_exclude) - keep = np.array([True if column not in to_exclude else False - for column in attribute_names]) - if hasattr(data, 'iloc'): + logger.info("Going to remove the following attributes:" " %s" % to_exclude) + keep = np.array( + [True if column not in to_exclude else False for column in attribute_names] + ) + if hasattr(data, "iloc"): data = data.iloc[:, keep] else: data = data[:, keep] categorical = [cat for cat, k in zip(categorical, keep) if k] - attribute_names = [att for att, k in - zip(attribute_names, keep) if k] + attribute_names = [att for att, k in zip(attribute_names, keep) if k] if target is None: - data = self._convert_array_format(data, dataset_format, - attribute_names) + data = self._convert_array_format(data, dataset_format, attribute_names) targets = None else: if isinstance(target, str): - if ',' in target: - target = target.split(',') + if "," in target: + target = target.split(",") else: target = [target] - targets = np.array([True if column in target else False - for column in attribute_names]) + targets = np.array([True if column in target else False for column in attribute_names]) if np.sum(targets) > 1: raise NotImplementedError( - "Number of requested targets %d is not implemented." % - np.sum(targets) + "Number of requested targets %d is not implemented." 
% np.sum(targets) ) target_categorical = [ - cat for cat, column in zip(categorical, attribute_names) - if column in target + cat for cat, column in zip(categorical, attribute_names) if column in target ] target_dtype = int if target_categorical[0] else float - if hasattr(data, 'iloc'): + if hasattr(data, "iloc"): x = data.iloc[:, ~targets] y = data.iloc[:, targets] else: x = data[:, ~targets] y = data[:, targets].astype(target_dtype) - categorical = [cat for cat, t in zip(categorical, targets) - if not t] - attribute_names = [att for att, k in zip(attribute_names, targets) - if not k] + categorical = [cat for cat, t in zip(categorical, targets) if not t] + attribute_names = [att for att, k in zip(attribute_names, targets) if not k] x = self._convert_array_format(x, dataset_format, attribute_names) if scipy.sparse.issparse(y): y = np.asarray(y.todense()).astype(target_dtype).flatten() y = y.squeeze() y = self._convert_array_format(y, dataset_format, attribute_names) - y = y.astype(target_dtype) if dataset_format == 'array' else y + y = y.astype(target_dtype) if dataset_format == "array" else y data, targets = x, y return data, targets, categorical, attribute_names - def retrieve_class_labels(self, target_name: str = 'class') -> Union[None, List[str]]: + def retrieve_class_labels(self, target_name: str = "class") -> Union[None, List[str]]: """Reads the datasets arff to determine the class-labels. If the task has no class labels (for example a regression problem) @@ -704,13 +740,13 @@ def retrieve_class_labels(self, target_name: str = 'class') -> Union[None, List[ list """ for feature in self.features.values(): - if (feature.name == target_name) and (feature.data_type == 'nominal'): + if (feature.name == target_name) and (feature.data_type == "nominal"): return feature.nominal_values return None - def get_features_by_type(self, data_type, exclude=None, - exclude_ignore_attribute=True, - exclude_row_id_attribute=True): + def get_features_by_type( + self, data_type, exclude=None, exclude_ignore_attribute=True, exclude_row_id_attribute=True + ): """ Return indices of features of a given type, e.g. all nominal features. Optional parameters to exclude various features by index or ontology. @@ -774,12 +810,12 @@ def _get_file_elements(self) -> Dict: path = None if self.data_file is None else os.path.abspath(self.data_file) if self._dataset is not None: - file_elements['dataset'] = self._dataset + file_elements["dataset"] = self._dataset elif path is not None and os.path.exists(path): - with open(path, 'rb') as fp: - file_elements['dataset'] = fp.read() + with open(path, "rb") as fp: + file_elements["dataset"] = fp.read() try: - dataset_utf8 = str(file_elements['dataset'], 'utf8') + dataset_utf8 = str(file_elements["dataset"], "utf8") arff.ArffDecoder().decode(dataset_utf8, encode_nominal=True) except arff.ArffException: raise ValueError("The file you have provided is not a valid arff file.") @@ -789,20 +825,39 @@ def _get_file_elements(self) -> Dict: def _parse_publish_response(self, xml_response: Dict): """ Parse the id from the xml_response and assign it to self. """ - self.dataset_id = int(xml_response['oml:upload_data_set']['oml:id']) + self.dataset_id = int(xml_response["oml:upload_data_set"]["oml:id"]) - def _to_dict(self) -> 'OrderedDict[str, OrderedDict]': + def _to_dict(self) -> "OrderedDict[str, OrderedDict]": """ Creates a dictionary representation of self. 
""" - props = ['id', 'name', 'version', 'description', 'format', 'creator', - 'contributor', 'collection_date', 'upload_date', 'language', - 'licence', 'url', 'default_target_attribute', - 'row_id_attribute', 'ignore_attribute', 'version_label', - 'citation', 'tag', 'visibility', 'original_data_url', - 'paper_url', 'update_comment', 'md5_checksum'] + props = [ + "id", + "name", + "version", + "description", + "format", + "creator", + "contributor", + "collection_date", + "upload_date", + "language", + "licence", + "url", + "default_target_attribute", + "row_id_attribute", + "ignore_attribute", + "version_label", + "citation", + "tag", + "visibility", + "original_data_url", + "paper_url", + "update_comment", + "md5_checksum", + ] data_container = OrderedDict() # type: 'OrderedDict[str, OrderedDict]' - data_dict = OrderedDict([('@xmlns:oml', 'http://openml.org/openml')]) - data_container['oml:data_set_description'] = data_dict + data_dict = OrderedDict([("@xmlns:oml", "http://openml.org/openml")]) + data_container["oml:data_set_description"] = data_dict for prop in props: content = getattr(self, prop, None) @@ -816,13 +871,13 @@ def _check_qualities(qualities): if qualities is not None: qualities_ = {} for xmlquality in qualities: - name = xmlquality['oml:name'] - if xmlquality.get('oml:value', None) is None: - value = float('NaN') - elif xmlquality['oml:value'] == 'null': - value = float('NaN') + name = xmlquality["oml:name"] + if xmlquality.get("oml:value", None) is None: + value = float("NaN") + elif xmlquality["oml:value"] == "null": + value = float("NaN") else: - value = float(xmlquality['oml:value']) + value = float(xmlquality["oml:value"]) qualities_[name] = value return qualities_ else: diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 26f52a724..79fa82867 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -26,11 +26,11 @@ from ..utils import ( _create_cache_directory, _remove_cache_dir_for_id, - _create_cache_directory_for_id + _create_cache_directory_for_id, ) -DATASETS_CACHE_DIR_NAME = 'datasets' +DATASETS_CACHE_DIR_NAME = "datasets" logger = logging.getLogger(__name__) ############################################################################ @@ -60,12 +60,13 @@ def _list_cached_datasets(): dataset_id = int(directory_name) - directory_name = os.path.join(dataset_cache_dir, - directory_name) + directory_name = os.path.join(dataset_cache_dir, directory_name) dataset_directory_content = os.listdir(directory_name) - if ("dataset.arff" in dataset_directory_content - and "description.xml" in dataset_directory_content): + if ( + "dataset.arff" in dataset_directory_content + and "description.xml" in dataset_directory_content + ): if dataset_id not in datasets: datasets.append(dataset_id) @@ -86,9 +87,7 @@ def _get_cached_datasets(): return datasets -def _get_cached_dataset( - dataset_id: int -) -> OpenMLDataset: +def _get_cached_dataset(dataset_id: int) -> OpenMLDataset: """Get cached dataset for ID. 
Returns @@ -99,69 +98,55 @@ def _get_cached_dataset( arff_file = _get_cached_dataset_arff(dataset_id) features = _get_cached_dataset_features(dataset_id) qualities = _get_cached_dataset_qualities(dataset_id) - dataset = _create_dataset_from_description(description, - features, - qualities, - arff_file) + dataset = _create_dataset_from_description(description, features, qualities, arff_file) return dataset def _get_cached_dataset_description(dataset_id): - did_cache_dir = _create_cache_directory_for_id( - DATASETS_CACHE_DIR_NAME, dataset_id, - ) + did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset_id,) description_file = os.path.join(did_cache_dir, "description.xml") try: - with io.open(description_file, encoding='utf8') as fh: + with io.open(description_file, encoding="utf8") as fh: dataset_xml = fh.read() return xmltodict.parse(dataset_xml)["oml:data_set_description"] except (IOError, OSError): raise OpenMLCacheException( - "Dataset description for dataset id %d not " - "cached" % dataset_id) + "Dataset description for dataset id %d not " "cached" % dataset_id + ) def _get_cached_dataset_features(dataset_id): - did_cache_dir = _create_cache_directory_for_id( - DATASETS_CACHE_DIR_NAME, dataset_id, - ) + did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset_id,) features_file = os.path.join(did_cache_dir, "features.xml") try: return _load_features_from_file(features_file) except (IOError, OSError): - raise OpenMLCacheException("Dataset features for dataset id %d not " - "cached" % dataset_id) + raise OpenMLCacheException("Dataset features for dataset id %d not " "cached" % dataset_id) def _get_cached_dataset_qualities(dataset_id): - did_cache_dir = _create_cache_directory_for_id( - DATASETS_CACHE_DIR_NAME, dataset_id, - ) + did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset_id,) qualities_file = os.path.join(did_cache_dir, "qualities.xml") try: - with io.open(qualities_file, encoding='utf8') as fh: + with io.open(qualities_file, encoding="utf8") as fh: qualities_xml = fh.read() qualities_dict = xmltodict.parse(qualities_xml) - return qualities_dict["oml:data_qualities"]['oml:quality'] + return qualities_dict["oml:data_qualities"]["oml:quality"] except (IOError, OSError): - raise OpenMLCacheException("Dataset qualities for dataset id %d not " - "cached" % dataset_id) + raise OpenMLCacheException("Dataset qualities for dataset id %d not " "cached" % dataset_id) def _get_cached_dataset_arff(dataset_id): - did_cache_dir = _create_cache_directory_for_id( - DATASETS_CACHE_DIR_NAME, dataset_id, - ) + did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset_id,) output_file = os.path.join(did_cache_dir, "dataset.arff") try: - with io.open(output_file, encoding='utf8'): + with io.open(output_file, encoding="utf8"): pass return output_file except (OSError, IOError): - raise OpenMLCacheException("ARFF file for dataset id %d not " - "cached" % dataset_id) + raise OpenMLCacheException("ARFF file for dataset id %d not " "cached" % dataset_id) def _get_cache_directory(dataset: OpenMLDataset) -> str: @@ -180,16 +165,14 @@ def list_qualities() -> List[str]: list """ api_call = "data/qualities/list" - xml_string = openml._api_calls._perform_api_call(api_call, 'get') - qualities = xmltodict.parse(xml_string, force_list=('oml:quality')) + xml_string = openml._api_calls._perform_api_call(api_call, "get") + qualities = xmltodict.parse(xml_string, force_list=("oml:quality")) # Minimalistic check if the XML 
is useful - if 'oml:data_qualities_list' not in qualities: - raise ValueError('Error in return XML, does not contain ' - '"oml:data_qualities_list"') - if not isinstance(qualities['oml:data_qualities_list']['oml:quality'], list): - raise TypeError('Error in return XML, does not contain ' - '"oml:quality" as a list') - qualities = qualities['oml:data_qualities_list']['oml:quality'] + if "oml:data_qualities_list" not in qualities: + raise ValueError("Error in return XML, does not contain " '"oml:data_qualities_list"') + if not isinstance(qualities["oml:data_qualities_list"]["oml:quality"], list): + raise TypeError("Error in return XML, does not contain " '"oml:quality" as a list') + qualities = qualities["oml:data_qualities_list"]["oml:quality"] return qualities @@ -199,7 +182,7 @@ def list_datasets( size: Optional[int] = None, status: Optional[str] = None, tag: Optional[str] = None, - output_format: str = 'dict', + output_format: str = "dict", **kwargs ) -> Union[Dict, pd.DataFrame]: @@ -255,21 +238,24 @@ def list_datasets( If qualities are calculated for the dataset, some of these are also included as columns. """ - if output_format not in ['dataframe', 'dict']: - raise ValueError("Invalid output format selected. " - "Only 'dict' or 'dataframe' applicable.") + if output_format not in ["dataframe", "dict"]: + raise ValueError( + "Invalid output format selected. " "Only 'dict' or 'dataframe' applicable." + ) - return openml.utils._list_all(data_id=data_id, - output_format=output_format, - listing_call=_list_datasets, - offset=offset, - size=size, - status=status, - tag=tag, - **kwargs) + return openml.utils._list_all( + data_id=data_id, + output_format=output_format, + listing_call=_list_datasets, + offset=offset, + size=size, + status=status, + tag=tag, + **kwargs + ) -def _list_datasets(data_id: Optional[List] = None, output_format='dict', **kwargs): +def _list_datasets(data_id: Optional[List] = None, output_format="dict", **kwargs): """ Perform api call to return a list of all datasets. 
@@ -302,49 +288,48 @@ def _list_datasets(data_id: Optional[List] = None, output_format='dict', **kwarg for operator, value in kwargs.items(): api_call += "/%s/%s" % (operator, value) if data_id is not None: - api_call += "/data_id/%s" % ','.join([str(int(i)) for i in data_id]) + api_call += "/data_id/%s" % ",".join([str(int(i)) for i in data_id]) return __list_datasets(api_call=api_call, output_format=output_format) -def __list_datasets(api_call, output_format='dict'): +def __list_datasets(api_call, output_format="dict"): - xml_string = openml._api_calls._perform_api_call(api_call, 'get') - datasets_dict = xmltodict.parse(xml_string, force_list=('oml:dataset',)) + xml_string = openml._api_calls._perform_api_call(api_call, "get") + datasets_dict = xmltodict.parse(xml_string, force_list=("oml:dataset",)) # Minimalistic check if the XML is useful - assert type(datasets_dict['oml:data']['oml:dataset']) == list, \ - type(datasets_dict['oml:data']) - assert datasets_dict['oml:data']['@xmlns:oml'] == \ - 'http://openml.org/openml', datasets_dict['oml:data']['@xmlns:oml'] + assert type(datasets_dict["oml:data"]["oml:dataset"]) == list, type(datasets_dict["oml:data"]) + assert datasets_dict["oml:data"]["@xmlns:oml"] == "http://openml.org/openml", datasets_dict[ + "oml:data" + ]["@xmlns:oml"] datasets = dict() - for dataset_ in datasets_dict['oml:data']['oml:dataset']: - ignore_attribute = ['oml:file_id', 'oml:quality'] - dataset = {k.replace('oml:', ''): v - for (k, v) in dataset_.items() - if k not in ignore_attribute} - dataset['did'] = int(dataset['did']) - dataset['version'] = int(dataset['version']) + for dataset_ in datasets_dict["oml:data"]["oml:dataset"]: + ignore_attribute = ["oml:file_id", "oml:quality"] + dataset = { + k.replace("oml:", ""): v for (k, v) in dataset_.items() if k not in ignore_attribute + } + dataset["did"] = int(dataset["did"]) + dataset["version"] = int(dataset["version"]) # The number of qualities can range from 0 to infinity - for quality in dataset_.get('oml:quality', list()): + for quality in dataset_.get("oml:quality", list()): try: - dataset[quality['@name']] = int(quality['#text']) + dataset[quality["@name"]] = int(quality["#text"]) except ValueError: - dataset[quality['@name']] = float(quality['#text']) - datasets[dataset['did']] = dataset + dataset[quality["@name"]] = float(quality["#text"]) + datasets[dataset["did"]] = dataset - if output_format == 'dataframe': - datasets = pd.DataFrame.from_dict(datasets, orient='index') + if output_format == "dataframe": + datasets = pd.DataFrame.from_dict(datasets, orient="index") return datasets def _load_features_from_file(features_file: str) -> Dict: - with io.open(features_file, encoding='utf8') as fh: + with io.open(features_file, encoding="utf8") as fh: features_xml = fh.read() - xml_dict = xmltodict.parse(features_xml, - force_list=('oml:feature', 'oml:nominal_value')) + xml_dict = xmltodict.parse(features_xml, force_list=("oml:feature", "oml:nominal_value")) return xml_dict["oml:data_features"] @@ -362,23 +347,21 @@ def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]: dict A dictionary with items {did: bool} """ - dataset_list = list_datasets(status='all') + dataset_list = list_datasets(status="all") active = {} for did in dataset_ids: dataset = dataset_list.get(did, None) if dataset is None: - raise ValueError('Could not find dataset {} in OpenML dataset list.'.format(did)) + raise ValueError("Could not find dataset {} in OpenML dataset list.".format(did)) else: - active[did] = (dataset['status'] 
== 'active') + active[did] = dataset["status"] == "active" return active def _name_to_id( - dataset_name: str, - version: Optional[int] = None, - error_if_multiple: bool = False + dataset_name: str, version: Optional[int] = None, error_if_multiple: bool = False ) -> int: """ Attempt to find the dataset id of the dataset with the given name. @@ -403,7 +386,7 @@ def _name_to_id( int The id of the dataset. """ - status = None if version is not None else 'active' + status = None if version is not None else "active" candidates = list_datasets(data_name=dataset_name, status=status, data_version=version) if error_if_multiple and len(candidates) > 1: raise ValueError("Multiple active datasets exist with name {}".format(dataset_name)) @@ -417,8 +400,7 @@ def _name_to_id( def get_datasets( - dataset_ids: List[Union[str, int]], - download_data: bool = True, + dataset_ids: List[Union[str, int]], download_data: bool = True, ) -> List[OpenMLDataset]: """Download datasets. @@ -452,7 +434,7 @@ def get_dataset( download_data: bool = True, version: int = None, error_if_multiple: bool = False, - cache_format: str = 'pickle' + cache_format: str = "pickle", ) -> OpenMLDataset: """ Download the OpenML dataset representation, optionally also download actual data file. @@ -489,9 +471,11 @@ def get_dataset( dataset : :class:`openml.OpenMLDataset` The downloaded dataset. """ - if cache_format not in ['feather', 'pickle']: - raise ValueError("cache_format must be one of 'feather' or 'pickle. " - "Invalid format specified: {}".format(cache_format)) + if cache_format not in ["feather", "pickle"]: + raise ValueError( + "cache_format must be one of 'feather' or 'pickle. " + "Invalid format specified: {}".format(cache_format) + ) if isinstance(dataset_id, str): try: @@ -499,12 +483,11 @@ def get_dataset( except ValueError: dataset_id = _name_to_id(dataset_id, version, error_if_multiple) # type: ignore elif not isinstance(dataset_id, int): - raise TypeError("`dataset_id` must be one of `str` or `int`, not {}." - .format(type(dataset_id))) + raise TypeError( + "`dataset_id` must be one of `str` or `int`, not {}.".format(type(dataset_id)) + ) - did_cache_dir = _create_cache_directory_for_id( - DATASETS_CACHE_DIR_NAME, dataset_id, - ) + did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset_id,) try: remove_dataset_cache = True @@ -514,7 +497,7 @@ def get_dataset( try: qualities = _get_dataset_qualities(did_cache_dir, dataset_id) except OpenMLServerException as e: - if e.code == 362 and str(e) == 'No qualities found - None': + if e.code == 362 and str(e) == "No qualities found - None": logger.warning("No qualities found for dataset {}".format(dataset_id)) qualities = None else: @@ -531,8 +514,7 @@ def get_dataset( raise e finally: if remove_dataset_cache: - _remove_cache_dir_for_id(DATASETS_CACHE_DIR_NAME, - did_cache_dir) + _remove_cache_dir_for_id(DATASETS_CACHE_DIR_NAME, did_cache_dir) dataset = _create_dataset_from_description( description, features, qualities, arff_file, cache_format @@ -553,11 +535,7 @@ def attributes_arff_from_df(df): attributes_arff : str The data set attributes as required by the ARFF format. """ - PD_DTYPES_TO_ARFF_DTYPE = { - 'integer': 'INTEGER', - 'floating': 'REAL', - 'string': 'STRING' - } + PD_DTYPES_TO_ARFF_DTYPE = {"integer": "INTEGER", "floating": "REAL", "string": "STRING"} attributes_arff = [] if not all([isinstance(column_name, str) for column_name in df.columns]): @@ -569,43 +547,55 @@ def attributes_arff_from_df(df): # dropped before the inference instead. 
column_dtype = pd.api.types.infer_dtype(df[column_name].dropna(), skipna=False) - if column_dtype == 'categorical': + if column_dtype == "categorical": # for categorical feature, arff expects a list string. However, a # categorical column can contain mixed type and should therefore # raise an error asking to convert all entries to string. categories = df[column_name].cat.categories categories_dtype = pd.api.types.infer_dtype(categories) - if categories_dtype not in ('string', 'unicode'): - raise ValueError("The column '{}' of the dataframe is of " - "'category' dtype. Therefore, all values in " - "this columns should be string. Please " - "convert the entries which are not string. " - "Got {} dtype in this column." - .format(column_name, categories_dtype)) + if categories_dtype not in ("string", "unicode"): + raise ValueError( + "The column '{}' of the dataframe is of " + "'category' dtype. Therefore, all values in " + "this columns should be string. Please " + "convert the entries which are not string. " + "Got {} dtype in this column.".format(column_name, categories_dtype) + ) attributes_arff.append((column_name, categories.tolist())) - elif column_dtype == 'boolean': + elif column_dtype == "boolean": # boolean are encoded as categorical. - attributes_arff.append((column_name, ['True', 'False'])) + attributes_arff.append((column_name, ["True", "False"])) elif column_dtype in PD_DTYPES_TO_ARFF_DTYPE.keys(): - attributes_arff.append((column_name, - PD_DTYPES_TO_ARFF_DTYPE[column_dtype])) + attributes_arff.append((column_name, PD_DTYPES_TO_ARFF_DTYPE[column_dtype])) else: - raise ValueError("The dtype '{}' of the column '{}' is not " - "currently supported by liac-arff. Supported " - "dtypes are categorical, string, integer, " - "floating, and boolean." - .format(column_dtype, column_name)) + raise ValueError( + "The dtype '{}' of the column '{}' is not " + "currently supported by liac-arff. Supported " + "dtypes are categorical, string, integer, " + "floating, and boolean.".format(column_dtype, column_name) + ) return attributes_arff -def create_dataset(name, description, creator, contributor, - collection_date, language, - licence, attributes, data, - default_target_attribute, - ignore_attribute, citation, - row_id_attribute=None, - original_data_url=None, paper_url=None, - update_comment=None, version_label=None): +def create_dataset( + name, + description, + creator, + contributor, + collection_date, + language, + licence, + attributes, + data, + default_target_attribute, + ignore_attribute, + citation, + row_id_attribute=None, + original_data_url=None, + paper_url=None, + update_comment=None, + version_label=None, +): """Create a dataset. This function creates an OpenMLDataset object. @@ -681,10 +671,12 @@ def create_dataset(name, description, creator, contributor, if data.index.name is not None: data = data.reset_index() - if attributes == 'auto' or isinstance(attributes, dict): + if attributes == "auto" or isinstance(attributes, dict): if not hasattr(data, "columns"): - raise ValueError("Automatically inferring attributes requires " - "a pandas DataFrame. A {!r} was given instead.".format(data)) + raise ValueError( + "Automatically inferring attributes requires " + "a pandas DataFrame. 
A {!r} was given instead.".format(data) + ) # infer the type of data for each column of the DataFrame attributes_ = attributes_arff_from_df(data) if isinstance(attributes, dict): @@ -697,13 +689,13 @@ def create_dataset(name, description, creator, contributor, attributes_ = attributes if row_id_attribute is not None: - is_row_id_an_attribute = any([attr[0] == row_id_attribute - for attr in attributes_]) + is_row_id_an_attribute = any([attr[0] == row_id_attribute for attr in attributes_]) if not is_row_id_an_attribute: raise ValueError( "'row_id_attribute' should be one of the data attribute. " - " Got '{}' while candidates are {}." - .format(row_id_attribute, [attr[0] for attr in attributes_]) + " Got '{}' while candidates are {}.".format( + row_id_attribute, [attr[0] for attr in attributes_] + ) ) if hasattr(data, "columns"): @@ -719,33 +711,31 @@ def create_dataset(name, description, creator, contributor, if isinstance(data, (list, np.ndarray)): if isinstance(data[0], (list, np.ndarray)): - data_format = 'arff' + data_format = "arff" elif isinstance(data[0], dict): - data_format = 'sparse_arff' + data_format = "sparse_arff" else: raise ValueError( - 'When giving a list or a numpy.ndarray, ' - 'they should contain a list/ numpy.ndarray ' - 'for dense data or a dictionary for sparse ' - 'data. Got {!r} instead.' - .format(data[0]) + "When giving a list or a numpy.ndarray, " + "they should contain a list/ numpy.ndarray " + "for dense data or a dictionary for sparse " + "data. Got {!r} instead.".format(data[0]) ) elif isinstance(data, coo_matrix): - data_format = 'sparse_arff' + data_format = "sparse_arff" else: raise ValueError( - 'When giving a list or a numpy.ndarray, ' - 'they should contain a list/ numpy.ndarray ' - 'for dense data or a dictionary for sparse ' - 'data. Got {!r} instead.' - .format(data[0]) + "When giving a list or a numpy.ndarray, " + "they should contain a list/ numpy.ndarray " + "for dense data or a dictionary for sparse " + "data. Got {!r} instead.".format(data[0]) ) arff_object = { - 'relation': name, - 'description': description, - 'attributes': attributes_, - 'data': data + "relation": name, + "description": description, + "attributes": attributes_, + "data": data, } # serializes the ARFF dataset object and returns a string @@ -753,15 +743,13 @@ def create_dataset(name, description, creator, contributor, try: # check if ARFF is valid decoder = arff.ArffDecoder() - return_type = arff.COO if data_format == 'sparse_arff' else arff.DENSE - decoder.decode( - arff_dataset, - encode_nominal=True, - return_type=return_type - ) + return_type = arff.COO if data_format == "sparse_arff" else arff.DENSE + decoder.decode(arff_dataset, encode_nominal=True, return_type=return_type) except arff.ArffException: - raise ValueError("The arguments you have provided \ - do not construct a valid ARFF file") + raise ValueError( + "The arguments you have provided \ + do not construct a valid ARFF file" + ) return OpenMLDataset( name=name, @@ -798,20 +786,17 @@ def status_update(data_id, status): status : str, 'active' or 'deactivated' """ - legal_status = {'active', 'deactivated'} + legal_status = {"active", "deactivated"} if status not in legal_status: - raise ValueError('Illegal status value. ' - 'Legal values: %s' % legal_status) - data = {'data_id': data_id, 'status': status} - result_xml = openml._api_calls._perform_api_call("data/status/update", - 'post', - data=data) + raise ValueError("Illegal status value. 
" "Legal values: %s" % legal_status) + data = {"data_id": data_id, "status": status} + result_xml = openml._api_calls._perform_api_call("data/status/update", "post", data=data) result = xmltodict.parse(result_xml) - server_data_id = result['oml:data_status_update']['oml:id'] - server_status = result['oml:data_status_update']['oml:status'] + server_data_id = result["oml:data_status_update"]["oml:id"] + server_status = result["oml:data_status_update"]["oml:status"] if status != server_status or int(data_id) != int(server_data_id): # This should never happen - raise ValueError('Data id/status does not collide') + raise ValueError("Data id/status does not collide") def _get_dataset_description(did_cache_dir, dataset_id): @@ -843,18 +828,16 @@ def _get_dataset_description(did_cache_dir, dataset_id): return _get_cached_dataset_description(dataset_id) except OpenMLCacheException: url_extension = "data/{}".format(dataset_id) - dataset_xml = openml._api_calls._perform_api_call(url_extension, 'get') - with io.open(description_file, "w", encoding='utf8') as fh: + dataset_xml = openml._api_calls._perform_api_call(url_extension, "get") + with io.open(description_file, "w", encoding="utf8") as fh: fh.write(dataset_xml) - description = xmltodict.parse(dataset_xml)[ - "oml:data_set_description"] + description = xmltodict.parse(dataset_xml)["oml:data_set_description"] return description -def _get_dataset_arff(description: Union[Dict, OpenMLDataset], - cache_directory: str = None) -> str: +def _get_dataset_arff(description: Union[Dict, OpenMLDataset], cache_directory: str = None) -> str: """ Return the path to the local arff file of the dataset. If is not cached, it is downloaded. Checks if the file is in the cache, if yes, return the path to the file. @@ -879,8 +862,8 @@ def _get_dataset_arff(description: Union[Dict, OpenMLDataset], """ if isinstance(description, dict): md5_checksum_fixture = description.get("oml:md5_checksum") - url = description['oml:url'] - did = description.get('oml:id') + url = description["oml:url"] + did = description.get("oml:id") elif isinstance(description, OpenMLDataset): md5_checksum_fixture = description.md5_checksum url = description.url @@ -894,9 +877,7 @@ def _get_dataset_arff(description: Union[Dict, OpenMLDataset], try: openml._api_calls._download_text_file( - source=url, - output_path=output_file_path, - md5_checksum=md5_checksum_fixture + source=url, output_path=output_file_path, md5_checksum=md5_checksum_fixture ) except OpenMLHashException as e: additional_info = " Raised when downloading dataset {}.".format(did) @@ -932,8 +913,8 @@ def _get_dataset_features(did_cache_dir, dataset_id): # Dataset features aren't subject to change... 
if not os.path.isfile(features_file): url_extension = "data/features/{}".format(dataset_id) - features_xml = openml._api_calls._perform_api_call(url_extension, 'get') - with io.open(features_file, "w", encoding='utf8') as fh: + features_xml = openml._api_calls._perform_api_call(url_extension, "get") + with io.open(features_file, "w", encoding="utf8") as fh: fh.write(features_xml) return _load_features_from_file(features_file) @@ -962,27 +943,27 @@ def _get_dataset_qualities(did_cache_dir, dataset_id): # Dataset qualities are subject to change and must be fetched every time qualities_file = os.path.join(did_cache_dir, "qualities.xml") try: - with io.open(qualities_file, encoding='utf8') as fh: + with io.open(qualities_file, encoding="utf8") as fh: qualities_xml = fh.read() except (OSError, IOError): url_extension = "data/qualities/{}".format(dataset_id) - qualities_xml = openml._api_calls._perform_api_call(url_extension, 'get') + qualities_xml = openml._api_calls._perform_api_call(url_extension, "get") - with io.open(qualities_file, "w", encoding='utf8') as fh: + with io.open(qualities_file, "w", encoding="utf8") as fh: fh.write(qualities_xml) - xml_as_dict = xmltodict.parse(qualities_xml, force_list=('oml:quality',)) - qualities = xml_as_dict['oml:data_qualities']['oml:quality'] + xml_as_dict = xmltodict.parse(qualities_xml, force_list=("oml:quality",)) + qualities = xml_as_dict["oml:data_qualities"]["oml:quality"] return qualities def _create_dataset_from_description( - description: Dict[str, str], - features: Dict, - qualities: List, - arff_file: str = None, - cache_format: str = 'pickle', + description: Dict[str, str], + features: Dict, + qualities: List, + arff_file: str = None, + cache_format: str = "pickle", ) -> OpenMLDataset: """Create a dataset object from a description dict. @@ -1049,11 +1030,11 @@ def _get_online_dataset_arff(dataset_id): str A string representation of an ARFF file. """ - dataset_xml = openml._api_calls._perform_api_call("data/%d" % dataset_id, 'get') + dataset_xml = openml._api_calls._perform_api_call("data/%d" % dataset_id, "get") # build a dict from the xml. # use the url from the dataset description and return the ARFF string return openml._api_calls._download_text_file( - xmltodict.parse(dataset_xml)['oml:data_set_description']['oml:url'], + xmltodict.parse(dataset_xml)["oml:data_set_description"]["oml:url"], ) @@ -1071,9 +1052,6 @@ def _get_online_dataset_format(dataset_id): str Dataset format. 
""" - dataset_xml = openml._api_calls._perform_api_call("data/%d" % dataset_id, - 'get') + dataset_xml = openml._api_calls._perform_api_call("data/%d" % dataset_id, "get") # build a dict from the xml and get the format from the dataset description - return xmltodict\ - .parse(dataset_xml)['oml:data_set_description']['oml:format']\ - .lower() + return xmltodict.parse(dataset_xml)["oml:data_set_description"]["oml:format"].lower() diff --git a/openml/evaluations/__init__.py b/openml/evaluations/__init__.py index 0bee18ba3..400a59652 100644 --- a/openml/evaluations/__init__.py +++ b/openml/evaluations/__init__.py @@ -3,5 +3,9 @@ from .evaluation import OpenMLEvaluation from .functions import list_evaluations, list_evaluation_measures, list_evaluations_setups -__all__ = ['OpenMLEvaluation', 'list_evaluations', 'list_evaluation_measures', - 'list_evaluations_setups'] +__all__ = [ + "OpenMLEvaluation", + "list_evaluations", + "list_evaluation_measures", + "list_evaluations_setups", +] diff --git a/openml/evaluations/evaluation.py b/openml/evaluations/evaluation.py index 1adb449a5..8bdf741c2 100644 --- a/openml/evaluations/evaluation.py +++ b/openml/evaluations/evaluation.py @@ -40,10 +40,24 @@ class OpenMLEvaluation(object): list of information per class. (e.g., in case of precision, auroc, recall) """ - def __init__(self, run_id, task_id, setup_id, flow_id, flow_name, - data_id, data_name, function, upload_time, uploader: int, - uploader_name: str, value, values, - array_data=None): + + def __init__( + self, + run_id, + task_id, + setup_id, + flow_id, + flow_name, + data_id, + data_name, + function, + upload_time, + uploader: int, + uploader_name: str, + value, + values, + array_data=None, + ): self.run_id = run_id self.task_id = task_id self.setup_id = setup_id @@ -61,28 +75,41 @@ def __init__(self, run_id, task_id, setup_id, flow_id, flow_name, def __repr__(self): header = "OpenML Evaluation" - header = '{}\n{}\n'.format(header, '=' * len(header)) + header = "{}\n{}\n".format(header, "=" * len(header)) - fields = {"Upload Date": self.upload_time, - "Run ID": self.run_id, - "OpenML Run URL": openml.runs.OpenMLRun.url_for_id(self.run_id), - "Task ID": self.task_id, - "OpenML Task URL": openml.tasks.OpenMLTask.url_for_id(self.task_id), - "Flow ID": self.flow_id, - "OpenML Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id), - "Setup ID": self.setup_id, - "Data ID": self.data_id, - "Data Name": self.data_name, - "OpenML Data URL": openml.datasets.OpenMLDataset.url_for_id(self.data_id), - "Metric Used": self.function, - "Result": self.value} + fields = { + "Upload Date": self.upload_time, + "Run ID": self.run_id, + "OpenML Run URL": openml.runs.OpenMLRun.url_for_id(self.run_id), + "Task ID": self.task_id, + "OpenML Task URL": openml.tasks.OpenMLTask.url_for_id(self.task_id), + "Flow ID": self.flow_id, + "OpenML Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id), + "Setup ID": self.setup_id, + "Data ID": self.data_id, + "Data Name": self.data_name, + "OpenML Data URL": openml.datasets.OpenMLDataset.url_for_id(self.data_id), + "Metric Used": self.function, + "Result": self.value, + } - order = ["Uploader Date", "Run ID", "OpenML Run URL", "Task ID", "OpenML Task URL" - "Flow ID", "OpenML Flow URL", "Setup ID", "Data ID", "Data Name", - "OpenML Data URL", "Metric Used", "Result"] + order = [ + "Uploader Date", + "Run ID", + "OpenML Run URL", + "Task ID", + "OpenML Task URL" "Flow ID", + "OpenML Flow URL", + "Setup ID", + "Data ID", + "Data Name", + "OpenML Data URL", + "Metric Used", 
+ "Result", + ] fields = [(key, fields[key]) for key in order if key in fields] longest_field_name_length = max(len(name) for name, value in fields) field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length) - body = '\n'.join(field_line_format.format(name, value) for name, value in fields) + body = "\n".join(field_line_format.format(name, value) for name, value in fields) return header + body diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py index cf2169c79..adaf419ef 100644 --- a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -26,7 +26,7 @@ def list_evaluations( study: Optional[int] = None, per_fold: Optional[bool] = None, sort_order: Optional[str] = None, - output_format: str = 'object' + output_format: str = "object", ) -> Union[Dict, pd.DataFrame]: """ List all run-evaluation pairs matching all of the given filters. @@ -70,28 +70,31 @@ def list_evaluations( ------- dict or dataframe """ - if output_format not in ['dataframe', 'dict', 'object']: - raise ValueError("Invalid output format selected. " - "Only 'object', 'dataframe', or 'dict' applicable.") + if output_format not in ["dataframe", "dict", "object"]: + raise ValueError( + "Invalid output format selected. " "Only 'object', 'dataframe', or 'dict' applicable." + ) per_fold_str = None if per_fold is not None: per_fold_str = str(per_fold).lower() - return openml.utils._list_all(output_format=output_format, - listing_call=_list_evaluations, - function=function, - offset=offset, - size=size, - task=task, - setup=setup, - flow=flow, - run=run, - uploader=uploader, - tag=tag, - study=study, - sort_order=sort_order, - per_fold=per_fold_str) + return openml.utils._list_all( + output_format=output_format, + listing_call=_list_evaluations, + function=function, + offset=offset, + size=size, + task=task, + setup=setup, + flow=flow, + run=run, + uploader=uploader, + tag=tag, + study=study, + sort_order=sort_order, + per_fold=per_fold_str, + ) def _list_evaluations( @@ -103,7 +106,7 @@ def _list_evaluations( uploader: Optional[List] = None, study: Optional[int] = None, sort_order: Optional[str] = None, - output_format: str = 'object', + output_format: str = "object", **kwargs ) -> Union[Dict, pd.DataFrame]: """ @@ -153,15 +156,15 @@ def _list_evaluations( for operator, value in kwargs.items(): api_call += "/%s/%s" % (operator, value) if task is not None: - api_call += "/task/%s" % ','.join([str(int(i)) for i in task]) + api_call += "/task/%s" % ",".join([str(int(i)) for i in task]) if setup is not None: - api_call += "/setup/%s" % ','.join([str(int(i)) for i in setup]) + api_call += "/setup/%s" % ",".join([str(int(i)) for i in setup]) if flow is not None: - api_call += "/flow/%s" % ','.join([str(int(i)) for i in flow]) + api_call += "/flow/%s" % ",".join([str(int(i)) for i in flow]) if run is not None: - api_call += "/run/%s" % ','.join([str(int(i)) for i in run]) + api_call += "/run/%s" % ",".join([str(int(i)) for i in run]) if uploader is not None: - api_call += "/uploader/%s" % ','.join([str(int(i)) for i in uploader]) + api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploader]) if study is not None: api_call += "/study/%d" % study if sort_order is not None: @@ -170,68 +173,77 @@ def _list_evaluations( return __list_evaluations(api_call, output_format=output_format) -def __list_evaluations(api_call, output_format='object'): +def __list_evaluations(api_call, output_format="object"): """Helper function to parse API calls which are lists of runs""" - xml_string = 
openml._api_calls._perform_api_call(api_call, 'get') - evals_dict = xmltodict.parse(xml_string, force_list=('oml:evaluation',)) + xml_string = openml._api_calls._perform_api_call(api_call, "get") + evals_dict = xmltodict.parse(xml_string, force_list=("oml:evaluation",)) # Minimalistic check if the XML is useful - if 'oml:evaluations' not in evals_dict: - raise ValueError('Error in return XML, does not contain ' - '"oml:evaluations": %s' % str(evals_dict)) + if "oml:evaluations" not in evals_dict: + raise ValueError( + "Error in return XML, does not contain " '"oml:evaluations": %s' % str(evals_dict) + ) - assert type(evals_dict['oml:evaluations']['oml:evaluation']) == list, \ - type(evals_dict['oml:evaluations']) + assert type(evals_dict["oml:evaluations"]["oml:evaluation"]) == list, type( + evals_dict["oml:evaluations"] + ) evals = collections.OrderedDict() - uploader_ids = list(set([eval_['oml:uploader'] for eval_ in - evals_dict['oml:evaluations']['oml:evaluation']])) - api_users = "user/list/user_id/" + ','.join(uploader_ids) - xml_string_user = openml._api_calls._perform_api_call(api_users, 'get') - users = xmltodict.parse(xml_string_user, force_list=('oml:user',)) - user_dict = {user['oml:id']: user['oml:username'] for user in users['oml:users']['oml:user']} - for eval_ in evals_dict['oml:evaluations']['oml:evaluation']: - run_id = int(eval_['oml:run_id']) + uploader_ids = list( + set([eval_["oml:uploader"] for eval_ in evals_dict["oml:evaluations"]["oml:evaluation"]]) + ) + api_users = "user/list/user_id/" + ",".join(uploader_ids) + xml_string_user = openml._api_calls._perform_api_call(api_users, "get") + users = xmltodict.parse(xml_string_user, force_list=("oml:user",)) + user_dict = {user["oml:id"]: user["oml:username"] for user in users["oml:users"]["oml:user"]} + for eval_ in evals_dict["oml:evaluations"]["oml:evaluation"]: + run_id = int(eval_["oml:run_id"]) value = None values = None array_data = None - if 'oml:value' in eval_: - value = float(eval_['oml:value']) - if 'oml:values' in eval_: - values = json.loads(eval_['oml:values']) - if 'oml:array_data' in eval_: - array_data = eval_['oml:array_data'] - - if output_format == 'object': - evals[run_id] = OpenMLEvaluation(int(eval_['oml:run_id']), - int(eval_['oml:task_id']), - int(eval_['oml:setup_id']), - int(eval_['oml:flow_id']), - eval_['oml:flow_name'], - int(eval_['oml:data_id']), - eval_['oml:data_name'], - eval_['oml:function'], - eval_['oml:upload_time'], - int(eval_['oml:uploader']), - user_dict[eval_['oml:uploader']], - value, values, array_data) + if "oml:value" in eval_: + value = float(eval_["oml:value"]) + if "oml:values" in eval_: + values = json.loads(eval_["oml:values"]) + if "oml:array_data" in eval_: + array_data = eval_["oml:array_data"] + + if output_format == "object": + evals[run_id] = OpenMLEvaluation( + int(eval_["oml:run_id"]), + int(eval_["oml:task_id"]), + int(eval_["oml:setup_id"]), + int(eval_["oml:flow_id"]), + eval_["oml:flow_name"], + int(eval_["oml:data_id"]), + eval_["oml:data_name"], + eval_["oml:function"], + eval_["oml:upload_time"], + int(eval_["oml:uploader"]), + user_dict[eval_["oml:uploader"]], + value, + values, + array_data, + ) else: # for output_format in ['dict', 'dataframe'] - evals[run_id] = {'run_id': int(eval_['oml:run_id']), - 'task_id': int(eval_['oml:task_id']), - 'setup_id': int(eval_['oml:setup_id']), - 'flow_id': int(eval_['oml:flow_id']), - 'flow_name': eval_['oml:flow_name'], - 'data_id': int(eval_['oml:data_id']), - 'data_name': eval_['oml:data_name'], - 'function': 
eval_['oml:function'], - 'upload_time': eval_['oml:upload_time'], - 'uploader': int(eval_['oml:uploader']), - 'uploader_name': user_dict[eval_['oml:uploader']], - 'value': value, - 'values': values, - 'array_data': array_data} - - if output_format == 'dataframe': + evals[run_id] = { + "run_id": int(eval_["oml:run_id"]), + "task_id": int(eval_["oml:task_id"]), + "setup_id": int(eval_["oml:setup_id"]), + "flow_id": int(eval_["oml:flow_id"]), + "flow_name": eval_["oml:flow_name"], + "data_id": int(eval_["oml:data_id"]), + "data_name": eval_["oml:data_name"], + "function": eval_["oml:function"], + "upload_time": eval_["oml:upload_time"], + "uploader": int(eval_["oml:uploader"]), + "uploader_name": user_dict[eval_["oml:uploader"]], + "value": value, + "values": values, + "array_data": array_data, + } + + if output_format == "dataframe": rows = [value for key, value in evals.items()] evals = pd.DataFrame.from_records(rows, columns=rows[0].keys()) return evals @@ -249,34 +261,31 @@ def list_evaluation_measures() -> List[str]: """ api_call = "evaluationmeasure/list" - xml_string = openml._api_calls._perform_api_call(api_call, 'get') - qualities = xmltodict.parse(xml_string, force_list=('oml:measures')) + xml_string = openml._api_calls._perform_api_call(api_call, "get") + qualities = xmltodict.parse(xml_string, force_list=("oml:measures")) # Minimalistic check if the XML is useful - if 'oml:evaluation_measures' not in qualities: - raise ValueError('Error in return XML, does not contain ' - '"oml:evaluation_measures"') - if not isinstance(qualities['oml:evaluation_measures']['oml:measures'][0]['oml:measure'], - list): - raise TypeError('Error in return XML, does not contain ' - '"oml:measure" as a list') - qualities = qualities['oml:evaluation_measures']['oml:measures'][0]['oml:measure'] + if "oml:evaluation_measures" not in qualities: + raise ValueError("Error in return XML, does not contain " '"oml:evaluation_measures"') + if not isinstance(qualities["oml:evaluation_measures"]["oml:measures"][0]["oml:measure"], list): + raise TypeError("Error in return XML, does not contain " '"oml:measure" as a list') + qualities = qualities["oml:evaluation_measures"]["oml:measures"][0]["oml:measure"] return qualities def list_evaluations_setups( - function: str, - offset: Optional[int] = None, - size: Optional[int] = None, - task: Optional[List] = None, - setup: Optional[List] = None, - flow: Optional[List] = None, - run: Optional[List] = None, - uploader: Optional[List] = None, - tag: Optional[str] = None, - per_fold: Optional[bool] = None, - sort_order: Optional[str] = None, - output_format: str = 'dataframe', - parameters_in_separate_columns: bool = False + function: str, + offset: Optional[int] = None, + size: Optional[int] = None, + task: Optional[List] = None, + setup: Optional[List] = None, + flow: Optional[List] = None, + run: Optional[List] = None, + uploader: Optional[List] = None, + tag: Optional[str] = None, + per_fold: Optional[bool] = None, + sort_order: Optional[str] = None, + output_format: str = "dataframe", + parameters_in_separate_columns: bool = False, ) -> Union[Dict, pd.DataFrame]: """ List all run-evaluation pairs matching all of the given filters @@ -319,47 +328,60 @@ def list_evaluations_setups( dict or dataframe with hyperparameter settings as a list of tuples. 
""" if parameters_in_separate_columns and (flow is None or len(flow) != 1): - raise ValueError("Can set parameters_in_separate_columns to true " - "only for single flow_id") + raise ValueError( + "Can set parameters_in_separate_columns to true " "only for single flow_id" + ) # List evaluations - evals = list_evaluations(function=function, offset=offset, size=size, run=run, task=task, - setup=setup, flow=flow, uploader=uploader, tag=tag, - per_fold=per_fold, sort_order=sort_order, output_format='dataframe') + evals = list_evaluations( + function=function, + offset=offset, + size=size, + run=run, + task=task, + setup=setup, + flow=flow, + uploader=uploader, + tag=tag, + per_fold=per_fold, + sort_order=sort_order, + output_format="dataframe", + ) # List setups # list_setups by setup id does not support large sizes (exceeds URL length limit) # Hence we split the list of unique setup ids returned by list_evaluations into chunks of size N df = pd.DataFrame() if len(evals) != 0: N = 100 # size of section - length = len(evals['setup_id'].unique()) # length of the array we want to split + length = len(evals["setup_id"].unique()) # length of the array we want to split # array_split - allows indices_or_sections to not equally divide the array # array_split -length % N sub-arrays of size length//N + 1 and the rest of size length//N. - setup_chunks = np.array_split(ary=evals['setup_id'].unique(), - indices_or_sections=((length - 1) // N) + 1) + setup_chunks = np.array_split( + ary=evals["setup_id"].unique(), indices_or_sections=((length - 1) // N) + 1 + ) setups = pd.DataFrame() for setup in setup_chunks: - result = pd.DataFrame(openml.setups.list_setups(setup=setup, output_format='dataframe')) - result.drop('flow_id', axis=1, inplace=True) + result = pd.DataFrame(openml.setups.list_setups(setup=setup, output_format="dataframe")) + result.drop("flow_id", axis=1, inplace=True) # concat resulting setup chunks into single datframe setups = pd.concat([setups, result], ignore_index=True) parameters = [] # Convert parameters of setup into list of tuples of (hyperparameter, value) - for parameter_dict in setups['parameters']: + for parameter_dict in setups["parameters"]: if parameter_dict is not None: - parameters.append({param['full_name']: param['value'] - for param in parameter_dict.values()}) + parameters.append( + {param["full_name"]: param["value"] for param in parameter_dict.values()} + ) else: parameters.append({}) - setups['parameters'] = parameters + setups["parameters"] = parameters # Merge setups with evaluations - df = pd.merge(evals, setups, on='setup_id', how='left') + df = pd.merge(evals, setups, on="setup_id", how="left") if parameters_in_separate_columns: - df = pd.concat([df.drop('parameters', axis=1), - df['parameters'].apply(pd.Series)], axis=1) + df = pd.concat([df.drop("parameters", axis=1), df["parameters"].apply(pd.Series)], axis=1) - if output_format == 'dataframe': + if output_format == "dataframe": return df else: - return df.to_dict(orient='index') + return df.to_dict(orient="index") diff --git a/openml/exceptions.py b/openml/exceptions.py index 6dff18a52..07eb64e6c 100644 --- a/openml/exceptions.py +++ b/openml/exceptions.py @@ -28,35 +28,38 @@ def __init__(self, message: str, code: int = None, url: str = None): super().__init__(message) def __repr__(self): - return '%s returned code %s: %s' % ( - self.url, self.code, self.message, - ) + return "%s returned code %s: %s" % (self.url, self.code, self.message,) class OpenMLServerNoResult(OpenMLServerException): """exception for when 
the result of the server is empty. """ + pass class OpenMLCacheException(PyOpenMLError): """Dataset / task etc not found in cache""" + def __init__(self, message: str): super().__init__(message) class OpenMLHashException(PyOpenMLError): """Locally computed hash is different than hash announced by the server.""" + pass class OpenMLPrivateDatasetError(PyOpenMLError): """ Exception thrown when the user has no rights to access the dataset. """ + def __init__(self, message: str): super().__init__(message) class OpenMLRunsExistError(PyOpenMLError): """ Indicates run(s) already exists on the server when they should not be duplicated. """ + def __init__(self, run_ids: set, message: str): if len(run_ids) < 1: raise ValueError("Set of run ids must be non-empty.") diff --git a/openml/extensions/__init__.py b/openml/extensions/__init__.py index 13b644e04..91cbc1600 100644 --- a/openml/extensions/__init__.py +++ b/openml/extensions/__init__.py @@ -10,8 +10,8 @@ __all__ = [ - 'Extension', - 'register_extension', - 'get_extension_by_model', - 'get_extension_by_flow', + "Extension", + "register_extension", + "get_extension_by_model", + "get_extension_by_flow", ] diff --git a/openml/extensions/extension_interface.py b/openml/extensions/extension_interface.py index 070d17205..2d06b69e0 100644 --- a/openml/extensions/extension_interface.py +++ b/openml/extensions/extension_interface.py @@ -26,7 +26,7 @@ class Extension(ABC): @classmethod @abstractmethod - def can_handle_flow(cls, flow: 'OpenMLFlow') -> bool: + def can_handle_flow(cls, flow: "OpenMLFlow") -> bool: """Check whether a given flow can be handled by this extension. This is typically done by parsing the ``external_version`` field. @@ -60,9 +60,12 @@ def can_handle_model(cls, model: Any) -> bool: # Abstract methods for flow serialization and de-serialization @abstractmethod - def flow_to_model(self, flow: 'OpenMLFlow', - initialize_with_defaults: bool = False, - strict_version: bool = True) -> Any: + def flow_to_model( + self, + flow: "OpenMLFlow", + initialize_with_defaults: bool = False, + strict_version: bool = True, + ) -> Any: """Instantiate a model from the flow representation. Parameters @@ -82,7 +85,7 @@ def flow_to_model(self, flow: 'OpenMLFlow', """ @abstractmethod - def model_to_flow(self, model: Any) -> 'OpenMLFlow': + def model_to_flow(self, model: Any) -> "OpenMLFlow": """Transform a model to a flow for uploading it to OpenML. Parameters @@ -156,13 +159,13 @@ def seed_model(self, model: Any, seed: Optional[int]) -> Any: def _run_model_on_fold( self, model: Any, - task: 'OpenMLTask', + task: "OpenMLTask", X_train: Union[np.ndarray, scipy.sparse.spmatrix], rep_no: int, fold_no: int, y_train: Optional[np.ndarray] = None, X_test: Optional[Union[np.ndarray, scipy.sparse.spmatrix]] = None, - ) -> Tuple[np.ndarray, np.ndarray, 'OrderedDict[str, float]', Optional['OpenMLRunTrace']]: + ) -> Tuple[np.ndarray, np.ndarray, "OrderedDict[str, float]", Optional["OpenMLRunTrace"]]: """Run a model on a repeat,fold,subsample triplet of the task and return prediction information. Returns the data that is necessary to construct the OpenML Run object. Is used by @@ -201,9 +204,7 @@ def _run_model_on_fold( @abstractmethod def obtain_parameter_values( - self, - flow: 'OpenMLFlow', - model: Any = None, + self, flow: "OpenMLFlow", model: Any = None, ) -> List[Dict[str, Any]]: """Extracts all parameter settings required for the flow from the model. 
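# An illustrative sketch, not part of the patch above: the rough shape of a
# third-party backend implementing the abstract Extension interface whose
# reformatting is shown in this hunk. "mylib" and MyLibExtension are
# hypothetical names; a real extension must also implement the remaining
# abstract methods (flow_to_model, model_to_flow, seed_model,
# _run_model_on_fold, obtain_parameter_values, ...), omitted here for brevity.
from typing import Any

from openml.extensions import Extension
from openml.flows import OpenMLFlow


class MyLibExtension(Extension):
    @classmethod
    def can_handle_flow(cls, flow: OpenMLFlow) -> bool:
        # As the docstring above suggests, dispatch on the external_version field.
        return "mylib==" in (flow.external_version or "")

    @classmethod
    def can_handle_model(cls, model: Any) -> bool:
        return type(model).__module__.split(".")[0] == "mylib"

    # ... remaining abstract methods go here ...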
@@ -233,9 +234,7 @@ def obtain_parameter_values( @abstractmethod def instantiate_model_from_hpo_class( - self, - model: Any, - trace_iteration: 'OpenMLTraceIteration', + self, model: Any, trace_iteration: "OpenMLTraceIteration", ) -> Any: """Instantiate a base model which can be searched over by the hyperparameter optimization model. diff --git a/openml/extensions/functions.py b/openml/extensions/functions.py index 826cb0853..52bb03961 100644 --- a/openml/extensions/functions.py +++ b/openml/extensions/functions.py @@ -2,6 +2,7 @@ from typing import Any, Optional, Type, TYPE_CHECKING from . import Extension + # Need to implement the following by its full path because otherwise it won't be possible to # access openml.extensions.extensions import openml.extensions @@ -29,8 +30,7 @@ def register_extension(extension: Type[Extension]) -> None: def get_extension_by_flow( - flow: 'OpenMLFlow', - raise_if_no_extension: bool = False, + flow: "OpenMLFlow", raise_if_no_extension: bool = False, ) -> Optional[Extension]: """Get an extension which can handle the given flow. @@ -54,22 +54,19 @@ def get_extension_by_flow( candidates.append(extension_class()) if len(candidates) == 0: if raise_if_no_extension: - raise ValueError('No extension registered which can handle flow: {}'.format(flow)) + raise ValueError("No extension registered which can handle flow: {}".format(flow)) else: return None elif len(candidates) == 1: return candidates[0] else: raise ValueError( - 'Multiple extensions registered which can handle flow: {}, but only one ' - 'is allowed ({}).'.format(flow, candidates) + "Multiple extensions registered which can handle flow: {}, but only one " + "is allowed ({}).".format(flow, candidates) ) -def get_extension_by_model( - model: Any, - raise_if_no_extension: bool = False, -) -> Optional[Extension]: +def get_extension_by_model(model: Any, raise_if_no_extension: bool = False,) -> Optional[Extension]: """Get an extension which can handle the given flow. Iterates all registered extensions and checks whether they can handle the presented model. 
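# An illustrative usage sketch, not part of the patch above: how the lookup
# helpers reformatted in this diff resolve the registered extension for a
# given model. Importing openml.extensions.sklearn (its __init__.py appears
# further below) is what registers SklearnExtension in the first place.
from sklearn.tree import DecisionTreeClassifier

import openml.extensions.sklearn  # noqa: F401  -- registers SklearnExtension on import
from openml.extensions import get_extension_by_model

clf = DecisionTreeClassifier(max_depth=3)
extension = get_extension_by_model(clf, raise_if_no_extension=True)
flow = extension.model_to_flow(clf)  # purely local serialization, no server call
print(flow.name)  # fully-qualified class name; exact module path depends on the sklearn version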
@@ -92,13 +89,13 @@ def get_extension_by_model( candidates.append(extension_class()) if len(candidates) == 0: if raise_if_no_extension: - raise ValueError('No extension registered which can handle model: {}'.format(model)) + raise ValueError("No extension registered which can handle model: {}".format(model)) else: return None elif len(candidates) == 1: return candidates[0] else: raise ValueError( - 'Multiple extensions registered which can handle model: {}, but only one ' - 'is allowed ({}).'.format(model, candidates) + "Multiple extensions registered which can handle model: {}, but only one " + "is allowed ({}).".format(model, candidates) ) diff --git a/openml/extensions/sklearn/__init__.py b/openml/extensions/sklearn/__init__.py index 1c1732cde..2003934db 100644 --- a/openml/extensions/sklearn/__init__.py +++ b/openml/extensions/sklearn/__init__.py @@ -4,6 +4,6 @@ from openml.extensions import register_extension -__all__ = ['SklearnExtension'] +__all__ = ["SklearnExtension"] register_extension(SklearnExtension) diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index 9720bd853..af0b42144 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -46,13 +46,14 @@ DEPENDENCIES_PATTERN = re.compile( - r'^(?P[\w\-]+)((?P==|>=|>)' - r'(?P(\d+\.)?(\d+\.)?(\d+)?(dev)?[0-9]*))?$' + r"^(?P[\w\-]+)((?P==|>=|>)" + r"(?P(\d+\.)?(\d+\.)?(\d+)?(dev)?[0-9]*))?$" ) -SIMPLE_NUMPY_TYPES = [nptype for type_cat, nptypes in np.sctypes.items() - for nptype in nptypes if type_cat != 'others'] +SIMPLE_NUMPY_TYPES = [ + nptype for type_cat, nptypes in np.sctypes.items() for nptype in nptypes if type_cat != "others" +] SIMPLE_TYPES = tuple([bool, int, float, str] + SIMPLE_NUMPY_TYPES) @@ -63,7 +64,7 @@ class SklearnExtension(Extension): # General setup @classmethod - def can_handle_flow(cls, flow: 'OpenMLFlow') -> bool: + def can_handle_flow(cls, flow: "OpenMLFlow") -> bool: """Check whether a given describes a scikit-learn estimator. This is done by parsing the ``external_version`` field. @@ -94,10 +95,7 @@ def can_handle_model(cls, model: Any) -> bool: @classmethod def trim_flow_name( - cls, - long_name: str, - extra_trim_length: int = 100, - _outer: bool = True + cls, long_name: str, extra_trim_length: int = 100, _outer: bool = True ) -> str: """ Shorten generated sklearn flow name to at most `max_length` characters. @@ -135,6 +133,7 @@ def trim_flow_name( str """ + def remove_all_in_parentheses(string: str) -> str: string, removals = re.subn(r"\([^()]*\)", "", string) while removals > 0: @@ -150,70 +149,73 @@ def remove_all_in_parentheses(string: str) -> str: # VarianceThreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, # Estimator=sklearn.model_selection._search.RandomizedSearchCV(estimator= # sklearn.tree.tree.DecisionTreeClassifier)) - if 'sklearn.model_selection' in long_name: - start_index = long_name.index('sklearn.model_selection') - estimator_start = (start_index - + long_name[start_index:].index('estimator=') - + len('estimator=')) + if "sklearn.model_selection" in long_name: + start_index = long_name.index("sklearn.model_selection") + estimator_start = ( + start_index + long_name[start_index:].index("estimator=") + len("estimator=") + ) model_select_boilerplate = long_name[start_index:estimator_start] # above is .g. 
"sklearn.model_selection._search.RandomizedSearchCV(estimator=" - model_selection_class = model_select_boilerplate.split('(')[0].split('.')[-1] + model_selection_class = model_select_boilerplate.split("(")[0].split(".")[-1] # Now we want to also find and parse the `estimator`, for this we find the closing # parenthesis to the model selection technique: closing_parenthesis_expected = 1 for i, char in enumerate(long_name[estimator_start:], start=estimator_start): - if char == '(': + if char == "(": closing_parenthesis_expected += 1 - if char == ')': + if char == ")": closing_parenthesis_expected -= 1 if closing_parenthesis_expected == 0: break model_select_pipeline = long_name[estimator_start:i] trimmed_pipeline = cls.trim_flow_name(model_select_pipeline, _outer=False) - _, trimmed_pipeline = trimmed_pipeline.split('.', maxsplit=1) # trim module prefix + _, trimmed_pipeline = trimmed_pipeline.split(".", maxsplit=1) # trim module prefix model_select_short = "sklearn.{}[{}]".format(model_selection_class, trimmed_pipeline) - name = long_name[:start_index] + model_select_short + long_name[i + 1:] + name = long_name[:start_index] + model_select_short + long_name[i + 1 :] else: name = long_name - module_name = long_name.split('.')[0] - short_name = module_name + '.{}' + module_name = long_name.split(".")[0] + short_name = module_name + ".{}" - if name.startswith('sklearn.pipeline'): - full_pipeline_class, pipeline = name[:-1].split('(', maxsplit=1) - pipeline_class = full_pipeline_class.split('.')[-1] + if name.startswith("sklearn.pipeline"): + full_pipeline_class, pipeline = name[:-1].split("(", maxsplit=1) + pipeline_class = full_pipeline_class.split(".")[-1] # We don't want nested pipelines in the short name, so we trim all complicated # subcomponents, i.e. those with parentheses: pipeline = remove_all_in_parentheses(pipeline) # then the pipeline steps are formatted e.g.: # step1name=sklearn.submodule.ClassName,step2name... - components = [component.split('.')[-1] for component in pipeline.split(',')] - pipeline = "{}({})".format(pipeline_class, ','.join(components)) + components = [component.split(".")[-1] for component in pipeline.split(",")] + pipeline = "{}({})".format(pipeline_class, ",".join(components)) if len(short_name.format(pipeline)) > extra_trim_length: pipeline = "{}(...,{})".format(pipeline_class, components[-1]) else: # Just a simple component: e.g. sklearn.tree.DecisionTreeClassifier - pipeline = remove_all_in_parentheses(name).split('.')[-1] + pipeline = remove_all_in_parentheses(name).split(".")[-1] if not _outer: # Anything from parenthesis in inner calls should not be culled, so we use brackets - pipeline = pipeline.replace('(', '[').replace(')', ']') + pipeline = pipeline.replace("(", "[").replace(")", "]") else: # Square brackets may be introduced with nested model_selection - pipeline = pipeline.replace('[', '(').replace(']', ')') + pipeline = pipeline.replace("[", "(").replace("]", ")") return short_name.format(pipeline) ################################################################################################ # Methods for flow serialization and de-serialization - def flow_to_model(self, flow: 'OpenMLFlow', - initialize_with_defaults: bool = False, - strict_version: bool = True) -> Any: + def flow_to_model( + self, + flow: "OpenMLFlow", + initialize_with_defaults: bool = False, + strict_version: bool = True, + ) -> Any: """Initializes a sklearn model based on a flow. 
Parameters @@ -234,8 +236,8 @@ def flow_to_model(self, flow: 'OpenMLFlow', mixed """ return self._deserialize_sklearn( - flow, initialize_with_defaults=initialize_with_defaults, - strict_version=strict_version) + flow, initialize_with_defaults=initialize_with_defaults, strict_version=strict_version + ) def _deserialize_sklearn( self, @@ -275,8 +277,10 @@ def _deserialize_sklearn( mixed """ - logger.info('-%s flow_to_sklearn START o=%s, components=%s, init_defaults=%s' - % ('-' * recursion_depth, o, components, initialize_with_defaults)) + logger.info( + "-%s flow_to_sklearn START o=%s, components=%s, init_defaults=%s" + % ("-" * recursion_depth, o, components, initialize_with_defaults) + ) depth_pp = recursion_depth + 1 # shortcut var, depth plus plus # First, we need to check whether the presented object is a json string. @@ -294,21 +298,22 @@ def _deserialize_sklearn( # Check if the dict encodes a 'special' object, which could not # easily converted into a string, but rather the information to # re-create the object were stored in a dictionary. - if 'oml-python:serialized_object' in o: - serialized_type = o['oml-python:serialized_object'] - value = o['value'] - if serialized_type == 'type': + if "oml-python:serialized_object" in o: + serialized_type = o["oml-python:serialized_object"] + value = o["value"] + if serialized_type == "type": rval = self._deserialize_type(value) - elif serialized_type == 'rv_frozen': + elif serialized_type == "rv_frozen": rval = self._deserialize_rv_frozen(value) - elif serialized_type == 'function': + elif serialized_type == "function": rval = self._deserialize_function(value) - elif serialized_type == 'component_reference': + elif serialized_type == "component_reference": assert components is not None # Necessary for mypy - value = self._deserialize_sklearn(value, recursion_depth=depth_pp, - strict_version=strict_version) - step_name = value['step_name'] - key = value['key'] + value = self._deserialize_sklearn( + value, recursion_depth=depth_pp, strict_version=strict_version + ) + step_name = value["step_name"] + key = value["key"] component = self._deserialize_sklearn( components[key], initialize_with_defaults=initialize_with_defaults, @@ -321,17 +326,16 @@ def _deserialize_sklearn( del components[key] if step_name is None: rval = component - elif 'argument_1' not in value: + elif "argument_1" not in value: rval = (step_name, component) else: - rval = (step_name, component, value['argument_1']) - elif serialized_type == 'cv_object': + rval = (step_name, component, value["argument_1"]) + elif serialized_type == "cv_object": rval = self._deserialize_cross_validator( - value, recursion_depth=recursion_depth, - strict_version=strict_version + value, recursion_depth=recursion_depth, strict_version=strict_version ) else: - raise ValueError('Cannot flow_to_sklearn %s' % serialized_type) + raise ValueError("Cannot flow_to_sklearn %s" % serialized_type) else: rval = OrderedDict( @@ -341,15 +345,15 @@ def _deserialize_sklearn( components=components, initialize_with_defaults=initialize_with_defaults, recursion_depth=depth_pp, - strict_version=strict_version + strict_version=strict_version, ), self._deserialize_sklearn( o=value, components=components, initialize_with_defaults=initialize_with_defaults, recursion_depth=depth_pp, - strict_version=strict_version - ) + strict_version=strict_version, + ), ) for key, value in sorted(o.items()) ) @@ -360,7 +364,7 @@ def _deserialize_sklearn( components=components, initialize_with_defaults=initialize_with_defaults, 
recursion_depth=depth_pp, - strict_version=strict_version + strict_version=strict_version, ) for element in o ] @@ -370,19 +374,19 @@ def _deserialize_sklearn( rval = o elif isinstance(o, OpenMLFlow): if not self._is_sklearn_flow(o): - raise ValueError('Only sklearn flows can be reinstantiated') + raise ValueError("Only sklearn flows can be reinstantiated") rval = self._deserialize_model( flow=o, keep_defaults=initialize_with_defaults, recursion_depth=recursion_depth, - strict_version=strict_version + strict_version=strict_version, ) else: raise TypeError(o) - logger.info('-%s flow_to_sklearn END o=%s, rval=%s' % ('-' * recursion_depth, o, rval)) + logger.info("-%s flow_to_sklearn END o=%s, rval=%s" % ("-" * recursion_depth, o, rval)) return rval - def model_to_flow(self, model: Any) -> 'OpenMLFlow': + def model_to_flow(self, model: Any) -> "OpenMLFlow": """Transform a scikit-learn model to a flow for uploading it to OpenML. Parameters @@ -421,9 +425,10 @@ def _serialize_sklearn(self, o: Any, parent_model: Optional[Any] = None) -> Any: rval = OrderedDict() for key, value in o.items(): if not isinstance(key, str): - raise TypeError('Can only use string as keys, you passed ' - 'type %s for value %s.' % - (type(key), str(key))) + raise TypeError( + "Can only use string as keys, you passed " + "type %s for value %s." % (type(key), str(key)) + ) key = self._serialize_sklearn(key, parent_model) value = self._serialize_sklearn(value, parent_model) rval[key] = value @@ -464,11 +469,10 @@ def get_version_information(self) -> List[str]: import numpy major, minor, micro, _, _ = sys.version_info - python_version = 'Python_{}.'.format( - ".".join([str(major), str(minor), str(micro)])) - sklearn_version = 'Sklearn_{}.'.format(sklearn.__version__) - numpy_version = 'NumPy_{}.'.format(numpy.__version__) - scipy_version = 'SciPy_{}.'.format(scipy.__version__) + python_version = "Python_{}.".format(".".join([str(major), str(minor), str(micro)])) + sklearn_version = "Sklearn_{}.".format(sklearn.__version__) + numpy_version = "NumPy_{}.".format(numpy.__version__) + scipy_version = "SciPy_{}.".format(scipy.__version__) return [python_version, sklearn_version, numpy_version, scipy_version] @@ -492,19 +496,18 @@ def _is_cross_validator(self, o: Any) -> bool: @classmethod def _is_sklearn_flow(cls, flow: OpenMLFlow) -> bool: - if (getattr(flow, 'dependencies', None) is not None - and "sklearn" in flow.dependencies): + if getattr(flow, "dependencies", None) is not None and "sklearn" in flow.dependencies: return True if flow.external_version is None: return False else: return ( - flow.external_version.startswith('sklearn==') - or ',sklearn==' in flow.external_version + flow.external_version.startswith("sklearn==") + or ",sklearn==" in flow.external_version ) def _get_sklearn_description(self, model: Any, char_lim: int = 1024) -> str: - '''Fetches the sklearn function docstring for the flow description + """Fetches the sklearn function docstring for the flow description Retrieves the sklearn docstring available and does the following: * If length of docstring <= char_lim, then returns the complete docstring @@ -523,12 +526,14 @@ def _get_sklearn_description(self, model: Any, char_lim: int = 1024) -> str: Returns ------- str - ''' + """ + def match_format(s): - return "{}\n{}\n".format(s, len(s) * '-') + return "{}\n{}\n".format(s, len(s) * "-") + s = inspect.getdoc(model) if s is None: - return '' + return "" try: # trim till 'Read more' pattern = "Read more in the :ref:" @@ -536,11 +541,13 @@ def match_format(s): s 
= s[:index] # trimming docstring to be within char_lim if len(s) > char_lim: - s = "{}...".format(s[:char_lim - 3]) + s = "{}...".format(s[: char_lim - 3]) return s.strip() except ValueError: - logger.warning("'Read more' not found in descriptions. " - "Trying to trim till 'Parameters' if available in docstring.") + logger.warning( + "'Read more' not found in descriptions. " + "Trying to trim till 'Parameters' if available in docstring." + ) pass try: # if 'Read more' doesn't exist, trim till 'Parameters' @@ -553,11 +560,11 @@ def match_format(s): s = s[:index] # trimming docstring to be within char_lim if len(s) > char_lim: - s = "{}...".format(s[:char_lim - 3]) + s = "{}...".format(s[: char_lim - 3]) return s.strip() def _extract_sklearn_parameter_docstring(self, model) -> Union[None, str]: - '''Extracts the part of sklearn docstring containing parameter information + """Extracts the part of sklearn docstring containing parameter information Fetches the entire docstring and trims just the Parameter section. The assumption is that 'Parameters' is the first section in sklearn docstrings, @@ -572,9 +579,11 @@ def _extract_sklearn_parameter_docstring(self, model) -> Union[None, str]: Returns ------- str, or None - ''' + """ + def match_format(s): - return "{}\n{}\n".format(s, len(s) * '-') + return "{}\n{}\n".format(s, len(s) * "-") + s = inspect.getdoc(model) if s is None: return None @@ -601,7 +610,7 @@ def match_format(s): return s.strip() def _extract_sklearn_param_info(self, model, char_lim=1024) -> Union[None, Dict]: - '''Parses parameter type and description from sklearn dosctring + """Parses parameter type and description from sklearn dosctring Parameters ---------- @@ -613,7 +622,7 @@ def _extract_sklearn_param_info(self, model, char_lim=1024) -> Union[None, Dict] Returns ------- Dict, or None - ''' + """ docstring = self._extract_sklearn_parameter_docstring(model) if docstring is None: # when sklearn docstring has no 'Parameters' section @@ -636,7 +645,7 @@ def _extract_sklearn_param_info(self, model, char_lim=1024) -> Union[None, Dict] # creating placeholder when parameter found which will be a list of strings # string descriptions will be appended in subsequent iterations # till another parameter is found and a new placeholder is created - placeholder = [''] # type: List[str] + placeholder = [""] # type: List[str] description.append(placeholder) else: if len(description) > 0: # description=[] means no parameters found yet @@ -644,16 +653,16 @@ def _extract_sklearn_param_info(self, model, char_lim=1024) -> Union[None, Dict] description[-1].append(s) for i in range(len(description)): # concatenating parameter description strings - description[i] = '\n'.join(description[i]).strip() + description[i] = "\n".join(description[i]).strip() # limiting all parameter descriptions to accepted OpenML string length if len(description[i]) > char_lim: - description[i] = "{}...".format(description[i][:char_lim - 3]) + description[i] = "{}...".format(description[i][: char_lim - 3]) # collecting parameters and their types parameter_docs = OrderedDict() # type: Dict matches = p.findall(docstring) for i, param in enumerate(matches): - key, value = str(param).split(':') + key, value = str(param).split(":") parameter_docs[key.strip()] = [value.strip(), description[i]] # to avoid KeyError for missing parameters @@ -681,8 +690,12 @@ def _serialize_model(self, model: Any) -> OpenMLFlow: """ # Get all necessary information about the model objects itself - parameters, parameters_meta_info, subcomponents, 
subcomponents_explicit = \ - self._extract_information_from_model(model) + ( + parameters, + parameters_meta_info, + subcomponents, + subcomponents_explicit, + ) = self._extract_information_from_model(model) # Check that a component does not occur multiple times in a flow as this # is not supported by OpenML @@ -707,7 +720,7 @@ def _serialize_model(self, model: Any) -> OpenMLFlow: if sub_components_names: # slice operation on string in order to get rid of leading comma - name = '%s(%s)' % (class_name, sub_components_names[1:]) + name = "%s(%s)" % (class_name, sub_components_names[1:]) else: name = class_name short_name = SklearnExtension.trim_flow_name(name) @@ -715,60 +728,63 @@ def _serialize_model(self, model: Any) -> OpenMLFlow: # Get the external versions of all sub-components external_version = self._get_external_version_string(model, subcomponents) - dependencies = '\n'.join([ - self._format_external_version( - 'sklearn', - sklearn.__version__, - ), - 'numpy>=1.6.1', - 'scipy>=0.9', - ]) + dependencies = "\n".join( + [ + self._format_external_version("sklearn", sklearn.__version__,), + "numpy>=1.6.1", + "scipy>=0.9", + ] + ) - sklearn_version = self._format_external_version('sklearn', sklearn.__version__) - sklearn_version_formatted = sklearn_version.replace('==', '_') + sklearn_version = self._format_external_version("sklearn", sklearn.__version__) + sklearn_version_formatted = sklearn_version.replace("==", "_") sklearn_description = self._get_sklearn_description(model) - flow = OpenMLFlow(name=name, - class_name=class_name, - custom_name=short_name, - description=sklearn_description, - model=model, - components=subcomponents, - parameters=parameters, - parameters_meta_info=parameters_meta_info, - external_version=external_version, - tags=['openml-python', 'sklearn', 'scikit-learn', - 'python', sklearn_version_formatted, - # TODO: add more tags based on the scikit-learn - # module a flow is in? For example automatically - # annotate a class of sklearn.svm.SVC() with the - # tag svm? - ], - extension=self, - language='English', - # TODO fill in dependencies! - dependencies=dependencies) + flow = OpenMLFlow( + name=name, + class_name=class_name, + custom_name=short_name, + description=sklearn_description, + model=model, + components=subcomponents, + parameters=parameters, + parameters_meta_info=parameters_meta_info, + external_version=external_version, + tags=[ + "openml-python", + "sklearn", + "scikit-learn", + "python", + sklearn_version_formatted, + # TODO: add more tags based on the scikit-learn + # module a flow is in? For example automatically + # annotate a class of sklearn.svm.SVC() with the + # tag svm? + ], + extension=self, + language="English", + # TODO fill in dependencies! + dependencies=dependencies, + ) return flow def _get_external_version_string( - self, - model: Any, - sub_components: Dict[str, OpenMLFlow], + self, model: Any, sub_components: Dict[str, OpenMLFlow], ) -> str: # Create external version string for a flow, given the model and the # already parsed dictionary of sub_components. Retrieves the external # version of all subcomponents, which themselves already contain all # requirements for their subcomponents. The external version string is a # sorted concatenation of all modules which are present in this run. 
- model_package_name = model.__module__.split('.')[0] + model_package_name = model.__module__.split(".")[0] module = importlib.import_module(model_package_name) model_package_version_number = module.__version__ # type: ignore external_version = self._format_external_version( model_package_name, model_package_version_number, ) - openml_version = self._format_external_version('openml', openml.__version__) - sklearn_version = self._format_external_version('sklearn', sklearn.__version__) + openml_version = self._format_external_version("openml", openml.__version__) + sklearn_version = self._format_external_version("sklearn", sklearn.__version__) external_versions = set() external_versions.add(external_version) @@ -778,14 +794,12 @@ def _get_external_version_string( # 'drop', 'passthrough', None can be passed as estimators if isinstance(visitee, str): continue - for external_version in visitee.external_version.split(','): + for external_version in visitee.external_version.split(","): external_versions.add(external_version) - return ','.join(list(sorted(external_versions))) + return ",".join(list(sorted(external_versions))) def _check_multiple_occurence_of_component_in_flow( - self, - model: Any, - sub_components: Dict[str, OpenMLFlow], + self, model: Any, sub_components: Dict[str, OpenMLFlow], ) -> None: to_visit_stack = [] # type: List[OpenMLFlow] to_visit_stack.extend(sub_components.values()) @@ -796,19 +810,20 @@ def _check_multiple_occurence_of_component_in_flow( if isinstance(visitee, str): # 'drop', 'passthrough' can be passed as estimators known_sub_components.add(visitee) elif visitee.name in known_sub_components: - raise ValueError('Found a second occurence of component %s when ' - 'trying to serialize %s.' % (visitee.name, model)) + raise ValueError( + "Found a second occurence of component %s when " + "trying to serialize %s." % (visitee.name, model) + ) else: known_sub_components.add(visitee.name) to_visit_stack.extend(visitee.components.values()) def _extract_information_from_model( - self, - model: Any, + self, model: Any, ) -> Tuple[ - 'OrderedDict[str, Optional[str]]', - 'OrderedDict[str, Optional[Dict]]', - 'OrderedDict[str, OpenMLFlow]', + "OrderedDict[str, Optional[str]]", + "OrderedDict[str, Optional[Dict]]", + "OrderedDict[str, OpenMLFlow]", Set, ]: # This function contains four "global" states and is quite long and @@ -850,9 +865,8 @@ def flatten_all(list_): ) # Check that all list elements are of simple types. - nested_list_of_simple_types = ( - is_non_empty_list_of_lists_with_same_type - and all([isinstance(el, SIMPLE_TYPES) for el in flatten_all(rval)]) + nested_list_of_simple_types = is_non_empty_list_of_lists_with_same_type and all( + [isinstance(el, SIMPLE_TYPES) for el in flatten_all(rval)] ) if is_non_empty_list_of_lists_with_same_type and not nested_list_of_simple_types: @@ -870,46 +884,52 @@ def flatten_all(list_): # length 2 is for {VotingClassifier.estimators, # Pipeline.steps, FeatureUnion.transformer_list} # length 3 is for ColumnTransformer - msg = 'Length of tuple does not match assumptions' + msg = "Length of tuple does not match assumptions" raise ValueError(msg) if isinstance(sub_component, str): - if sub_component != 'drop' and sub_component != 'passthrough': - msg = 'Second item of tuple does not match assumptions. ' \ - 'If string, can be only \'drop\' or \'passthrough\' but' \ - 'got %s' % sub_component + if sub_component != "drop" and sub_component != "passthrough": + msg = ( + "Second item of tuple does not match assumptions. 
" + "If string, can be only 'drop' or 'passthrough' but" + "got %s" % sub_component + ) raise ValueError(msg) else: pass elif isinstance(sub_component, type(None)): - msg = 'Cannot serialize objects of None type. Please use a valid ' \ - 'placeholder for None. Note that empty sklearn estimators can be '\ - 'replaced with \'drop\' or \'passthrough\'.' + msg = ( + "Cannot serialize objects of None type. Please use a valid " + "placeholder for None. Note that empty sklearn estimators can be " + "replaced with 'drop' or 'passthrough'." + ) raise ValueError(msg) elif not isinstance(sub_component, OpenMLFlow): - msg = 'Second item of tuple does not match assumptions. ' \ - 'Expected OpenMLFlow, got %s' % type(sub_component) + msg = ( + "Second item of tuple does not match assumptions. " + "Expected OpenMLFlow, got %s" % type(sub_component) + ) raise TypeError(msg) if identifier in reserved_keywords: - parent_model = "{}.{}".format(model.__module__, - model.__class__.__name__) - msg = 'Found element shadowing official ' \ - 'parameter for %s: %s' % (parent_model, - identifier) + parent_model = "{}.{}".format(model.__module__, model.__class__.__name__) + msg = "Found element shadowing official " "parameter for %s: %s" % ( + parent_model, + identifier, + ) raise PyOpenMLError(msg) # when deserializing the parameter sub_components_explicit.add(identifier) sub_components[identifier] = sub_component component_reference = OrderedDict() # type: Dict[str, Union[str, Dict]] - component_reference['oml-python:serialized_object'] = 'component_reference' + component_reference["oml-python:serialized_object"] = "component_reference" cr_value = OrderedDict() # type: Dict[str, Any] - cr_value['key'] = identifier - cr_value['step_name'] = identifier + cr_value["key"] = identifier + cr_value["step_name"] = identifier if len(sub_component_tuple) == 3: - cr_value['argument_1'] = sub_component_tuple[2] - component_reference['value'] = cr_value + cr_value["argument_1"] = sub_component_tuple[2] + component_reference["value"] = cr_value parameter_value.append(component_reference) # Here (and in the elif and else branch below) are the only @@ -929,17 +949,17 @@ def flatten_all(list_): sub_components[k] = rval sub_components_explicit.add(k) component_reference = OrderedDict() - component_reference['oml-python:serialized_object'] = 'component_reference' + component_reference["oml-python:serialized_object"] = "component_reference" cr_value = OrderedDict() - cr_value['key'] = k - cr_value['step_name'] = None - component_reference['value'] = cr_value + cr_value["key"] = k + cr_value["step_name"] = None + component_reference["value"] = cr_value cr = self._serialize_sklearn(component_reference, model) parameters[k] = json.dumps(cr) else: # a regular hyperparameter - if not (hasattr(rval, '__len__') and len(rval) == 0): + if not (hasattr(rval, "__len__") and len(rval) == 0): rval = json.dumps(rval) parameters[k] = rval else: @@ -947,10 +967,11 @@ def flatten_all(list_): if parameters_docs is not None: data_type, description = parameters_docs[k] - parameters_meta_info[k] = OrderedDict((('description', description), - ('data_type', data_type))) + parameters_meta_info[k] = OrderedDict( + (("description", description), ("data_type", data_type)) + ) else: - parameters_meta_info[k] = OrderedDict((('description', None), ('data_type', None))) + parameters_meta_info[k] = OrderedDict((("description", None), ("data_type", None))) return parameters, parameters_meta_info, sub_components, sub_components_explicit @@ -986,12 +1007,11 @@ def 
_deserialize_model( flow: OpenMLFlow, keep_defaults: bool, recursion_depth: int, - strict_version: bool = True + strict_version: bool = True, ) -> Any: - logger.info('-%s deserialize %s' % ('-' * recursion_depth, flow.name)) + logger.info("-%s deserialize %s" % ("-" * recursion_depth, flow.name)) model_name = flow.class_name - self._check_dependencies(flow.dependencies, - strict_version=strict_version) + self._check_dependencies(flow.dependencies, strict_version=strict_version) parameters = flow.parameters components = flow.components @@ -1006,7 +1026,7 @@ def _deserialize_model( for name in parameters: value = parameters.get(name) - logger.info('--%s flow_parameter=%s, value=%s' % ('-' * recursion_depth, name, value)) + logger.info("--%s flow_parameter=%s, value=%s" % ("-" * recursion_depth, name, value)) rval = self._deserialize_sklearn( value, components=components_, @@ -1022,22 +1042,18 @@ def _deserialize_model( if name not in components_: continue value = components[name] - logger.info('--%s flow_component=%s, value=%s' % ('-' * recursion_depth, name, value)) + logger.info("--%s flow_component=%s, value=%s" % ("-" * recursion_depth, name, value)) rval = self._deserialize_sklearn( - value, - recursion_depth=recursion_depth + 1, - strict_version=strict_version + value, recursion_depth=recursion_depth + 1, strict_version=strict_version ) parameter_dict[name] = rval - module_name = model_name.rsplit('.', 1) - model_class = getattr(importlib.import_module(module_name[0]), - module_name[1]) + module_name = model_name.rsplit(".", 1) + model_class = getattr(importlib.import_module(module_name[0]), module_name[1]) if keep_defaults: # obtain all params with a default - param_defaults, _ = \ - self._get_fn_arguments_with_defaults(model_class.__init__) + param_defaults, _ = self._get_fn_arguments_with_defaults(model_class.__init__) # delete the params that have a default from the dict, # so they get initialized with their default value @@ -1052,93 +1068,98 @@ def _deserialize_model( del parameter_dict[param] return model_class(**parameter_dict) - def _check_dependencies(self, dependencies: str, - strict_version: bool = True) -> None: + def _check_dependencies(self, dependencies: str, strict_version: bool = True) -> None: if not dependencies: return - dependencies_list = dependencies.split('\n') + dependencies_list = dependencies.split("\n") for dependency_string in dependencies_list: match = DEPENDENCIES_PATTERN.match(dependency_string) if not match: - raise ValueError('Cannot parse dependency %s' % dependency_string) + raise ValueError("Cannot parse dependency %s" % dependency_string) - dependency_name = match.group('name') - operation = match.group('operation') - version = match.group('version') + dependency_name = match.group("name") + operation = match.group("operation") + version = match.group("version") module = importlib.import_module(dependency_name) required_version = LooseVersion(version) installed_version = LooseVersion(module.__version__) # type: ignore - if operation == '==': + if operation == "==": check = required_version == installed_version - elif operation == '>': + elif operation == ">": check = installed_version > required_version - elif operation == '>=': - check = (installed_version > required_version - or installed_version == required_version) + elif operation == ">=": + check = ( + installed_version > required_version or installed_version == required_version + ) else: - raise NotImplementedError( - 'operation \'%s\' is not supported' % operation) - message = ('Trying to 
deserialize a model with dependency ' - '%s not satisfied.' % dependency_string) + raise NotImplementedError("operation '%s' is not supported" % operation) + message = ( + "Trying to deserialize a model with dependency " + "%s not satisfied." % dependency_string + ) if not check: if strict_version: raise ValueError(message) else: warnings.warn(message) - def _serialize_type(self, o: Any) -> 'OrderedDict[str, str]': - mapping = {float: 'float', - np.float: 'np.float', - np.float32: 'np.float32', - np.float64: 'np.float64', - int: 'int', - np.int: 'np.int', - np.int32: 'np.int32', - np.int64: 'np.int64'} + def _serialize_type(self, o: Any) -> "OrderedDict[str, str]": + mapping = { + float: "float", + np.float: "np.float", + np.float32: "np.float32", + np.float64: "np.float64", + int: "int", + np.int: "np.int", + np.int32: "np.int32", + np.int64: "np.int64", + } ret = OrderedDict() # type: 'OrderedDict[str, str]' - ret['oml-python:serialized_object'] = 'type' - ret['value'] = mapping[o] + ret["oml-python:serialized_object"] = "type" + ret["value"] = mapping[o] return ret def _deserialize_type(self, o: str) -> Any: - mapping = {'float': float, - 'np.float': np.float, - 'np.float32': np.float32, - 'np.float64': np.float64, - 'int': int, - 'np.int': np.int, - 'np.int32': np.int32, - 'np.int64': np.int64} + mapping = { + "float": float, + "np.float": np.float, + "np.float32": np.float32, + "np.float64": np.float64, + "int": int, + "np.int": np.int, + "np.int32": np.int32, + "np.int64": np.int64, + } return mapping[o] - def _serialize_rv_frozen(self, o: Any) -> 'OrderedDict[str, Union[str, Dict]]': + def _serialize_rv_frozen(self, o: Any) -> "OrderedDict[str, Union[str, Dict]]": args = o.args kwds = o.kwds a = o.a b = o.b - dist = o.dist.__class__.__module__ + '.' + o.dist.__class__.__name__ + dist = o.dist.__class__.__module__ + "." + o.dist.__class__.__name__ ret = OrderedDict() # type: 'OrderedDict[str, Union[str, Dict]]' - ret['oml-python:serialized_object'] = 'rv_frozen' - ret['value'] = OrderedDict((('dist', dist), ('a', a), ('b', b), - ('args', args), ('kwds', kwds))) + ret["oml-python:serialized_object"] = "rv_frozen" + ret["value"] = OrderedDict( + (("dist", dist), ("a", a), ("b", b), ("args", args), ("kwds", kwds)) + ) return ret - def _deserialize_rv_frozen(self, o: 'OrderedDict[str, str]') -> Any: - args = o['args'] - kwds = o['kwds'] - a = o['a'] - b = o['b'] - dist_name = o['dist'] + def _deserialize_rv_frozen(self, o: "OrderedDict[str, str]") -> Any: + args = o["args"] + kwds = o["kwds"] + a = o["a"] + b = o["b"] + dist_name = o["dist"] - module_name = dist_name.rsplit('.', 1) + module_name = dist_name.rsplit(".", 1) try: - rv_class = getattr(importlib.import_module(module_name[0]), - module_name[1]) + rv_class = getattr(importlib.import_module(module_name[0]), module_name[1]) except AttributeError: - warnings.warn('Cannot create model %s for flow.' % dist_name) + warnings.warn("Cannot create model %s for flow." % dist_name) return None dist = scipy.stats.distributions.rv_frozen(rv_class(), *args, **kwds) @@ -1147,34 +1168,39 @@ def _deserialize_rv_frozen(self, o: 'OrderedDict[str, str]') -> Any: return dist - def _serialize_function(self, o: Callable) -> 'OrderedDict[str, str]': - name = o.__module__ + '.' + o.__name__ + def _serialize_function(self, o: Callable) -> "OrderedDict[str, str]": + name = o.__module__ + "." 
+ o.__name__ ret = OrderedDict() # type: 'OrderedDict[str, str]' - ret['oml-python:serialized_object'] = 'function' - ret['value'] = name + ret["oml-python:serialized_object"] = "function" + ret["value"] = name return ret def _deserialize_function(self, name: str) -> Callable: - module_name = name.rsplit('.', 1) + module_name = name.rsplit(".", 1) function_handle = getattr(importlib.import_module(module_name[0]), module_name[1]) return function_handle - def _serialize_cross_validator(self, o: Any) -> 'OrderedDict[str, Union[str, Dict]]': + def _serialize_cross_validator(self, o: Any) -> "OrderedDict[str, Union[str, Dict]]": ret = OrderedDict() # type: 'OrderedDict[str, Union[str, Dict]]' parameters = OrderedDict() # type: 'OrderedDict[str, Any]' # XXX this is copied from sklearn.model_selection._split cls = o.__class__ - init = getattr(cls.__init__, 'deprecated_original', cls.__init__) + init = getattr(cls.__init__, "deprecated_original", cls.__init__) # Ignore varargs, kw and default values and pop self init_signature = inspect.signature(init) # Consider the constructor parameters excluding 'self' if init is object.__init__: args = [] # type: List else: - args = sorted([p.name for p in init_signature.parameters.values() - if p.name != 'self' and p.kind != p.VAR_KEYWORD]) + args = sorted( + [ + p.name + for p in init_signature.parameters.values() + if p.name != "self" and p.kind != p.VAR_KEYWORD + ] + ) for key in args: # We need deprecation warnings to always be on in order to @@ -1188,49 +1214,44 @@ def _serialize_cross_validator(self, o: Any) -> 'OrderedDict[str, Union[str, Dic # if the parameter is deprecated, don't show it continue - if not (hasattr(value, '__len__') and len(value) == 0): + if not (hasattr(value, "__len__") and len(value) == 0): value = json.dumps(value) parameters[key] = value else: parameters[key] = None - ret['oml-python:serialized_object'] = 'cv_object' + ret["oml-python:serialized_object"] = "cv_object" name = o.__module__ + "." 
+ o.__class__.__name__ - value = OrderedDict([('name', name), ('parameters', parameters)]) - ret['value'] = value + value = OrderedDict([("name", name), ("parameters", parameters)]) + ret["value"] = value return ret def _deserialize_cross_validator( - self, - value: 'OrderedDict[str, Any]', - recursion_depth: int, - strict_version: bool = True + self, value: "OrderedDict[str, Any]", recursion_depth: int, strict_version: bool = True ) -> Any: - model_name = value['name'] - parameters = value['parameters'] + model_name = value["name"] + parameters = value["parameters"] - module_name = model_name.rsplit('.', 1) - model_class = getattr(importlib.import_module(module_name[0]), - module_name[1]) + module_name = model_name.rsplit(".", 1) + model_class = getattr(importlib.import_module(module_name[0]), module_name[1]) for parameter in parameters: parameters[parameter] = self._deserialize_sklearn( parameters[parameter], recursion_depth=recursion_depth + 1, - strict_version=strict_version + strict_version=strict_version, ) return model_class(**parameters) def _format_external_version( - self, - model_package_name: str, - model_package_version_number: str, + self, model_package_name: str, model_package_version_number: str, ) -> str: - return '%s==%s' % (model_package_name, model_package_version_number) + return "%s==%s" % (model_package_name, model_package_version_number) @staticmethod - def _get_parameter_values_recursive(param_grid: Union[Dict, List[Dict]], - parameter_name: str) -> List[Any]: + def _get_parameter_values_recursive( + param_grid: Union[Dict, List[Dict]], parameter_name: str + ) -> List[Any]: """ Returns a list of values for a given hyperparameter, encountered recursively throughout the flow. (e.g., n_jobs can be defined @@ -1254,17 +1275,18 @@ def _get_parameter_values_recursive(param_grid: Union[Dict, List[Dict]], result = list() for param, value in param_grid.items(): # n_jobs is scikit-learn parameter for parallelizing jobs - if param.split('__')[-1] == parameter_name: + if param.split("__")[-1] == parameter_name: result.append(value) return result elif isinstance(param_grid, list): result = list() for sub_grid in param_grid: - result.extend(SklearnExtension._get_parameter_values_recursive(sub_grid, - parameter_name)) + result.extend( + SklearnExtension._get_parameter_values_recursive(sub_grid, parameter_name) + ) return result else: - raise ValueError('Param_grid should either be a dict or list of dicts') + raise ValueError("Param_grid should either be a dict or list of dicts") def _prevent_optimize_n_jobs(self, model): """ @@ -1281,21 +1303,27 @@ def _prevent_optimize_n_jobs(self, model): elif isinstance(model, sklearn.model_selection.RandomizedSearchCV): param_distributions = model.param_distributions else: - if hasattr(model, 'param_distributions'): + if hasattr(model, "param_distributions"): param_distributions = model.param_distributions else: - raise AttributeError('Using subclass BaseSearchCV other than ' - '{GridSearchCV, RandomizedSearchCV}. ' - 'Could not find attribute ' - 'param_distributions.') - print('Warning! Using subclass BaseSearchCV other than ' - '{GridSearchCV, RandomizedSearchCV}. ' - 'Should implement param check. ') - n_jobs_vals = SklearnExtension._get_parameter_values_recursive(param_distributions, - 'n_jobs') + raise AttributeError( + "Using subclass BaseSearchCV other than " + "{GridSearchCV, RandomizedSearchCV}. " + "Could not find attribute " + "param_distributions." + ) + print( + "Warning! 
Using subclass BaseSearchCV other than " + "{GridSearchCV, RandomizedSearchCV}. " + "Should implement param check. " + ) + n_jobs_vals = SklearnExtension._get_parameter_values_recursive( + param_distributions, "n_jobs" + ) if len(n_jobs_vals) > 0: - raise PyOpenMLError('openml-python should not be used to ' - 'optimize the n_jobs parameter.') + raise PyOpenMLError( + "openml-python should not be used to " "optimize the n_jobs parameter." + ) def _can_measure_cputime(self, model: Any) -> bool: """ @@ -1312,13 +1340,11 @@ def _can_measure_cputime(self, model: Any) -> bool: bool: True if all n_jobs parameters will be either set to None or 1, False otherwise """ - if not ( - isinstance(model, sklearn.base.BaseEstimator) or self._is_hpo_class(model) - ): - raise ValueError('model should be BaseEstimator or BaseSearchCV') + if not (isinstance(model, sklearn.base.BaseEstimator) or self._is_hpo_class(model)): + raise ValueError("model should be BaseEstimator or BaseSearchCV") # check the parameters for n_jobs - n_jobs_vals = SklearnExtension._get_parameter_values_recursive(model.get_params(), 'n_jobs') + n_jobs_vals = SklearnExtension._get_parameter_values_recursive(model.get_params(), "n_jobs") for val in n_jobs_vals: if val is not None and val != 1: return False @@ -1339,13 +1365,11 @@ def _can_measure_wallclocktime(self, model: Any) -> bool: bool: True if no n_jobs parameters is set to -1, False otherwise """ - if not ( - isinstance(model, sklearn.base.BaseEstimator) or self._is_hpo_class(model) - ): - raise ValueError('model should be BaseEstimator or BaseSearchCV') + if not (isinstance(model, sklearn.base.BaseEstimator) or self._is_hpo_class(model)): + raise ValueError("model should be BaseEstimator or BaseSearchCV") # check the parameters for n_jobs - n_jobs_vals = SklearnExtension._get_parameter_values_recursive(model.get_params(), 'n_jobs') + n_jobs_vals = SklearnExtension._get_parameter_values_recursive(model.get_params(), "n_jobs") return -1 not in n_jobs_vals ################################################################################################ @@ -1366,7 +1390,7 @@ def is_estimator(self, model: Any) -> bool: bool """ o = model - return hasattr(o, 'fit') and hasattr(o, 'get_params') and hasattr(o, 'set_params') + return hasattr(o, "fit") and hasattr(o, "get_params") and hasattr(o, "set_params") def seed_model(self, model: Any, seed: Optional[int] = None) -> Any: """Set the random state of all the unseeded components of a model and return the seeded @@ -1396,12 +1420,13 @@ def _seed_current_object(current_value): return False elif isinstance(current_value, np.random.RandomState): raise ValueError( - 'Models initialized with a RandomState object are not ' - 'supported. Please seed with an integer. ') + "Models initialized with a RandomState object are not " + "supported. Please seed with an integer. " + ) elif current_value is not None: raise ValueError( - 'Models should be seeded with int or None (this should never ' - 'happen). ') + "Models should be seeded with int or None (this should never " "happen). 
" + ) else: return True @@ -1409,7 +1434,7 @@ def _seed_current_object(current_value): model_params = model.get_params() random_states = {} for param_name in sorted(model_params): - if 'random_state' in param_name: + if "random_state" in param_name: current_value = model_params[param_name] # important to draw the value at this point (and not in the if # statement) this way we guarantee that if a different set of @@ -1421,7 +1446,7 @@ def _seed_current_object(current_value): # Also seed CV objects! elif isinstance(model_params[param_name], sklearn.model_selection.BaseCrossValidator): - if not hasattr(model_params[param_name], 'random_state'): + if not hasattr(model_params[param_name], "random_state"): continue current_value = model_params[param_name].random_state @@ -1435,13 +1460,13 @@ def _seed_current_object(current_value): def _run_model_on_fold( self, model: Any, - task: 'OpenMLTask', + task: "OpenMLTask", X_train: Union[np.ndarray, scipy.sparse.spmatrix, pd.DataFrame], rep_no: int, fold_no: int, y_train: Optional[np.ndarray] = None, X_test: Optional[Union[np.ndarray, scipy.sparse.spmatrix, pd.DataFrame]] = None, - ) -> Tuple[np.ndarray, np.ndarray, 'OrderedDict[str, float]', Optional[OpenMLRunTrace]]: + ) -> Tuple[np.ndarray, np.ndarray, "OrderedDict[str, float]", Optional[OpenMLRunTrace]]: """Run a model on a repeat,fold,subsample triplet of the task and return prediction information. @@ -1510,8 +1535,7 @@ def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarra # model_classes: sklearn classifier mapping from original array id to # prediction index id if not isinstance(classes, list): - raise ValueError('please convert model classes to list prior to ' - 'calling this fn') + raise ValueError("please convert model classes to list prior to " "calling this fn") result = np.zeros((len(y), len(classes)), dtype=np.float32) for obs, prediction_idx in enumerate(y): result[obs][prediction_idx] = 1.0 @@ -1519,9 +1543,9 @@ def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarra if isinstance(task, OpenMLSupervisedTask): if y_train is None: - raise TypeError('argument y_train must not be of type None') + raise TypeError("argument y_train must not be of type None") if X_test is None: - raise TypeError('argument X_test must not be of type None') + raise TypeError("argument X_test must not be of type None") # TODO: if possible, give a warning if model is already fitted (acceptable # in case of custom experimentation, @@ -1548,11 +1572,11 @@ def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarra modelfit_dur_cputime = (time.process_time() - modelfit_start_cputime) * 1000 if can_measure_cputime: - user_defined_measures['usercpu_time_millis_training'] = modelfit_dur_cputime + user_defined_measures["usercpu_time_millis_training"] = modelfit_dur_cputime modelfit_dur_walltime = (time.time() - modelfit_start_walltime) * 1000 if can_measure_wallclocktime: - user_defined_measures['wall_clock_time_millis_training'] = modelfit_dur_walltime + user_defined_measures["wall_clock_time_millis_training"] = modelfit_dur_walltime except AttributeError as e: # typically happens when training a regressor on classification task @@ -1586,16 +1610,19 @@ def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarra raise ValueError(task) if can_measure_cputime: - modelpredict_duration_cputime = (time.process_time() - - modelpredict_start_cputime) * 1000 - user_defined_measures['usercpu_time_millis_testing'] = 
modelpredict_duration_cputime - user_defined_measures['usercpu_time_millis'] = (modelfit_dur_cputime - + modelpredict_duration_cputime) + modelpredict_duration_cputime = ( + time.process_time() - modelpredict_start_cputime + ) * 1000 + user_defined_measures["usercpu_time_millis_testing"] = modelpredict_duration_cputime + user_defined_measures["usercpu_time_millis"] = ( + modelfit_dur_cputime + modelpredict_duration_cputime + ) if can_measure_wallclocktime: modelpredict_duration_walltime = (time.time() - modelpredict_start_walltime) * 1000 - user_defined_measures['wall_clock_time_millis_testing'] = modelpredict_duration_walltime - user_defined_measures['wall_clock_time_millis'] = (modelfit_dur_walltime - + modelpredict_duration_walltime) + user_defined_measures["wall_clock_time_millis_testing"] = modelpredict_duration_walltime + user_defined_measures["wall_clock_time_millis"] = ( + modelfit_dur_walltime + modelpredict_duration_walltime + ) if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)): @@ -1605,7 +1632,7 @@ def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarra if task.class_labels is not None: proba_y = _prediction_to_probabilities(pred_y, list(task.class_labels)) else: - raise ValueError('The task has no class labels') + raise ValueError("The task has no class labels") if task.class_labels is not None: if proba_y.shape[1] != len(task.class_labels): @@ -1631,7 +1658,7 @@ def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarra warnings.warn(message) openml.config.logger.warn(message) else: - raise ValueError('The task has no class labels') + raise ValueError("The task has no class labels") elif isinstance(task, OpenMLRegressionTask): proba_y = None @@ -1644,16 +1671,16 @@ def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarra if self._is_hpo_class(model_copy): trace_data = self._extract_trace_data(model_copy, rep_no, fold_no) - trace = self._obtain_arff_trace(model_copy, trace_data) # type: Optional[OpenMLRunTrace] # noqa E501 + trace = self._obtain_arff_trace( + model_copy, trace_data + ) # type: Optional[OpenMLRunTrace] # noqa E501 else: trace = None return pred_y, proba_y, user_defined_measures, trace def obtain_parameter_values( - self, - flow: 'OpenMLFlow', - model: Any = None, + self, flow: "OpenMLFlow", model: Any = None, ) -> List[Dict[str, Any]]: """Extracts all parameter settings required for the flow from the model. 
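# An illustrative usage sketch, not part of the patch above, for
# obtain_parameter_values (its body continues in the next hunk): it pairs each
# flow parameter with the value found on the corresponding scikit-learn
# object. Everything below runs offline, without contacting the OpenML server.
from sklearn.tree import DecisionTreeClassifier
from openml.extensions.sklearn import SklearnExtension

ext = SklearnExtension()
clf = DecisionTreeClassifier(max_depth=3)
flow = ext.model_to_flow(clf)
settings = ext.obtain_parameter_values(flow, clf)
# Each entry is an OrderedDict with "oml:name", "oml:value" and "oml:component";
# e.g. the max_depth entry carries the JSON-serialized value "3".
print([s for s in settings if s["oml:name"] == "max_depth"])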
@@ -1685,8 +1712,7 @@ def get_flow_dict(_flow): flow_map.update(get_flow_dict(_flow.components[subflow])) return flow_map - def extract_parameters(_flow, _flow_dict, component_model, - _main_call=False, main_id=None): + def extract_parameters(_flow, _flow_dict, component_model, _main_call=False, main_id=None): def is_subcomponent_specification(values): # checks whether the current value can be a specification of # subcomponents, as for example the value for steps parameter @@ -1710,21 +1736,21 @@ def is_subcomponent_specification(values): # not have to rely on _flow_dict exp_parameters = set(_flow.parameters) exp_components = set(_flow.components) - model_parameters = set([mp for mp in component_model.get_params() - if '__' not in mp]) + model_parameters = set([mp for mp in component_model.get_params() if "__" not in mp]) if len((exp_parameters | exp_components) ^ model_parameters) != 0: flow_params = sorted(exp_parameters | exp_components) model_params = sorted(model_parameters) - raise ValueError('Parameters of the model do not match the ' - 'parameters expected by the ' - 'flow:\nexpected flow parameters: ' - '%s\nmodel parameters: %s' % (flow_params, - model_params)) + raise ValueError( + "Parameters of the model do not match the " + "parameters expected by the " + "flow:\nexpected flow parameters: " + "%s\nmodel parameters: %s" % (flow_params, model_params) + ) _params = [] for _param_name in _flow.parameters: _current = OrderedDict() - _current['oml:name'] = _param_name + _current["oml:name"] = _param_name current_param_values = self.model_to_flow(component_model.get_params()[_param_name]) @@ -1743,47 +1769,46 @@ def is_subcomponent_specification(values): # (mixed)). OpenML replaces the subcomponent by an # OpenMLFlow object. if len(subcomponent) < 2 or len(subcomponent) > 3: - raise ValueError('Component reference should be ' - 'size {2,3}. ') + raise ValueError("Component reference should be " "size {2,3}. 
") subcomponent_identifier = subcomponent[0] subcomponent_flow = subcomponent[1] if not isinstance(subcomponent_identifier, str): - raise TypeError('Subcomponent identifier should be ' - 'string') - if not isinstance(subcomponent_flow, - openml.flows.OpenMLFlow): - raise TypeError('Subcomponent flow should be string') + raise TypeError("Subcomponent identifier should be " "string") + if not isinstance(subcomponent_flow, openml.flows.OpenMLFlow): + raise TypeError("Subcomponent flow should be string") current = { "oml-python:serialized_object": "component_reference", "value": { "key": subcomponent_identifier, - "step_name": subcomponent_identifier - } + "step_name": subcomponent_identifier, + }, } if len(subcomponent) == 3: if not isinstance(subcomponent[2], list): - raise TypeError('Subcomponent argument should be' - ' list') - current['value']['argument_1'] = subcomponent[2] + raise TypeError("Subcomponent argument should be" " list") + current["value"]["argument_1"] = subcomponent[2] parsed_values.append(current) parsed_values = json.dumps(parsed_values) else: # vanilla parameter value parsed_values = json.dumps(current_param_values) - _current['oml:value'] = parsed_values + _current["oml:value"] = parsed_values if _main_call: - _current['oml:component'] = main_id + _current["oml:component"] = main_id else: - _current['oml:component'] = _flow_dict[_flow.name] + _current["oml:component"] = _flow_dict[_flow.name] _params.append(_current) for _identifier in _flow.components: subcomponent_model = component_model.get_params()[_identifier] - _params.extend(extract_parameters(_flow.components[_identifier], - _flow_dict, subcomponent_model)) + _params.extend( + extract_parameters( + _flow.components[_identifier], _flow_dict, subcomponent_model + ) + ) return _params flow_dict = get_flow_dict(flow) @@ -1793,9 +1818,7 @@ def is_subcomponent_specification(values): return parameters def _openml_param_name_to_sklearn( - self, - openml_parameter: openml.setups.OpenMLParameter, - flow: OpenMLFlow, + self, openml_parameter: openml.setups.OpenMLParameter, flow: OpenMLFlow, ) -> str: """ Converts the name of an OpenMLParameter into the sklean name, given a flow. @@ -1814,15 +1837,15 @@ def _openml_param_name_to_sklearn( The name the parameter will have once used in scikit-learn """ if not isinstance(openml_parameter, openml.setups.OpenMLParameter): - raise ValueError('openml_parameter should be an instance of OpenMLParameter') + raise ValueError("openml_parameter should be an instance of OpenMLParameter") if not isinstance(flow, OpenMLFlow): - raise ValueError('flow should be an instance of OpenMLFlow') + raise ValueError("flow should be an instance of OpenMLFlow") - flow_structure = flow.get_structure('name') + flow_structure = flow.get_structure("name") if openml_parameter.flow_name not in flow_structure: - raise ValueError('Obtained OpenMLParameter and OpenMLFlow do not correspond. ') + raise ValueError("Obtained OpenMLParameter and OpenMLFlow do not correspond. 
") name = openml_parameter.flow_name # for PEP8 - return '__'.join(flow_structure[name] + [openml_parameter.parameter_name]) + return "__".join(flow_structure[name] + [openml_parameter.parameter_name]) ################################################################################################ # Methods for hyperparameter optimization @@ -1844,9 +1867,7 @@ def _is_hpo_class(self, model: Any) -> bool: return isinstance(model, sklearn.model_selection._search.BaseSearchCV) def instantiate_model_from_hpo_class( - self, - model: Any, - trace_iteration: OpenMLTraceIteration, + self, model: Any, trace_iteration: OpenMLTraceIteration, ) -> Any: """Instantiate a ``base_estimator`` which can be searched over by the hyperparameter optimization model. @@ -1864,7 +1885,7 @@ def instantiate_model_from_hpo_class( """ if not self._is_hpo_class(model): raise AssertionError( - 'Flow model %s is not an instance of sklearn.model_selection._search.BaseSearchCV' + "Flow model %s is not an instance of sklearn.model_selection._search.BaseSearchCV" % model ) base_estimator = model.estimator @@ -1873,16 +1894,16 @@ def instantiate_model_from_hpo_class( def _extract_trace_data(self, model, rep_no, fold_no): arff_tracecontent = [] - for itt_no in range(0, len(model.cv_results_['mean_test_score'])): + for itt_no in range(0, len(model.cv_results_["mean_test_score"])): # we use the string values for True and False, as it is defined in # this way by the OpenML server - selected = 'false' + selected = "false" if itt_no == model.best_index_: - selected = 'true' - test_score = model.cv_results_['mean_test_score'][itt_no] + selected = "true" + test_score = model.cv_results_["mean_test_score"][itt_no] arff_line = [rep_no, fold_no, itt_no, test_score, selected] for key in model.cv_results_: - if key.startswith('param_'): + if key.startswith("param_"): value = model.cv_results_[key][itt_no] if value is not np.ma.masked: serialized_value = json.dumps(value) @@ -1892,11 +1913,7 @@ def _extract_trace_data(self, model, rep_no, fold_no): arff_tracecontent.append(arff_line) return arff_tracecontent - def _obtain_arff_trace( - self, - model: Any, - trace_content: List, - ) -> 'OpenMLRunTrace': + def _obtain_arff_trace(self, model: Any, trace_content: List,) -> "OpenMLRunTrace": """Create arff trace object from a fitted model and the trace content obtained by repeatedly calling ``run_model_on_task``. 
@@ -1914,37 +1931,43 @@ def _obtain_arff_trace( """ if not self._is_hpo_class(model): raise AssertionError( - 'Flow model %s is not an instance of sklearn.model_selection._search.BaseSearchCV' + "Flow model %s is not an instance of sklearn.model_selection._search.BaseSearchCV" % model ) - if not hasattr(model, 'cv_results_'): - raise ValueError('model should contain `cv_results_`') + if not hasattr(model, "cv_results_"): + raise ValueError("model should contain `cv_results_`") # attributes that will be in trace arff, regardless of the model - trace_attributes = [('repeat', 'NUMERIC'), - ('fold', 'NUMERIC'), - ('iteration', 'NUMERIC'), - ('evaluation', 'NUMERIC'), - ('selected', ['true', 'false'])] + trace_attributes = [ + ("repeat", "NUMERIC"), + ("fold", "NUMERIC"), + ("iteration", "NUMERIC"), + ("evaluation", "NUMERIC"), + ("selected", ["true", "false"]), + ] # model dependent attributes for trace arff for key in model.cv_results_: - if key.startswith('param_'): + if key.startswith("param_"): # supported types should include all types, including bool, # int float supported_basic_types = (bool, int, float, str) for param_value in model.cv_results_[key]: - if isinstance(param_value, supported_basic_types) or \ - param_value is None or param_value is np.ma.masked: + if ( + isinstance(param_value, supported_basic_types) + or param_value is None + or param_value is np.ma.masked + ): # basic string values - type = 'STRING' - elif isinstance(param_value, (list, tuple)) and \ - all(isinstance(i, int) for i in param_value): + type = "STRING" + elif isinstance(param_value, (list, tuple)) and all( + isinstance(i, int) for i in param_value + ): # list of integers (usually for selecting features) # hyperparameter layer_sizes of MLPClassifier - type = 'STRING' + type = "STRING" else: - raise TypeError('Unsupported param type in param grid: %s' % key) + raise TypeError("Unsupported param type in param grid: %s" % key) # renamed the attribute param to parameter, as this is a required # OpenML convention - this also guards against name collisions @@ -1952,7 +1975,4 @@ def _obtain_arff_trace( attribute = (PREFIX + key[6:], type) trace_attributes.append(attribute) - return OpenMLRunTrace.generate( - trace_attributes, - trace_content, - ) + return OpenMLRunTrace.generate(trace_attributes, trace_content,) diff --git a/openml/flows/__init__.py b/openml/flows/__init__.py index f2c16a8a0..3642b9c56 100644 --- a/openml/flows/__init__.py +++ b/openml/flows/__init__.py @@ -5,10 +5,10 @@ from .functions import get_flow, list_flows, flow_exists, get_flow_id, assert_flows_equal __all__ = [ - 'OpenMLFlow', - 'get_flow', - 'list_flows', - 'get_flow_id', - 'flow_exists', - 'assert_flows_equal', + "OpenMLFlow", + "get_flow", + "list_flows", + "get_flow_id", + "flow_exists", + "assert_flows_equal", ] diff --git a/openml/flows/flow.py b/openml/flows/flow.py index bd8d97d7c..47939c867 100644 --- a/openml/flows/flow.py +++ b/openml/flows/flow.py @@ -84,23 +84,43 @@ class OpenMLFlow(OpenMLBase): OpenML version of the flow. Assigned by the server. 
""" - def __init__(self, name, description, model, components, parameters, - parameters_meta_info, external_version, tags, language, - dependencies, class_name=None, custom_name=None, - binary_url=None, binary_format=None, - binary_md5=None, uploader=None, upload_date=None, - flow_id=None, extension=None, version=None): + def __init__( + self, + name, + description, + model, + components, + parameters, + parameters_meta_info, + external_version, + tags, + language, + dependencies, + class_name=None, + custom_name=None, + binary_url=None, + binary_format=None, + binary_md5=None, + uploader=None, + upload_date=None, + flow_id=None, + extension=None, + version=None, + ): self.name = name self.description = description self.model = model for variable, variable_name in [ - [components, 'components'], - [parameters, 'parameters'], - [parameters_meta_info, 'parameters_meta_info']]: + [components, "components"], + [parameters, "parameters"], + [parameters_meta_info, "parameters_meta_info"], + ]: if not isinstance(variable, OrderedDict): - raise TypeError('%s must be of type OrderedDict, ' - 'but is %s.' % (variable_name, type(variable))) + raise TypeError( + "%s must be of type OrderedDict, " + "but is %s." % (variable_name, type(variable)) + ) self.components = components self.parameters = parameters @@ -110,15 +130,16 @@ def __init__(self, name, description, model, components, parameters, keys_parameters = set(parameters.keys()) keys_parameters_meta_info = set(parameters_meta_info.keys()) if len(keys_parameters.difference(keys_parameters_meta_info)) > 0: - raise ValueError('Parameter %s only in parameters, but not in ' - 'parameters_meta_info.' % - str(keys_parameters.difference( - keys_parameters_meta_info))) + raise ValueError( + "Parameter %s only in parameters, but not in " + "parameters_meta_info." % str(keys_parameters.difference(keys_parameters_meta_info)) + ) if len(keys_parameters_meta_info.difference(keys_parameters)) > 0: - raise ValueError('Parameter %s only in parameters_meta_info, ' - 'but not in parameters.' % - str(keys_parameters_meta_info.difference( - keys_parameters))) + raise ValueError( + "Parameter %s only in parameters_meta_info, " + "but not in parameters." + % str(keys_parameters_meta_info.difference(keys_parameters)) + ) self.external_version = external_version self.uploader = uploader @@ -147,45 +168,64 @@ def extension(self): if self._extension is not None: return self._extension else: - raise RuntimeError("No extension could be found for flow {}: {}" - .format(self.flow_id, self.name)) + raise RuntimeError( + "No extension could be found for flow {}: {}".format(self.flow_id, self.name) + ) def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: """ Collect all information to display in the __repr__ body. 
""" - fields = {"Flow Name": self.name, - "Flow Description": self.description, - "Dependencies": self.dependencies} + fields = { + "Flow Name": self.name, + "Flow Description": self.description, + "Dependencies": self.dependencies, + } if self.flow_id is not None: fields["Flow URL"] = self.openml_url fields["Flow ID"] = str(self.flow_id) if self.version is not None: fields["Flow ID"] += " (version {})".format(self.version) if self.upload_date is not None: - fields["Upload Date"] = self.upload_date.replace('T', ' ') + fields["Upload Date"] = self.upload_date.replace("T", " ") if self.binary_url is not None: fields["Binary URL"] = self.binary_url # determines the order in which the information will be printed - order = ["Flow ID", "Flow URL", "Flow Name", "Flow Description", "Binary URL", - "Upload Date", "Dependencies"] + order = [ + "Flow ID", + "Flow URL", + "Flow Name", + "Flow Description", + "Binary URL", + "Upload Date", + "Dependencies", + ] return [(key, fields[key]) for key in order if key in fields] - def _to_dict(self) -> 'OrderedDict[str, OrderedDict]': + def _to_dict(self) -> "OrderedDict[str, OrderedDict]": """ Creates a dictionary representation of self. """ flow_container = OrderedDict() # type: 'OrderedDict[str, OrderedDict]' - flow_dict = OrderedDict([('@xmlns:oml', 'http://openml.org/openml')]) # type: 'OrderedDict[str, Union[List, str]]' # noqa E501 - flow_container['oml:flow'] = flow_dict - _add_if_nonempty(flow_dict, 'oml:id', self.flow_id) + flow_dict = OrderedDict( + [("@xmlns:oml", "http://openml.org/openml")] + ) # type: 'OrderedDict[str, Union[List, str]]' # noqa E501 + flow_container["oml:flow"] = flow_dict + _add_if_nonempty(flow_dict, "oml:id", self.flow_id) for required in ["name", "external_version"]: if getattr(self, required) is None: - raise ValueError("self.{} is required but None".format( - required)) - for attribute in ["uploader", "name", "custom_name", "class_name", - "version", "external_version", "description", - "upload_date", "language", "dependencies"]: - _add_if_nonempty(flow_dict, 'oml:{}'.format(attribute), - getattr(self, attribute)) + raise ValueError("self.{} is required but None".format(required)) + for attribute in [ + "uploader", + "name", + "custom_name", + "class_name", + "version", + "external_version", + "description", + "upload_date", + "language", + "dependencies", + ]: + _add_if_nonempty(flow_dict, "oml:{}".format(attribute), getattr(self, attribute)) if not self.description: logger = logging.getLogger(__name__) @@ -194,51 +234,53 @@ def _to_dict(self) -> 'OrderedDict[str, OrderedDict]': flow_parameters = [] for key in self.parameters: param_dict = OrderedDict() # type: 'OrderedDict[str, str]' - param_dict['oml:name'] = key + param_dict["oml:name"] = key meta_info = self.parameters_meta_info[key] - _add_if_nonempty(param_dict, 'oml:data_type', - meta_info['data_type']) - param_dict['oml:default_value'] = self.parameters[key] - _add_if_nonempty(param_dict, 'oml:description', - meta_info['description']) + _add_if_nonempty(param_dict, "oml:data_type", meta_info["data_type"]) + param_dict["oml:default_value"] = self.parameters[key] + _add_if_nonempty(param_dict, "oml:description", meta_info["description"]) for key_, value in param_dict.items(): if key_ is not None and not isinstance(key_, str): - raise ValueError('Parameter name %s cannot be serialized ' - 'because it is of type %s. Only strings ' - 'can be serialized.' % (key_, type(key_))) + raise ValueError( + "Parameter name %s cannot be serialized " + "because it is of type %s. 
Only strings " + "can be serialized." % (key_, type(key_)) + ) if value is not None and not isinstance(value, str): - raise ValueError('Parameter value %s cannot be serialized ' - 'because it is of type %s. Only strings ' - 'can be serialized.' - % (value, type(value))) + raise ValueError( + "Parameter value %s cannot be serialized " + "because it is of type %s. Only strings " + "can be serialized." % (value, type(value)) + ) flow_parameters.append(param_dict) - flow_dict['oml:parameter'] = flow_parameters + flow_dict["oml:parameter"] = flow_parameters components = [] for key in self.components: component_dict = OrderedDict() # type: 'OrderedDict[str, Dict]' - component_dict['oml:identifier'] = key - component_dict['oml:flow'] = self.components[key]._to_dict()['oml:flow'] + component_dict["oml:identifier"] = key + component_dict["oml:flow"] = self.components[key]._to_dict()["oml:flow"] for key_ in component_dict: # We only need to check if the key is a string, because the # value is a flow. The flow itself is valid by recursion if key_ is not None and not isinstance(key_, str): - raise ValueError('Parameter name %s cannot be serialized ' - 'because it is of type %s. Only strings ' - 'can be serialized.' % (key_, type(key_))) + raise ValueError( + "Parameter name %s cannot be serialized " + "because it is of type %s. Only strings " + "can be serialized." % (key_, type(key_)) + ) components.append(component_dict) - flow_dict['oml:component'] = components - flow_dict['oml:tag'] = self.tags + flow_dict["oml:component"] = components + flow_dict["oml:tag"] = self.tags for attribute in ["binary_url", "binary_format", "binary_md5"]: - _add_if_nonempty(flow_dict, 'oml:{}'.format(attribute), - getattr(self, attribute)) + _add_if_nonempty(flow_dict, "oml:{}".format(attribute), getattr(self, attribute)) return flow_container @@ -266,30 +308,29 @@ def _from_dict(cls, xml_dict): dic = xml_dict["oml:flow"] # Mandatory parts in the xml file - for key in ['name']: + for key in ["name"]: arguments[key] = dic["oml:" + key] # non-mandatory parts in the xml file for key in [ - 'external_version', - 'uploader', - 'description', - 'upload_date', - 'language', - 'dependencies', - 'version', - 'binary_url', - 'binary_format', - 'binary_md5', - 'class_name', - 'custom_name', + "external_version", + "uploader", + "description", + "upload_date", + "language", + "dependencies", + "version", + "binary_url", + "binary_format", + "binary_md5", + "class_name", + "custom_name", ]: arguments[key] = dic.get("oml:" + key) # has to be converted to an int if present and cannot parsed in the # two loops above - arguments['flow_id'] = (int(dic['oml:id']) if dic.get("oml:id") - is not None else None) + arguments["flow_id"] = int(dic["oml:id"]) if dic.get("oml:id") is not None else None # Now parse parts of a flow which can occur multiple times like # parameters, components (subflows) and tags. These can't be tackled @@ -302,62 +343,60 @@ def _from_dict(cls, xml_dict): parameters = OrderedDict() parameters_meta_info = OrderedDict() - if 'oml:parameter' in dic: + if "oml:parameter" in dic: # In case of a single parameter, xmltodict returns a dictionary, # otherwise a list. 
- oml_parameters = extract_xml_tags('oml:parameter', dic, - allow_none=False) + oml_parameters = extract_xml_tags("oml:parameter", dic, allow_none=False) for oml_parameter in oml_parameters: - parameter_name = oml_parameter['oml:name'] - default_value = oml_parameter['oml:default_value'] + parameter_name = oml_parameter["oml:name"] + default_value = oml_parameter["oml:default_value"] parameters[parameter_name] = default_value meta_info = OrderedDict() - meta_info['description'] = oml_parameter.get('oml:description') - meta_info['data_type'] = oml_parameter.get('oml:data_type') + meta_info["description"] = oml_parameter.get("oml:description") + meta_info["data_type"] = oml_parameter.get("oml:data_type") parameters_meta_info[parameter_name] = meta_info - arguments['parameters'] = parameters - arguments['parameters_meta_info'] = parameters_meta_info + arguments["parameters"] = parameters + arguments["parameters_meta_info"] = parameters_meta_info components = OrderedDict() - if 'oml:component' in dic: + if "oml:component" in dic: # In case of a single component xmltodict returns a dict, # otherwise a list. - oml_components = extract_xml_tags('oml:component', dic, - allow_none=False) + oml_components = extract_xml_tags("oml:component", dic, allow_none=False) for component in oml_components: flow = OpenMLFlow._from_dict(component) - components[component['oml:identifier']] = flow - arguments['components'] = components - arguments['tags'] = extract_xml_tags('oml:tag', dic) + components[component["oml:identifier"]] = flow + arguments["components"] = components + arguments["tags"] = extract_xml_tags("oml:tag", dic) - arguments['model'] = None + arguments["model"] = None flow = cls(**arguments) return flow def to_filesystem(self, output_directory: str) -> None: os.makedirs(output_directory, exist_ok=True) - if 'flow.xml' in os.listdir(output_directory): - raise ValueError('Output directory already contains a flow.xml file.') + if "flow.xml" in os.listdir(output_directory): + raise ValueError("Output directory already contains a flow.xml file.") run_xml = self._to_xml() - with open(os.path.join(output_directory, 'flow.xml'), 'w') as f: + with open(os.path.join(output_directory, "flow.xml"), "w") as f: f.write(run_xml) @classmethod - def from_filesystem(cls, input_directory) -> 'OpenMLFlow': - with open(os.path.join(input_directory, 'flow.xml'), 'r') as f: + def from_filesystem(cls, input_directory) -> "OpenMLFlow": + with open(os.path.join(input_directory, "flow.xml"), "r") as f: xml_string = f.read() return OpenMLFlow._from_dict(xmltodict.parse(xml_string)) def _parse_publish_response(self, xml_response: Dict): """ Parse the id from the xml_response and assign it to self. """ - self.flow_id = int(xml_response['oml:upload_flow']['oml:id']) + self.flow_id = int(xml_response["oml:upload_flow"]["oml:id"]) - def publish(self, raise_error_if_exists: bool = False) -> 'OpenMLFlow': + def publish(self, raise_error_if_exists: bool = False) -> "OpenMLFlow": """ Publish this flow to OpenML server. Raises a PyOpenMLError if the flow exists on the server, but @@ -383,30 +422,37 @@ def publish(self, raise_error_if_exists: bool = False) -> 'OpenMLFlow': flow_id = openml.flows.functions.flow_exists(self.name, self.external_version) if not flow_id: if self.flow_id: - raise openml.exceptions.PyOpenMLError("Flow does not exist on the server, " - "but 'flow.flow_id' is not None.") + raise openml.exceptions.PyOpenMLError( + "Flow does not exist on the server, " "but 'flow.flow_id' is not None." 
+ ) super().publish() flow_id = self.flow_id elif raise_error_if_exists: error_message = "This OpenMLFlow already exists with id: {}.".format(flow_id) raise openml.exceptions.PyOpenMLError(error_message) elif self.flow_id is not None and self.flow_id != flow_id: - raise openml.exceptions.PyOpenMLError("Local flow_id does not match server flow_id: " - "'{}' vs '{}'".format(self.flow_id, flow_id)) + raise openml.exceptions.PyOpenMLError( + "Local flow_id does not match server flow_id: " + "'{}' vs '{}'".format(self.flow_id, flow_id) + ) flow = openml.flows.functions.get_flow(flow_id) _copy_server_fields(flow, self) try: openml.flows.functions.assert_flows_equal( - self, flow, flow.upload_date, + self, + flow, + flow.upload_date, ignore_parameter_values=True, - ignore_custom_name_if_none=True + ignore_custom_name_if_none=True, ) except ValueError as e: message = e.args[0] - raise ValueError("The flow on the server is inconsistent with the local flow. " - "The server flow ID is {}. Please check manually and remove " - "the flow if necessary! Error is:\n'{}'".format(flow_id, message)) + raise ValueError( + "The flow on the server is inconsistent with the local flow. " + "The server flow ID is {}. Please check manually and remove " + "the flow if necessary! Error is:\n'{}'".format(flow_id, message) + ) return self def get_structure(self, key_item: str) -> Dict[str, List[str]]: @@ -427,8 +473,8 @@ def get_structure(self, key_item: str) -> Dict[str, List[str]]: dict[str, List[str]] The flow structure """ - if key_item not in ['flow_id', 'name']: - raise ValueError('key_item should be in {flow_id, name}') + if key_item not in ["flow_id", "name"]: + raise ValueError("key_item should be in {flow_id, name}") structure = dict() for key, sub_flow in self.components.items(): sub_structure = sub_flow.get_structure(key_item) @@ -455,11 +501,13 @@ def get_subflow(self, structure): # outer scope structure = list(structure) if len(structure) < 1: - raise ValueError('Please provide a structure list of size >= 1') + raise ValueError("Please provide a structure list of size >= 1") sub_identifier = structure[0] if sub_identifier not in self.components: - raise ValueError('Flow %s does not contain component with ' - 'identifier %s' % (self.name, sub_identifier)) + raise ValueError( + "Flow %s does not contain component with " + "identifier %s" % (self.name, sub_identifier) + ) if len(structure) == 1: return self.components[sub_identifier] else: @@ -468,8 +516,7 @@ def get_subflow(self, structure): def _copy_server_fields(source_flow, target_flow): - fields_added_by_the_server = ['flow_id', 'uploader', 'version', - 'upload_date'] + fields_added_by_the_server = ["flow_id", "uploader", "version", "upload_date"] for field in fields_added_by_the_server: setattr(target_flow, field, getattr(source_flow, field)) diff --git a/openml/flows/functions.py b/openml/flows/functions.py index 5bbbcbd16..5e8e9dc93 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -15,7 +15,7 @@ import openml.utils -FLOWS_CACHE_DIR_NAME = 'flows' +FLOWS_CACHE_DIR_NAME = "flows" def _get_cached_flows() -> OrderedDict: @@ -57,24 +57,19 @@ def _get_cached_flow(fid: int) -> OpenMLFlow: OpenMLFlow. 
""" - fid_cache_dir = openml.utils._create_cache_directory_for_id( - FLOWS_CACHE_DIR_NAME, - fid - ) + fid_cache_dir = openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, fid) flow_file = os.path.join(fid_cache_dir, "flow.xml") try: - with io.open(flow_file, encoding='utf8') as fh: + with io.open(flow_file, encoding="utf8") as fh: return _create_flow_from_xml(fh.read()) except (OSError, IOError): openml.utils._remove_cache_dir_for_id(FLOWS_CACHE_DIR_NAME, fid_cache_dir) - raise OpenMLCacheException("Flow file for fid %d not " - "cached" % fid) + raise OpenMLCacheException("Flow file for fid %d not " "cached" % fid) @openml.utils.thread_safe_if_oslo_installed -def get_flow(flow_id: int, reinstantiate: bool = False, - strict_version: bool = True) -> OpenMLFlow: +def get_flow(flow_id: int, reinstantiate: bool = False, strict_version: bool = True) -> OpenMLFlow: """Download the OpenML flow for a given flow ID. Parameters @@ -97,8 +92,7 @@ def get_flow(flow_id: int, reinstantiate: bool = False, flow = _get_flow_description(flow_id) if reinstantiate: - flow.model = flow.extension.flow_to_model( - flow, strict_version=strict_version) + flow.model = flow.extension.flow_to_model(flow, strict_version=strict_version) if not strict_version: # check if we need to return a new flow b/c of version mismatch new_flow = flow.extension.model_to_flow(flow.model) @@ -128,12 +122,11 @@ def _get_flow_description(flow_id: int) -> OpenMLFlow: except OpenMLCacheException: xml_file = os.path.join( - openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, flow_id), - "flow.xml", + openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, flow_id), "flow.xml", ) - flow_xml = openml._api_calls._perform_api_call("flow/%d" % flow_id, request_method='get') - with io.open(xml_file, "w", encoding='utf8') as fh: + flow_xml = openml._api_calls._perform_api_call("flow/%d" % flow_id, request_method="get") + with io.open(xml_file, "w", encoding="utf8") as fh: fh.write(flow_xml) return _create_flow_from_xml(flow_xml) @@ -143,7 +136,7 @@ def list_flows( offset: Optional[int] = None, size: Optional[int] = None, tag: Optional[str] = None, - output_format: str = 'dict', + output_format: str = "dict", **kwargs ) -> Union[Dict, pd.DataFrame]: @@ -191,19 +184,22 @@ def list_flows( - external version - uploader """ - if output_format not in ['dataframe', 'dict']: - raise ValueError("Invalid output format selected. " - "Only 'dict' or 'dataframe' applicable.") + if output_format not in ["dataframe", "dict"]: + raise ValueError( + "Invalid output format selected. " "Only 'dict' or 'dataframe' applicable." + ) - return openml.utils._list_all(output_format=output_format, - listing_call=_list_flows, - offset=offset, - size=size, - tag=tag, - **kwargs) + return openml.utils._list_all( + output_format=output_format, + listing_call=_list_flows, + offset=offset, + size=size, + tag=tag, + **kwargs + ) -def _list_flows(output_format='dict', **kwargs) -> Union[Dict, pd.DataFrame]: +def _list_flows(output_format="dict", **kwargs) -> Union[Dict, pd.DataFrame]: """ Perform the api call that return a list of all flows. 
@@ -252,18 +248,16 @@ def flow_exists(name: str, external_version: str) -> Union[int, bool]: see http://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version """ if not (isinstance(name, str) and len(name) > 0): - raise ValueError('Argument \'name\' should be a non-empty string') + raise ValueError("Argument 'name' should be a non-empty string") if not (isinstance(name, str) and len(external_version) > 0): - raise ValueError('Argument \'version\' should be a non-empty string') + raise ValueError("Argument 'version' should be a non-empty string") xml_response = openml._api_calls._perform_api_call( - "flow/exists", - 'post', - data={'name': name, 'external_version': external_version}, + "flow/exists", "post", data={"name": name, "external_version": external_version}, ) result_dict = xmltodict.parse(xml_response) - flow_id = int(result_dict['oml:flow_exists']['oml:id']) + flow_id = int(result_dict["oml:flow_exists"]["oml:id"]) if flow_id > 0: return flow_id else: @@ -271,9 +265,7 @@ def flow_exists(name: str, external_version: str) -> Union[int, bool]: def get_flow_id( - model: Optional[Any] = None, - name: Optional[str] = None, - exact_version=True, + model: Optional[Any] = None, name: Optional[str] = None, exact_version=True, ) -> Union[int, bool, List[int]]: """Retrieves the flow id for a model or a flow name. @@ -307,12 +299,10 @@ def get_flow_id( """ if model is None and name is None: raise ValueError( - 'Need to provide either argument `model` or argument `name`, but both are `None`.' + "Need to provide either argument `model` or argument `name`, but both are `None`." ) elif model is not None and name is not None: - raise ValueError( - 'Must provide either argument `model` or argument `name`, but not both.' - ) + raise ValueError("Must provide either argument `model` or argument `name`, but not both.") if model is not None: extension = openml.extensions.get_extension_by_model(model, raise_if_no_extension=True) @@ -330,39 +320,38 @@ def get_flow_id( if exact_version: return flow_exists(name=flow_name, external_version=external_version) else: - flows = list_flows(output_format='dataframe') + flows = list_flows(output_format="dataframe") assert isinstance(flows, pd.DataFrame) # Make mypy happy flows = flows.query('name == "{}"'.format(flow_name)) - return flows['id'].to_list() + return flows["id"].to_list() -def __list_flows( - api_call: str, - output_format: str = 'dict' -) -> Union[Dict, pd.DataFrame]: +def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]: - xml_string = openml._api_calls._perform_api_call(api_call, 'get') - flows_dict = xmltodict.parse(xml_string, force_list=('oml:flow',)) + xml_string = openml._api_calls._perform_api_call(api_call, "get") + flows_dict = xmltodict.parse(xml_string, force_list=("oml:flow",)) # Minimalistic check if the XML is useful - assert type(flows_dict['oml:flows']['oml:flow']) == list, \ - type(flows_dict['oml:flows']) - assert flows_dict['oml:flows']['@xmlns:oml'] == \ - 'http://openml.org/openml', flows_dict['oml:flows']['@xmlns:oml'] + assert type(flows_dict["oml:flows"]["oml:flow"]) == list, type(flows_dict["oml:flows"]) + assert flows_dict["oml:flows"]["@xmlns:oml"] == "http://openml.org/openml", flows_dict[ + "oml:flows" + ]["@xmlns:oml"] flows = dict() - for flow_ in flows_dict['oml:flows']['oml:flow']: - fid = int(flow_['oml:id']) - flow = {'id': fid, - 'full_name': flow_['oml:full_name'], - 'name': flow_['oml:name'], - 'version': flow_['oml:version'], - 'external_version': 
flow_['oml:external_version'], - 'uploader': flow_['oml:uploader']} + for flow_ in flows_dict["oml:flows"]["oml:flow"]: + fid = int(flow_["oml:id"]) + flow = { + "id": fid, + "full_name": flow_["oml:full_name"], + "name": flow_["oml:name"], + "version": flow_["oml:version"], + "external_version": flow_["oml:external_version"], + "uploader": flow_["oml:uploader"], + } flows[fid] = flow - if output_format == 'dataframe': - flows = pd.DataFrame.from_dict(flows, orient='index') + if output_format == "dataframe": + flows = pd.DataFrame.from_dict(flows, orient="index") return flows @@ -383,11 +372,14 @@ def _check_flow_for_server_id(flow: OpenMLFlow) -> None: stack.append(component) -def assert_flows_equal(flow1: OpenMLFlow, flow2: OpenMLFlow, - ignore_parameter_values_on_older_children: str = None, - ignore_parameter_values: bool = False, - ignore_custom_name_if_none: bool = False, - check_description: bool = True) -> None: +def assert_flows_equal( + flow1: OpenMLFlow, + flow2: OpenMLFlow, + ignore_parameter_values_on_older_children: str = None, + ignore_parameter_values: bool = False, + ignore_custom_name_if_none: bool = False, + check_description: bool = True, +) -> None: """Check equality of two flows. Two flows are equal if their all keys which are not set by the server @@ -413,62 +405,70 @@ def assert_flows_equal(flow1: OpenMLFlow, flow2: OpenMLFlow, Whether to ignore matching of flow descriptions. """ if not isinstance(flow1, OpenMLFlow): - raise TypeError('Argument 1 must be of type OpenMLFlow, but is %s' % - type(flow1)) + raise TypeError("Argument 1 must be of type OpenMLFlow, but is %s" % type(flow1)) if not isinstance(flow2, OpenMLFlow): - raise TypeError('Argument 2 must be of type OpenMLFlow, but is %s' % - type(flow2)) + raise TypeError("Argument 2 must be of type OpenMLFlow, but is %s" % type(flow2)) # TODO as they are actually now saved during publish, it might be good to # check for the equality of these as well. - generated_by_the_server = ['flow_id', 'uploader', 'version', 'upload_date', - # Tags aren't directly created by the server, - # but the uploader has no control over them! - 'tags'] - ignored_by_python_api = ['binary_url', 'binary_format', 'binary_md5', - 'model', '_entity_id'] + generated_by_the_server = [ + "flow_id", + "uploader", + "version", + "upload_date", + # Tags aren't directly created by the server, + # but the uploader has no control over them! + "tags", + ] + ignored_by_python_api = ["binary_url", "binary_format", "binary_md5", "model", "_entity_id"] for key in set(flow1.__dict__.keys()).union(flow2.__dict__.keys()): if key in generated_by_the_server + ignored_by_python_api: continue attr1 = getattr(flow1, key, None) attr2 = getattr(flow2, key, None) - if key == 'components': + if key == "components": for name in set(attr1.keys()).union(attr2.keys()): if name not in attr1: - raise ValueError('Component %s only available in ' - 'argument2, but not in argument1.' % name) + raise ValueError( + "Component %s only available in " "argument2, but not in argument1." % name + ) if name not in attr2: - raise ValueError('Component %s only available in ' - 'argument2, but not in argument1.' % name) - assert_flows_equal(attr1[name], attr2[name], - ignore_parameter_values_on_older_children, - ignore_parameter_values, - ignore_custom_name_if_none) - elif key == '_extension': + raise ValueError( + "Component %s only available in " "argument2, but not in argument1." 
% name + ) + assert_flows_equal( + attr1[name], + attr2[name], + ignore_parameter_values_on_older_children, + ignore_parameter_values, + ignore_custom_name_if_none, + ) + elif key == "_extension": continue - elif check_description and key == 'description': + elif check_description and key == "description": # to ignore matching of descriptions since sklearn based flows may have # altering docstrings and is not guaranteed to be consistent continue else: - if key == 'parameters': - if ignore_parameter_values or \ - ignore_parameter_values_on_older_children: + if key == "parameters": + if ignore_parameter_values or ignore_parameter_values_on_older_children: params_flow_1 = set(flow1.parameters.keys()) params_flow_2 = set(flow2.parameters.keys()) symmetric_difference = params_flow_1 ^ params_flow_2 if len(symmetric_difference) > 0: - raise ValueError('Flow %s: parameter set of flow ' - 'differs from the parameters stored ' - 'on the server.' % flow1.name) + raise ValueError( + "Flow %s: parameter set of flow " + "differs from the parameters stored " + "on the server." % flow1.name + ) if ignore_parameter_values_on_older_children: - upload_date_current_flow = dateutil.parser.parse( - flow1.upload_date) + upload_date_current_flow = dateutil.parser.parse(flow1.upload_date) upload_date_parent_flow = dateutil.parser.parse( - ignore_parameter_values_on_older_children) + ignore_parameter_values_on_older_children + ) if upload_date_current_flow < upload_date_parent_flow: continue @@ -476,14 +476,16 @@ def assert_flows_equal(flow1: OpenMLFlow, flow2: OpenMLFlow, # Continue needs to be done here as the first if # statement triggers in both special cases continue - elif (key == 'custom_name' - and ignore_custom_name_if_none - and (attr1 is None or attr2 is None)): + elif ( + key == "custom_name" + and ignore_custom_name_if_none + and (attr1 is None or attr2 is None) + ): # If specified, we allow `custom_name` inequality if one flow's name is None. # Helps with backwards compatibility as `custom_name` is now auto-generated, but # before it used to be `None`. continue - elif key == 'parameters_meta_info': + elif key == "parameters_meta_info": # this value is a dictionary where each key is a parameter name, containing another # dictionary with keys specifying the parameter's 'description' and 'data_type' # checking parameter descriptions can be ignored since that might change @@ -491,32 +493,37 @@ def assert_flows_equal(flow1: OpenMLFlow, flow2: OpenMLFlow, params1 = set(flow1.parameters_meta_info.keys()) params2 = set(flow2.parameters_meta_info.keys()) if params1 != params2: - raise ValueError('Parameter list in meta info for parameters differ ' - 'in the two flows.') + raise ValueError( + "Parameter list in meta info for parameters differ " "in the two flows." 
+ ) # iterating over the parameter's meta info list for param in params1: - if isinstance(flow1.parameters_meta_info[param], Dict) and \ - isinstance(flow2.parameters_meta_info[param], Dict) and \ - 'data_type' in flow1.parameters_meta_info[param] and \ - 'data_type' in flow2.parameters_meta_info[param]: - value1 = flow1.parameters_meta_info[param]['data_type'] - value2 = flow2.parameters_meta_info[param]['data_type'] + if ( + isinstance(flow1.parameters_meta_info[param], Dict) + and isinstance(flow2.parameters_meta_info[param], Dict) + and "data_type" in flow1.parameters_meta_info[param] + and "data_type" in flow2.parameters_meta_info[param] + ): + value1 = flow1.parameters_meta_info[param]["data_type"] + value2 = flow2.parameters_meta_info[param]["data_type"] else: value1 = flow1.parameters_meta_info[param] value2 = flow2.parameters_meta_info[param] if value1 is None or value2 is None: continue elif value1 != value2: - raise ValueError("Flow {}: data type for parameter {} in {} differ " - "as {}\nvs\n{}".format(flow1.name, param, key, - value1, value2)) + raise ValueError( + "Flow {}: data type for parameter {} in {} differ " + "as {}\nvs\n{}".format(flow1.name, param, key, value1, value2) + ) # the continue is to avoid the 'attr != attr2' check at end of function continue if attr1 != attr2: - raise ValueError("Flow %s: values for attribute '%s' differ: " - "'%s'\nvs\n'%s'." % - (str(flow1.name), str(key), str(attr1), str(attr2))) + raise ValueError( + "Flow %s: values for attribute '%s' differ: " + "'%s'\nvs\n'%s'." % (str(flow1.name), str(key), str(attr1), str(attr2)) + ) def _create_flow_from_xml(flow_xml: str) -> OpenMLFlow: diff --git a/openml/runs/__init__.py b/openml/runs/__init__.py index 80d0c0ae3..e917a57a5 100644 --- a/openml/runs/__init__.py +++ b/openml/runs/__init__.py @@ -15,16 +15,16 @@ ) __all__ = [ - 'OpenMLRun', - 'OpenMLRunTrace', - 'OpenMLTraceIteration', - 'run_model_on_task', - 'run_flow_on_task', - 'get_run', - 'list_runs', - 'get_runs', - 'get_run_trace', - 'run_exists', - 'initialize_model_from_run', - 'initialize_model_from_trace' + "OpenMLRun", + "OpenMLRunTrace", + "OpenMLTraceIteration", + "run_model_on_task", + "run_flow_on_task", + "get_run", + "list_runs", + "get_runs", + "get_run_trace", + "run_exists", + "initialize_model_from_run", + "initialize_model_from_trace", ] diff --git a/openml/runs/functions.py b/openml/runs/functions.py index ddaf3b028..b3b15d16e 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -21,8 +21,14 @@ from ..flows import get_flow, flow_exists, OpenMLFlow from ..setups import setup_exists, initialize_model from ..exceptions import OpenMLCacheException, OpenMLServerException, OpenMLRunsExistError -from ..tasks import OpenMLTask, OpenMLClassificationTask, OpenMLClusteringTask, \ - OpenMLRegressionTask, OpenMLSupervisedTask, OpenMLLearningCurveTask +from ..tasks import ( + OpenMLTask, + OpenMLClassificationTask, + OpenMLClusteringTask, + OpenMLRegressionTask, + OpenMLSupervisedTask, + OpenMLLearningCurveTask, +) from .run import OpenMLRun from .trace import OpenMLRunTrace from ..tasks import TaskTypeEnum, get_task @@ -33,7 +39,7 @@ # get_dict is in run.py to avoid circular imports -RUNS_CACHE_DIR_NAME = 'runs' +RUNS_CACHE_DIR_NAME = "runs" def run_model_on_task( @@ -86,9 +92,12 @@ def run_model_on_task( # When removing this please also remove the method `is_estimator` from the extension # interface as it is only used here (MF, 3-2019) if isinstance(model, (int, str, OpenMLTask)): - warnings.warn("The old argument 
order (task, model) is deprecated and " - "will not be supported in the future. Please use the " - "order (model, task).", DeprecationWarning) + warnings.warn( + "The old argument order (task, model) is deprecated and " + "will not be supported in the future. Please use the " + "order (model, task).", + DeprecationWarning, + ) task, model = model, task extension = get_extension_by_model(model, raise_if_no_extension=True) @@ -174,9 +183,12 @@ def run_flow_on_task( # Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019). if isinstance(flow, OpenMLTask) and isinstance(task, OpenMLFlow): # We want to allow either order of argument (to avoid confusion). - warnings.warn("The old argument order (Flow, model) is deprecated and " - "will not be supported in the future. Please use the " - "order (model, Flow).", DeprecationWarning) + warnings.warn( + "The old argument order (Flow, model) is deprecated and " + "will not be supported in the future. Please use the " + "order (model, Flow).", + DeprecationWarning, + ) task, flow = flow, task if task.task_id is None: @@ -193,11 +205,14 @@ def run_flow_on_task( flow_id = flow_exists(flow.name, flow.external_version) if isinstance(flow.flow_id, int) and flow_id != flow.flow_id: if flow_id: - raise PyOpenMLError("Local flow_id does not match server flow_id: " - "'{}' vs '{}'".format(flow.flow_id, flow_id)) + raise PyOpenMLError( + "Local flow_id does not match server flow_id: " + "'{}' vs '{}'".format(flow.flow_id, flow_id) + ) else: - raise PyOpenMLError("Flow does not exist on the server, " - "but 'flow.flow_id' is not None.") + raise PyOpenMLError( + "Flow does not exist on the server, " "but 'flow.flow_id' is not None." + ) if upload_flow and not flow_id: flow.publish() @@ -210,8 +225,9 @@ def run_flow_on_task( setup_id = setup_exists(flow_from_server) ids = run_exists(task.task_id, setup_id) if ids: - error_message = ("One or more runs of this setup were " - "already performed on the task.") + error_message = ( + "One or more runs of this setup were " "already performed on the task." + ) raise OpenMLRunsExistError(ids, error_message) else: # Flow does not exist on server and we do not want to upload it. 
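As a usage note, a short sketch of the (model, task) argument order that the deprecation warning above points to; the task id and classifier are example values only, and publishing the run requires a configured API key.

    import openml
    from sklearn.tree import DecisionTreeClassifier

    task = openml.tasks.get_task(31)                # 31 is an example task id
    clf = DecisionTreeClassifier()
    run = openml.runs.run_model_on_task(clf, task)  # new order: (model, task)
    # run.publish()                                 # upload; needs openml.config.apikey
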
@@ -222,7 +238,7 @@ def run_flow_on_task( dataset = task.get_dataset() run_environment = flow.extension.get_version_information() - tags = ['openml-python', run_environment[1]] + tags = ["openml-python", run_environment[1]] # execute the run res = _run_task_get_arffcontent( @@ -261,9 +277,9 @@ def run_flow_on_task( run.fold_evaluations = fold_evaluations if flow_id: - message = 'Executed Task {} with Flow id:{}'.format(task.task_id, run.flow_id) + message = "Executed Task {} with Flow id:{}".format(task.task_id, run.flow_id) else: - message = 'Executed Task {} on local Flow with name {}.'.format(task.task_id, flow.name) + message = "Executed Task {} on local Flow with name {}.".format(task.task_id, flow.name) config.logger.info(message) return run @@ -281,8 +297,7 @@ def get_run_trace(run_id: int) -> OpenMLRunTrace: ------- openml.runs.OpenMLTrace """ - trace_xml = openml._api_calls._perform_api_call('run/trace/%d' % run_id, - 'get') + trace_xml = openml._api_calls._perform_api_call("run/trace/%d" % run_id, "get") run_trace = OpenMLRunTrace.trace_from_xml(trace_xml) return run_trace @@ -306,10 +321,7 @@ def initialize_model_from_run(run_id: int) -> Any: def initialize_model_from_trace( - run_id: int, - repeat: int, - fold: int, - iteration: Optional[int] = None, + run_id: int, repeat: int, fold: int, iteration: Optional[int] = None, ) -> Any: """ Initialize a model based on the parameters that were set @@ -347,7 +359,7 @@ def initialize_model_from_trace( request = (repeat, fold, iteration) if request not in run_trace.trace_iterations: - raise ValueError('Combination repeat, fold, iteration not available') + raise ValueError("Combination repeat, fold, iteration not available") current = run_trace.trace_iterations[(repeat, fold, iteration)] search_model = initialize_model_from_run(run_id) @@ -382,7 +394,7 @@ def run_exists(task_id: int, setup_id: int) -> Set[int]: return set() except OpenMLServerException as exception: # error code 512 implies no results. 
The run does not exist yet - assert (exception.code == 512) + assert exception.code == 512 return set() @@ -390,13 +402,13 @@ def _run_task_get_arffcontent( flow: OpenMLFlow, model: Any, task: OpenMLTask, - extension: 'Extension', + extension: "Extension", add_local_measures: bool, ) -> Tuple[ List[List], Optional[OpenMLRunTrace], - 'OrderedDict[str, OrderedDict]', - 'OrderedDict[str, OrderedDict]', + "OrderedDict[str, OrderedDict]", + "OrderedDict[str, OrderedDict]", ]: arff_datacontent = [] # type: List[List] traces = [] # type: List[OpenMLRunTrace] @@ -414,22 +426,21 @@ def _run_task_get_arffcontent( # methods, less maintenance, less confusion) num_reps, num_folds, num_samples = task.get_split_dimensions() - for n_fit, (rep_no, fold_no, sample_no) in enumerate(itertools.product( - range(num_reps), - range(num_folds), - range(num_samples), - ), start=1): + for n_fit, (rep_no, fold_no, sample_no) in enumerate( + itertools.product(range(num_reps), range(num_folds), range(num_samples),), start=1 + ): train_indices, test_indices = task.get_train_test_split_indices( - repeat=rep_no, fold=fold_no, sample=sample_no) + repeat=rep_no, fold=fold_no, sample=sample_no + ) if isinstance(task, OpenMLSupervisedTask): - x, y = task.get_X_and_y(dataset_format='array') + x, y = task.get_X_and_y(dataset_format="array") train_x = x[train_indices] train_y = y[train_indices] test_x = x[test_indices] test_y = y[test_indices] elif isinstance(task, OpenMLClusteringTask): - x = task.get_X(dataset_format='array') + x = task.get_X(dataset_format="array") train_x = x[train_indices] train_y = None test_x = None @@ -439,15 +450,14 @@ def _run_task_get_arffcontent( config.logger.info( "Going to execute flow '%s' on task %d for repeat %d fold %d sample %d.", - flow.name, task.task_id, rep_no, fold_no, sample_no, + flow.name, + task.task_id, + rep_no, + fold_no, + sample_no, ) - ( - pred_y, - proba_y, - user_defined_measures_fold, - trace, - ) = extension._run_model_on_fold( + pred_y, proba_y, user_defined_measures_fold, trace = extension._run_model_on_fold( model=model, task=task, X_train=train_x, @@ -476,14 +486,13 @@ def _calculate_local_measure(sklearn_fn, openml_name): arff_line.append(task.class_labels[pred_y[i]]) arff_line.append(task.class_labels[test_y[i]]) else: - raise ValueError('The task has no class labels') + raise ValueError("The task has no class labels") arff_datacontent.append(arff_line) if add_local_measures: _calculate_local_measure( - sklearn.metrics.accuracy_score, - 'predictive_accuracy', + sklearn.metrics.accuracy_score, "predictive_accuracy", ) elif isinstance(task, OpenMLRegressionTask): @@ -494,8 +503,7 @@ def _calculate_local_measure(sklearn_fn, openml_name): if add_local_measures: _calculate_local_measure( - sklearn.metrics.mean_absolute_error, - 'mean_absolute_error', + sklearn.metrics.mean_absolute_error, "mean_absolute_error", ) elif isinstance(task, OpenMLClusteringTask): @@ -520,17 +528,17 @@ def _calculate_local_measure(sklearn_fn, openml_name): if fold_no not in user_defined_measures_per_sample[measure][rep_no]: user_defined_measures_per_sample[measure][rep_no][fold_no] = OrderedDict() - user_defined_measures_per_fold[measure][rep_no][fold_no] = ( - user_defined_measures_fold[measure] - ) - user_defined_measures_per_sample[measure][rep_no][fold_no][sample_no] = ( - user_defined_measures_fold[measure] - ) + user_defined_measures_per_fold[measure][rep_no][fold_no] = user_defined_measures_fold[ + measure + ] + user_defined_measures_per_sample[measure][rep_no][fold_no][ + sample_no + ] = 
user_defined_measures_fold[measure] if len(traces) > 0: if len(traces) != n_fit: raise ValueError( - 'Did not find enough traces (expected {}, found {})'.format(n_fit, len(traces)) + "Did not find enough traces (expected {}, found {})".format(n_fit, len(traces)) ) else: trace = OpenMLRunTrace.merge_traces(traces) @@ -583,8 +591,7 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun: run : OpenMLRun Run corresponding to ID, fetched from the server. """ - run_dir = openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, - run_id) + run_dir = openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id) run_file = os.path.join(run_dir, "description.xml") if not os.path.exists(run_dir): @@ -594,11 +601,11 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun: if not ignore_cache: return _get_cached_run(run_id) else: - raise OpenMLCacheException(message='dummy') + raise OpenMLCacheException(message="dummy") except OpenMLCacheException: - run_xml = openml._api_calls._perform_api_call("run/%d" % run_id, 'get') - with io.open(run_file, "w", encoding='utf8') as fh: + run_xml = openml._api_calls._perform_api_call("run/%d" % run_id, "get") + with io.open(run_file, "w", encoding="utf8") as fh: fh.write(run_xml) run = _create_run_from_xml(run_xml) @@ -635,94 +642,98 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): elif not from_server: return None else: - raise AttributeError('Run XML does not contain required (server) ' - 'field: ', fieldname) + raise AttributeError("Run XML does not contain required (server) " "field: ", fieldname) - run = xmltodict.parse(xml, force_list=['oml:file', 'oml:evaluation', - 'oml:parameter_setting'])["oml:run"] - run_id = obtain_field(run, 'oml:run_id', from_server, cast=int) - uploader = obtain_field(run, 'oml:uploader', from_server, cast=int) - uploader_name = obtain_field(run, 'oml:uploader_name', from_server) - task_id = int(run['oml:task_id']) - task_type = obtain_field(run, 'oml:task_type', from_server) + run = xmltodict.parse(xml, force_list=["oml:file", "oml:evaluation", "oml:parameter_setting"])[ + "oml:run" + ] + run_id = obtain_field(run, "oml:run_id", from_server, cast=int) + uploader = obtain_field(run, "oml:uploader", from_server, cast=int) + uploader_name = obtain_field(run, "oml:uploader_name", from_server) + task_id = int(run["oml:task_id"]) + task_type = obtain_field(run, "oml:task_type", from_server) # even with the server requirement this field may be empty. - if 'oml:task_evaluation_measure' in run: - task_evaluation_measure = run['oml:task_evaluation_measure'] + if "oml:task_evaluation_measure" in run: + task_evaluation_measure = run["oml:task_evaluation_measure"] else: task_evaluation_measure = None - if not from_server and run['oml:flow_id'] is None: + if not from_server and run["oml:flow_id"] is None: # This can happen for a locally stored run of which the flow is not yet published. flow_id = None parameters = None else: - flow_id = obtain_field(run, 'oml:flow_id', from_server, cast=int) + flow_id = obtain_field(run, "oml:flow_id", from_server, cast=int) # parameters are only properly formatted once the flow is established on the server. # thus they are also not stored for runs with local flows. 
parameters = [] - if 'oml:parameter_setting' in run: - obtained_parameter_settings = run['oml:parameter_setting'] + if "oml:parameter_setting" in run: + obtained_parameter_settings = run["oml:parameter_setting"] for parameter_dict in obtained_parameter_settings: current_parameter = OrderedDict() - current_parameter['oml:name'] = parameter_dict['oml:name'] - current_parameter['oml:value'] = parameter_dict['oml:value'] - if 'oml:component' in parameter_dict: - current_parameter['oml:component'] = \ - parameter_dict['oml:component'] + current_parameter["oml:name"] = parameter_dict["oml:name"] + current_parameter["oml:value"] = parameter_dict["oml:value"] + if "oml:component" in parameter_dict: + current_parameter["oml:component"] = parameter_dict["oml:component"] parameters.append(current_parameter) - flow_name = obtain_field(run, 'oml:flow_name', from_server) - setup_id = obtain_field(run, 'oml:setup_id', from_server, cast=int) - setup_string = obtain_field(run, 'oml:setup_string', from_server) + flow_name = obtain_field(run, "oml:flow_name", from_server) + setup_id = obtain_field(run, "oml:setup_id", from_server, cast=int) + setup_string = obtain_field(run, "oml:setup_string", from_server) - if 'oml:input_data' in run: - dataset_id = int(run['oml:input_data']['oml:dataset']['oml:did']) + if "oml:input_data" in run: + dataset_id = int(run["oml:input_data"]["oml:dataset"]["oml:did"]) elif not from_server: dataset_id = None else: # fetching the task to obtain dataset_id t = openml.tasks.get_task(task_id, download_data=False) - if not hasattr(t, 'dataset_id'): - raise ValueError("Unable to fetch dataset_id from the task({}) " - "linked to run({})".format(task_id, run_id)) + if not hasattr(t, "dataset_id"): + raise ValueError( + "Unable to fetch dataset_id from the task({}) " + "linked to run({})".format(task_id, run_id) + ) dataset_id = t.dataset_id files = OrderedDict() evaluations = OrderedDict() fold_evaluations = OrderedDict() sample_evaluations = OrderedDict() - if 'oml:output_data' not in run: + if "oml:output_data" not in run: if from_server: - raise ValueError('Run does not contain output_data ' - '(OpenML server error?)') + raise ValueError("Run does not contain output_data " "(OpenML server error?)") else: - output_data = run['oml:output_data'] + output_data = run["oml:output_data"] predictions_url = None - if 'oml:file' in output_data: + if "oml:file" in output_data: # multiple files, the normal case - for file_dict in output_data['oml:file']: - files[file_dict['oml:name']] = int(file_dict['oml:file_id']) - if file_dict['oml:name'] == 'predictions': - predictions_url = file_dict['oml:url'] - if 'oml:evaluation' in output_data: + for file_dict in output_data["oml:file"]: + files[file_dict["oml:name"]] = int(file_dict["oml:file_id"]) + if file_dict["oml:name"] == "predictions": + predictions_url = file_dict["oml:url"] + if "oml:evaluation" in output_data: # in normal cases there should be evaluations, but in case there # was an error these could be absent - for evaluation_dict in output_data['oml:evaluation']: - key = evaluation_dict['oml:name'] - if 'oml:value' in evaluation_dict: - value = float(evaluation_dict['oml:value']) - elif 'oml:array_data' in evaluation_dict: - value = evaluation_dict['oml:array_data'] + for evaluation_dict in output_data["oml:evaluation"]: + key = evaluation_dict["oml:name"] + if "oml:value" in evaluation_dict: + value = float(evaluation_dict["oml:value"]) + elif "oml:array_data" in evaluation_dict: + value = evaluation_dict["oml:array_data"] else: - raise 
ValueError('Could not find keys "value" or ' - '"array_data" in %s' % - str(evaluation_dict.keys())) - if '@repeat' in evaluation_dict and '@fold' in \ - evaluation_dict and '@sample' in evaluation_dict: - repeat = int(evaluation_dict['@repeat']) - fold = int(evaluation_dict['@fold']) - sample = int(evaluation_dict['@sample']) + raise ValueError( + 'Could not find keys "value" or ' + '"array_data" in %s' % str(evaluation_dict.keys()) + ) + if ( + "@repeat" in evaluation_dict + and "@fold" in evaluation_dict + and "@sample" in evaluation_dict + ): + repeat = int(evaluation_dict["@repeat"]) + fold = int(evaluation_dict["@fold"]) + sample = int(evaluation_dict["@sample"]) if key not in sample_evaluations: sample_evaluations[key] = OrderedDict() if repeat not in sample_evaluations[key]: @@ -730,9 +741,9 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): if fold not in sample_evaluations[key][repeat]: sample_evaluations[key][repeat][fold] = OrderedDict() sample_evaluations[key][repeat][fold][sample] = value - elif '@repeat' in evaluation_dict and '@fold' in evaluation_dict: - repeat = int(evaluation_dict['@repeat']) - fold = int(evaluation_dict['@fold']) + elif "@repeat" in evaluation_dict and "@fold" in evaluation_dict: + repeat = int(evaluation_dict["@repeat"]) + fold = int(evaluation_dict["@fold"]) if key not in fold_evaluations: fold_evaluations[key] = OrderedDict() if repeat not in fold_evaluations[key]: @@ -741,54 +752,55 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): else: evaluations[key] = value - if 'description' not in files and from_server is True: - raise ValueError('No description file for run %d in run ' - 'description XML' % run_id) + if "description" not in files and from_server is True: + raise ValueError("No description file for run %d in run " "description XML" % run_id) - if 'predictions' not in files and from_server is True: + if "predictions" not in files and from_server is True: task = openml.tasks.get_task(task_id) if task.task_type_id == TaskTypeEnum.SUBGROUP_DISCOVERY: - raise NotImplementedError( - 'Subgroup discovery tasks are not yet supported.' - ) + raise NotImplementedError("Subgroup discovery tasks are not yet supported.") else: # JvR: actually, I am not sure whether this error should be raised. # a run can consist without predictions. 
But for now let's keep it # Matthias: yes, it should stay as long as we do not really handle # this stuff - raise ValueError('No prediction files for run %d in run ' - 'description XML' % run_id) + raise ValueError("No prediction files for run %d in run " "description XML" % run_id) - tags = openml.utils.extract_xml_tags('oml:tag', run) + tags = openml.utils.extract_xml_tags("oml:tag", run) - return OpenMLRun(run_id=run_id, uploader=uploader, - uploader_name=uploader_name, task_id=task_id, - task_type=task_type, - task_evaluation_measure=task_evaluation_measure, - flow_id=flow_id, flow_name=flow_name, - setup_id=setup_id, setup_string=setup_string, - parameter_settings=parameters, - dataset_id=dataset_id, output_files=files, - evaluations=evaluations, - fold_evaluations=fold_evaluations, - sample_evaluations=sample_evaluations, tags=tags, - predictions_url=predictions_url) + return OpenMLRun( + run_id=run_id, + uploader=uploader, + uploader_name=uploader_name, + task_id=task_id, + task_type=task_type, + task_evaluation_measure=task_evaluation_measure, + flow_id=flow_id, + flow_name=flow_name, + setup_id=setup_id, + setup_string=setup_string, + parameter_settings=parameters, + dataset_id=dataset_id, + output_files=files, + evaluations=evaluations, + fold_evaluations=fold_evaluations, + sample_evaluations=sample_evaluations, + tags=tags, + predictions_url=predictions_url, + ) def _get_cached_run(run_id): """Load a run from the cache.""" - run_cache_dir = openml.utils._create_cache_directory_for_id( - RUNS_CACHE_DIR_NAME, run_id, - ) + run_cache_dir = openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id,) try: run_file = os.path.join(run_cache_dir, "description.xml") - with io.open(run_file, encoding='utf8') as fh: + with io.open(run_file, encoding="utf8") as fh: run = _create_run_from_xml(xml=fh.read()) return run except (OSError, IOError): - raise OpenMLCacheException("Run file for run id %d not " - "cached" % run_id) + raise OpenMLCacheException("Run file for run id %d not " "cached" % run_id) def list_runs( @@ -802,7 +814,7 @@ def list_runs( tag: Optional[str] = None, study: Optional[int] = None, display_errors: bool = False, - output_format: str = 'dict', + output_format: str = "dict", **kwargs ) -> Union[Dict, pd.DataFrame]: """ @@ -846,34 +858,37 @@ def list_runs( ------- dict of dicts, or dataframe """ - if output_format not in ['dataframe', 'dict']: - raise ValueError("Invalid output format selected. " - "Only 'dict' or 'dataframe' applicable.") + if output_format not in ["dataframe", "dict"]: + raise ValueError( + "Invalid output format selected. " "Only 'dict' or 'dataframe' applicable." 
+ ) if id is not None and (not isinstance(id, list)): - raise TypeError('id must be of type list.') + raise TypeError("id must be of type list.") if task is not None and (not isinstance(task, list)): - raise TypeError('task must be of type list.') + raise TypeError("task must be of type list.") if setup is not None and (not isinstance(setup, list)): - raise TypeError('setup must be of type list.') + raise TypeError("setup must be of type list.") if flow is not None and (not isinstance(flow, list)): - raise TypeError('flow must be of type list.') + raise TypeError("flow must be of type list.") if uploader is not None and (not isinstance(uploader, list)): - raise TypeError('uploader must be of type list.') - - return openml.utils._list_all(output_format=output_format, - listing_call=_list_runs, - offset=offset, - size=size, - id=id, - task=task, - setup=setup, - flow=flow, - uploader=uploader, - tag=tag, - study=study, - display_errors=display_errors, - **kwargs) + raise TypeError("uploader must be of type list.") + + return openml.utils._list_all( + output_format=output_format, + listing_call=_list_runs, + offset=offset, + size=size, + id=id, + task=task, + setup=setup, + flow=flow, + uploader=uploader, + tag=tag, + study=study, + display_errors=display_errors, + **kwargs + ) def _list_runs( @@ -884,7 +899,7 @@ def _list_runs( uploader: Optional[List] = None, study: Optional[int] = None, display_errors: bool = False, - output_format: str = 'dict', + output_format: str = "dict", **kwargs ) -> Union[Dict, pd.DataFrame]: """ @@ -933,15 +948,15 @@ def _list_runs( for operator, value in kwargs.items(): api_call += "/%s/%s" % (operator, value) if id is not None: - api_call += "/run/%s" % ','.join([str(int(i)) for i in id]) + api_call += "/run/%s" % ",".join([str(int(i)) for i in id]) if task is not None: - api_call += "/task/%s" % ','.join([str(int(i)) for i in task]) + api_call += "/task/%s" % ",".join([str(int(i)) for i in task]) if setup is not None: - api_call += "/setup/%s" % ','.join([str(int(i)) for i in setup]) + api_call += "/setup/%s" % ",".join([str(int(i)) for i in setup]) if flow is not None: - api_call += "/flow/%s" % ','.join([str(int(i)) for i in flow]) + api_call += "/flow/%s" % ",".join([str(int(i)) for i in flow]) if uploader is not None: - api_call += "/uploader/%s" % ','.join([str(int(i)) for i in uploader]) + api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploader]) if study is not None: api_call += "/study/%d" % study if display_errors: @@ -949,42 +964,43 @@ def _list_runs( return __list_runs(api_call=api_call, output_format=output_format) -def __list_runs(api_call, output_format='dict'): +def __list_runs(api_call, output_format="dict"): """Helper function to parse API calls which are lists of runs""" - xml_string = openml._api_calls._perform_api_call(api_call, 'get') - runs_dict = xmltodict.parse(xml_string, force_list=('oml:run',)) + xml_string = openml._api_calls._perform_api_call(api_call, "get") + runs_dict = xmltodict.parse(xml_string, force_list=("oml:run",)) # Minimalistic check if the XML is useful - if 'oml:runs' not in runs_dict: - raise ValueError('Error in return XML, does not contain "oml:runs": %s' - % str(runs_dict)) - elif '@xmlns:oml' not in runs_dict['oml:runs']: - raise ValueError('Error in return XML, does not contain ' - '"oml:runs"/@xmlns:oml: %s' - % str(runs_dict)) - elif runs_dict['oml:runs']['@xmlns:oml'] != 'http://openml.org/openml': - raise ValueError('Error in return XML, value of ' - '"oml:runs"/@xmlns:oml is not ' - 
'"http://openml.org/openml": %s' - % str(runs_dict)) - - assert type(runs_dict['oml:runs']['oml:run']) == list, \ - type(runs_dict['oml:runs']) + if "oml:runs" not in runs_dict: + raise ValueError('Error in return XML, does not contain "oml:runs": %s' % str(runs_dict)) + elif "@xmlns:oml" not in runs_dict["oml:runs"]: + raise ValueError( + "Error in return XML, does not contain " '"oml:runs"/@xmlns:oml: %s' % str(runs_dict) + ) + elif runs_dict["oml:runs"]["@xmlns:oml"] != "http://openml.org/openml": + raise ValueError( + "Error in return XML, value of " + '"oml:runs"/@xmlns:oml is not ' + '"http://openml.org/openml": %s' % str(runs_dict) + ) + + assert type(runs_dict["oml:runs"]["oml:run"]) == list, type(runs_dict["oml:runs"]) runs = OrderedDict() - for run_ in runs_dict['oml:runs']['oml:run']: - run_id = int(run_['oml:run_id']) - run = {'run_id': run_id, - 'task_id': int(run_['oml:task_id']), - 'setup_id': int(run_['oml:setup_id']), - 'flow_id': int(run_['oml:flow_id']), - 'uploader': int(run_['oml:uploader']), - 'task_type': int(run_['oml:task_type_id']), - 'upload_time': str(run_['oml:upload_time']), - 'error_message': str((run_['oml:error_message']) or '')} + for run_ in runs_dict["oml:runs"]["oml:run"]: + run_id = int(run_["oml:run_id"]) + run = { + "run_id": run_id, + "task_id": int(run_["oml:task_id"]), + "setup_id": int(run_["oml:setup_id"]), + "flow_id": int(run_["oml:flow_id"]), + "uploader": int(run_["oml:uploader"]), + "task_type": int(run_["oml:task_type_id"]), + "upload_time": str(run_["oml:upload_time"]), + "error_message": str((run_["oml:error_message"]) or ""), + } runs[run_id] = run - if output_format == 'dataframe': - runs = pd.DataFrame.from_dict(runs, orient='index') + if output_format == "dataframe": + runs = pd.DataFrame.from_dict(runs, orient="index") return runs diff --git a/openml/runs/run.py b/openml/runs/run.py index 910801971..a61fc4688 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -14,13 +14,14 @@ from openml.base import OpenMLBase from ..exceptions import PyOpenMLError from ..flows import get_flow -from ..tasks import (get_task, - TaskTypeEnum, - OpenMLClassificationTask, - OpenMLLearningCurveTask, - OpenMLClusteringTask, - OpenMLRegressionTask - ) +from ..tasks import ( + get_task, + TaskTypeEnum, + OpenMLClassificationTask, + OpenMLLearningCurveTask, + OpenMLClusteringTask, + OpenMLRegressionTask, +) class OpenMLRun(OpenMLBase): @@ -36,13 +37,32 @@ class OpenMLRun(OpenMLBase): Refers to the data. 
""" - def __init__(self, task_id, flow_id, dataset_id, setup_string=None, - output_files=None, setup_id=None, tags=None, uploader=None, - uploader_name=None, evaluations=None, fold_evaluations=None, - sample_evaluations=None, data_content=None, trace=None, - model=None, task_type=None, task_evaluation_measure=None, - flow_name=None, parameter_settings=None, predictions_url=None, - task=None, flow=None, run_id=None): + def __init__( + self, + task_id, + flow_id, + dataset_id, + setup_string=None, + output_files=None, + setup_id=None, + tags=None, + uploader=None, + uploader_name=None, + evaluations=None, + fold_evaluations=None, + sample_evaluations=None, + data_content=None, + trace=None, + model=None, + task_type=None, + task_evaluation_measure=None, + flow_name=None, + parameter_settings=None, + predictions_url=None, + task=None, + flow=None, + run_id=None, + ): self.uploader = uploader self.uploader_name = uploader_name self.task_id = task_id @@ -74,35 +94,53 @@ def id(self) -> Optional[int]: def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: """ Collect all information to display in the __repr__ body. """ - fields = {"Uploader Name": self.uploader_name, - "Metric": self.task_evaluation_measure, - "Run ID": self.run_id, - "Task ID": self.task_id, - "Task Type": self.task_type, - "Task URL": openml.tasks.OpenMLTask.url_for_id(self.task_id), - "Flow ID": self.flow_id, - "Flow Name": self.flow_name, - "Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id), - "Setup ID": self.setup_id, - "Setup String": self.setup_string, - "Dataset ID": self.dataset_id, - "Dataset URL": openml.datasets.OpenMLDataset.url_for_id(self.dataset_id)} + fields = { + "Uploader Name": self.uploader_name, + "Metric": self.task_evaluation_measure, + "Run ID": self.run_id, + "Task ID": self.task_id, + "Task Type": self.task_type, + "Task URL": openml.tasks.OpenMLTask.url_for_id(self.task_id), + "Flow ID": self.flow_id, + "Flow Name": self.flow_name, + "Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id), + "Setup ID": self.setup_id, + "Setup String": self.setup_string, + "Dataset ID": self.dataset_id, + "Dataset URL": openml.datasets.OpenMLDataset.url_for_id(self.dataset_id), + } if self.uploader is not None: - fields["Uploader Profile"] = "{}/u/{}".format(openml.config.get_server_base_url(), - self.uploader) + fields["Uploader Profile"] = "{}/u/{}".format( + openml.config.get_server_base_url(), self.uploader + ) if self.run_id is not None: fields["Run URL"] = self.openml_url if self.evaluations is not None and self.task_evaluation_measure in self.evaluations: fields["Result"] = self.evaluations[self.task_evaluation_measure] # determines the order in which the information will be printed - order = ["Uploader Name", "Uploader Profile", "Metric", "Result", "Run ID", "Run URL", - "Task ID", "Task Type", "Task URL", "Flow ID", "Flow Name", "Flow URL", - "Setup ID", "Setup String", "Dataset ID", "Dataset URL"] + order = [ + "Uploader Name", + "Uploader Profile", + "Metric", + "Result", + "Run ID", + "Run URL", + "Task ID", + "Task Type", + "Task URL", + "Flow ID", + "Flow Name", + "Flow URL", + "Setup ID", + "Setup String", + "Dataset ID", + "Dataset URL", + ] return [(key, fields[key]) for key in order if key in fields] @classmethod - def from_filesystem(cls, directory: str, expect_model: bool = True) -> 'OpenMLRun': + def from_filesystem(cls, directory: str, expect_model: bool = True) -> "OpenMLRun": """ The inverse of the to_filesystem method. 
Instantiates an OpenMLRun object based on files stored on the file system. @@ -128,21 +166,21 @@ def from_filesystem(cls, directory: str, expect_model: bool = True) -> 'OpenMLRu import openml.runs.functions if not os.path.isdir(directory): - raise ValueError('Could not find folder') + raise ValueError("Could not find folder") - description_path = os.path.join(directory, 'description.xml') - predictions_path = os.path.join(directory, 'predictions.arff') - trace_path = os.path.join(directory, 'trace.arff') - model_path = os.path.join(directory, 'model.pkl') + description_path = os.path.join(directory, "description.xml") + predictions_path = os.path.join(directory, "predictions.arff") + trace_path = os.path.join(directory, "trace.arff") + model_path = os.path.join(directory, "model.pkl") if not os.path.isfile(description_path): - raise ValueError('Could not find description.xml') + raise ValueError("Could not find description.xml") if not os.path.isfile(predictions_path): - raise ValueError('Could not find predictions.arff') + raise ValueError("Could not find predictions.arff") if not os.path.isfile(model_path) and expect_model: - raise ValueError('Could not find model.pkl') + raise ValueError("Could not find model.pkl") - with open(description_path, 'r') as fht: + with open(description_path, "r") as fht: xml_string = fht.read() run = openml.runs.functions._create_run_from_xml(xml_string, from_server=False) @@ -151,14 +189,14 @@ def from_filesystem(cls, directory: str, expect_model: bool = True) -> 'OpenMLRu run.flow = flow run.flow_name = flow.name - with open(predictions_path, 'r') as fht: + with open(predictions_path, "r") as fht: predictions = arff.load(fht) - run.data_content = predictions['data'] + run.data_content = predictions["data"] if os.path.isfile(model_path): # note that it will load the model if the file exists, even if # expect_model is False - with open(model_path, 'rb') as fhb: + with open(model_path, "rb") as fhb: run.model = pickle.load(fhb) if os.path.isfile(trace_path): @@ -166,11 +204,7 @@ def from_filesystem(cls, directory: str, expect_model: bool = True) -> 'OpenMLRu return run - def to_filesystem( - self, - directory: str, - store_model: bool = True, - ) -> None: + def to_filesystem(self, directory: str, store_model: bool = True,) -> None: """ The inverse of the from_filesystem method. Serializes a run on the filesystem, to be uploaded later. @@ -187,25 +221,24 @@ def to_filesystem( model. 
""" if self.data_content is None or self.model is None: - raise ValueError('Run should have been executed (and contain ' - 'model / predictions)') + raise ValueError("Run should have been executed (and contain " "model / predictions)") os.makedirs(directory, exist_ok=True) if not os.listdir(directory) == []: raise ValueError( - 'Output directory {} should be empty'.format(os.path.abspath(directory)) + "Output directory {} should be empty".format(os.path.abspath(directory)) ) run_xml = self._to_xml() predictions_arff = arff.dumps(self._generate_arff_dict()) # It seems like typing does not allow to define the same variable multiple times - with open(os.path.join(directory, 'description.xml'), 'w') as fh: # type: TextIO + with open(os.path.join(directory, "description.xml"), "w") as fh: # type: TextIO fh.write(run_xml) - with open(os.path.join(directory, 'predictions.arff'), 'w') as fh: + with open(os.path.join(directory, "predictions.arff"), "w") as fh: fh.write(predictions_arff) if store_model: - with open(os.path.join(directory, 'model.pkl'), 'wb') as fh_b: # type: IO[bytes] + with open(os.path.join(directory, "model.pkl"), "wb") as fh_b: # type: IO[bytes] pickle.dump(self.model, fh_b) if self.flow_id is None: @@ -214,7 +247,7 @@ def to_filesystem( if self.trace is not None: self.trace._to_filesystem(directory) - def _generate_arff_dict(self) -> 'OrderedDict[str, Any]': + def _generate_arff_dict(self) -> "OrderedDict[str, Any]": """Generates the arff dictionary for uploading predictions to the server. @@ -227,78 +260,84 @@ def _generate_arff_dict(self) -> 'OrderedDict[str, Any]': Contains predictions and information about the run environment. """ if self.data_content is None: - raise ValueError('Run has not been executed.') + raise ValueError("Run has not been executed.") if self.flow is None: self.flow = get_flow(self.flow_id) - run_environment = (self.flow.extension.get_version_information() - + [time.strftime("%c")] - + ['Created by run_task()']) + run_environment = ( + self.flow.extension.get_version_information() + + [time.strftime("%c")] + + ["Created by run_task()"] + ) task = get_task(self.task_id) arff_dict = OrderedDict() # type: 'OrderedDict[str, Any]' - arff_dict['data'] = self.data_content - arff_dict['description'] = "\n".join(run_environment) - arff_dict['relation'] =\ - 'openml_task_{}_predictions'.format(task.task_id) + arff_dict["data"] = self.data_content + arff_dict["description"] = "\n".join(run_environment) + arff_dict["relation"] = "openml_task_{}_predictions".format(task.task_id) if isinstance(task, OpenMLLearningCurveTask): class_labels = task.class_labels instance_specifications = [ - ('repeat', 'NUMERIC'), - ('fold', 'NUMERIC'), - ('sample', 'NUMERIC'), - ('row_id', 'NUMERIC') + ("repeat", "NUMERIC"), + ("fold", "NUMERIC"), + ("sample", "NUMERIC"), + ("row_id", "NUMERIC"), ] - arff_dict['attributes'] = instance_specifications + arff_dict["attributes"] = instance_specifications if class_labels is not None: - arff_dict['attributes'] = arff_dict['attributes'] + \ - [('confidence.' + class_labels[i], - 'NUMERIC') - for i in range(len(class_labels))] + \ - [('prediction', class_labels), - ('correct', class_labels)] + arff_dict["attributes"] = ( + arff_dict["attributes"] + + [ + ("confidence." 
+ class_labels[i], "NUMERIC") + for i in range(len(class_labels)) + ] + + [("prediction", class_labels), ("correct", class_labels)] + ) else: - raise ValueError('The task has no class labels') + raise ValueError("The task has no class labels") elif isinstance(task, OpenMLClassificationTask): class_labels = task.class_labels - instance_specifications = [('repeat', 'NUMERIC'), - ('fold', 'NUMERIC'), - ('sample', 'NUMERIC'), # Legacy - ('row_id', 'NUMERIC')] + instance_specifications = [ + ("repeat", "NUMERIC"), + ("fold", "NUMERIC"), + ("sample", "NUMERIC"), # Legacy + ("row_id", "NUMERIC"), + ] - arff_dict['attributes'] = instance_specifications + arff_dict["attributes"] = instance_specifications if class_labels is not None: - prediction_confidences = [('confidence.' + class_labels[i], - 'NUMERIC') - for i in range(len(class_labels))] - prediction_and_true = [('prediction', class_labels), - ('correct', class_labels)] - arff_dict['attributes'] = arff_dict['attributes'] + \ - prediction_confidences + \ - prediction_and_true + prediction_confidences = [ + ("confidence." + class_labels[i], "NUMERIC") for i in range(len(class_labels)) + ] + prediction_and_true = [("prediction", class_labels), ("correct", class_labels)] + arff_dict["attributes"] = ( + arff_dict["attributes"] + prediction_confidences + prediction_and_true + ) else: - raise ValueError('The task has no class labels') + raise ValueError("The task has no class labels") elif isinstance(task, OpenMLRegressionTask): - arff_dict['attributes'] = [('repeat', 'NUMERIC'), - ('fold', 'NUMERIC'), - ('row_id', 'NUMERIC'), - ('prediction', 'NUMERIC'), - ('truth', 'NUMERIC')] + arff_dict["attributes"] = [ + ("repeat", "NUMERIC"), + ("fold", "NUMERIC"), + ("row_id", "NUMERIC"), + ("prediction", "NUMERIC"), + ("truth", "NUMERIC"), + ] elif isinstance(task, OpenMLClusteringTask): - arff_dict['attributes'] = [('repeat', 'NUMERIC'), - ('fold', 'NUMERIC'), - ('row_id', 'NUMERIC'), - ('cluster', 'NUMERIC')] + arff_dict["attributes"] = [ + ("repeat", "NUMERIC"), + ("fold", "NUMERIC"), + ("row_id", "NUMERIC"), + ("cluster", "NUMERIC"), + ] else: - raise NotImplementedError( - 'Task type %s is not yet supported.' % str(task.task_type) - ) + raise NotImplementedError("Task type %s is not yet supported." % str(task.task_type)) return arff_dict @@ -323,34 +362,35 @@ def get_metric_fn(self, sklearn_fn, kwargs=None): kwargs = kwargs if kwargs else dict() if self.data_content is not None and self.task_id is not None: predictions_arff = self._generate_arff_dict() - elif 'predictions' in self.output_files: + elif "predictions" in self.output_files: predictions_file_url = openml._api_calls._file_id_to_url( - self.output_files['predictions'], 'predictions.arff', + self.output_files["predictions"], "predictions.arff", ) response = openml._api_calls._download_text_file(predictions_file_url) predictions_arff = arff.loads(response) # TODO: make this a stream reader else: - raise ValueError('Run should have been locally executed or ' - 'contain outputfile reference.') + raise ValueError( + "Run should have been locally executed or " "contain outputfile reference." 
+ ) # Need to know more about the task to compute scores correctly task = get_task(self.task_id) - attribute_names = [att[0] for att in predictions_arff['attributes']] - if (task.task_type_id in [TaskTypeEnum.SUPERVISED_CLASSIFICATION, - TaskTypeEnum.LEARNING_CURVE] - and 'correct' not in attribute_names): - raise ValueError('Attribute "correct" should be set for ' - 'classification task runs') - if (task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION - and 'truth' not in attribute_names): - raise ValueError('Attribute "truth" should be set for ' - 'regression task runs') - if (task.task_type_id != TaskTypeEnum.CLUSTERING - and 'prediction' not in attribute_names): - raise ValueError('Attribute "predict" should be set for ' - 'supervised task runs') + attribute_names = [att[0] for att in predictions_arff["attributes"]] + if ( + task.task_type_id + in [TaskTypeEnum.SUPERVISED_CLASSIFICATION, TaskTypeEnum.LEARNING_CURVE] + and "correct" not in attribute_names + ): + raise ValueError('Attribute "correct" should be set for ' "classification task runs") + if ( + task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION + and "truth" not in attribute_names + ): + raise ValueError('Attribute "truth" should be set for ' "regression task runs") + if task.task_type_id != TaskTypeEnum.CLUSTERING and "prediction" not in attribute_names: + raise ValueError('Attribute "predict" should be set for ' "supervised task runs") def _attribute_list_to_dict(attribute_list): # convenience function: Creates a mapping to map from the name of @@ -362,34 +402,39 @@ def _attribute_list_to_dict(attribute_list): res[attribute_list[idx][0]] = idx return res - attribute_dict = \ - _attribute_list_to_dict(predictions_arff['attributes']) + attribute_dict = _attribute_list_to_dict(predictions_arff["attributes"]) - repeat_idx = attribute_dict['repeat'] - fold_idx = attribute_dict['fold'] - predicted_idx = attribute_dict['prediction'] # Assume supervised task + repeat_idx = attribute_dict["repeat"] + fold_idx = attribute_dict["fold"] + predicted_idx = attribute_dict["prediction"] # Assume supervised task - if task.task_type_id == TaskTypeEnum.SUPERVISED_CLASSIFICATION or \ - task.task_type_id == TaskTypeEnum.LEARNING_CURVE: - correct_idx = attribute_dict['correct'] + if ( + task.task_type_id == TaskTypeEnum.SUPERVISED_CLASSIFICATION + or task.task_type_id == TaskTypeEnum.LEARNING_CURVE + ): + correct_idx = attribute_dict["correct"] elif task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION: - correct_idx = attribute_dict['truth'] + correct_idx = attribute_dict["truth"] has_samples = False - if 'sample' in attribute_dict: - sample_idx = attribute_dict['sample'] + if "sample" in attribute_dict: + sample_idx = attribute_dict["sample"] has_samples = True - if predictions_arff['attributes'][predicted_idx][1] != \ - predictions_arff['attributes'][correct_idx][1]: - pred = predictions_arff['attributes'][predicted_idx][1] - corr = predictions_arff['attributes'][correct_idx][1] - raise ValueError('Predicted and Correct do not have equal values:' - ' %s Vs. %s' % (str(pred), str(corr))) + if ( + predictions_arff["attributes"][predicted_idx][1] + != predictions_arff["attributes"][correct_idx][1] + ): + pred = predictions_arff["attributes"][predicted_idx][1] + corr = predictions_arff["attributes"][correct_idx][1] + raise ValueError( + "Predicted and Correct do not have equal values:" + " %s Vs. 
%s" % (str(pred), str(corr)) + ) # TODO: these could be cached values_predict = {} values_correct = {} - for line_idx, line in enumerate(predictions_arff['data']): + for line_idx, line in enumerate(predictions_arff["data"]): rep = line[repeat_idx] fold = line[fold_idx] if has_samples: @@ -397,12 +442,14 @@ def _attribute_list_to_dict(attribute_list): else: samp = 0 # No learning curve sample, always 0 - if task.task_type_id in [TaskTypeEnum.SUPERVISED_CLASSIFICATION, - TaskTypeEnum.LEARNING_CURVE]: - prediction = predictions_arff['attributes'][predicted_idx][ - 1].index(line[predicted_idx]) - correct = predictions_arff['attributes'][predicted_idx][1]. \ - index(line[correct_idx]) + if task.task_type_id in [ + TaskTypeEnum.SUPERVISED_CLASSIFICATION, + TaskTypeEnum.LEARNING_CURVE, + ]: + prediction = predictions_arff["attributes"][predicted_idx][1].index( + line[predicted_idx] + ) + correct = predictions_arff["attributes"][predicted_idx][1].index(line[correct_idx]) elif task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION: prediction = line[predicted_idx] correct = line[correct_idx] @@ -430,7 +477,7 @@ def _attribute_list_to_dict(attribute_list): def _parse_publish_response(self, xml_response: Dict): """ Parse the id from the xml_response and assign it to self. """ - self.run_id = int(xml_response['oml:upload_run']['oml:run_id']) + self.run_id = int(xml_response["oml:upload_run"]["oml:run_id"]) def _get_file_elements(self) -> Dict: """ Get file_elements to upload to the server. @@ -440,8 +487,7 @@ def _get_file_elements(self) -> Dict: """ if self.model is None: raise PyOpenMLError( - "OpenMLRun obj does not contain a model. " - "(This should never happen.) " + "OpenMLRun obj does not contain a model. " "(This should never happen.) " ) if self.flow_id is None: if self.flow is None: @@ -458,56 +504,65 @@ def _get_file_elements(self) -> Dict: if self.flow is None: self.flow = openml.flows.get_flow(self.flow_id) self.parameter_settings = self.flow.extension.obtain_parameter_values( - self.flow, - self.model, + self.flow, self.model, ) - file_elements = {'description': ("description.xml", self._to_xml())} + file_elements = {"description": ("description.xml", self._to_xml())} if self.error_message is None: predictions = arff.dumps(self._generate_arff_dict()) - file_elements['predictions'] = ("predictions.arff", predictions) + file_elements["predictions"] = ("predictions.arff", predictions) if self.trace is not None: trace_arff = arff.dumps(self.trace.trace_to_arff()) - file_elements['trace'] = ("trace.arff", trace_arff) + file_elements["trace"] = ("trace.arff", trace_arff) return file_elements - def _to_dict(self) -> 'OrderedDict[str, OrderedDict]': + def _to_dict(self) -> "OrderedDict[str, OrderedDict]": """ Creates a dictionary representation of self. 
""" description = OrderedDict() # type: 'OrderedDict' - description['oml:run'] = OrderedDict() - description['oml:run']['@xmlns:oml'] = 'http://openml.org/openml' - description['oml:run']['oml:task_id'] = self.task_id - description['oml:run']['oml:flow_id'] = self.flow_id + description["oml:run"] = OrderedDict() + description["oml:run"]["@xmlns:oml"] = "http://openml.org/openml" + description["oml:run"]["oml:task_id"] = self.task_id + description["oml:run"]["oml:flow_id"] = self.flow_id if self.error_message is not None: - description['oml:run']['oml:error_message'] = self.error_message - description['oml:run']['oml:parameter_setting'] = self.parameter_settings + description["oml:run"]["oml:error_message"] = self.error_message + description["oml:run"]["oml:parameter_setting"] = self.parameter_settings if self.tags is not None: - description['oml:run']['oml:tag'] = self.tags # Tags describing the run - if (self.fold_evaluations is not None and len(self.fold_evaluations) > 0) or \ - (self.sample_evaluations is not None and len(self.sample_evaluations) > 0): - description['oml:run']['oml:output_data'] = OrderedDict() - description['oml:run']['oml:output_data']['oml:evaluation'] = list() + description["oml:run"]["oml:tag"] = self.tags # Tags describing the run + if (self.fold_evaluations is not None and len(self.fold_evaluations) > 0) or ( + self.sample_evaluations is not None and len(self.sample_evaluations) > 0 + ): + description["oml:run"]["oml:output_data"] = OrderedDict() + description["oml:run"]["oml:output_data"]["oml:evaluation"] = list() if self.fold_evaluations is not None: for measure in self.fold_evaluations: for repeat in self.fold_evaluations[measure]: for fold, value in self.fold_evaluations[measure][repeat].items(): - current = OrderedDict([ - ('@repeat', str(repeat)), ('@fold', str(fold)), - ('oml:name', measure), ('oml:value', str(value))]) - description['oml:run']['oml:output_data'][ - 'oml:evaluation'].append(current) + current = OrderedDict( + [ + ("@repeat", str(repeat)), + ("@fold", str(fold)), + ("oml:name", measure), + ("oml:value", str(value)), + ] + ) + description["oml:run"]["oml:output_data"]["oml:evaluation"].append(current) if self.sample_evaluations is not None: for measure in self.sample_evaluations: for repeat in self.sample_evaluations[measure]: for fold in self.sample_evaluations[measure][repeat]: - for sample, value in \ - self.sample_evaluations[measure][repeat][fold].items(): - current = OrderedDict([ - ('@repeat', str(repeat)), ('@fold', str(fold)), - ('@sample', str(sample)), ('oml:name', measure), - ('oml:value', str(value))]) - description['oml:run']['oml:output_data'][ - 'oml:evaluation'].append(current) + for sample, value in self.sample_evaluations[measure][repeat][fold].items(): + current = OrderedDict( + [ + ("@repeat", str(repeat)), + ("@fold", str(fold)), + ("@sample", str(sample)), + ("oml:name", measure), + ("oml:value", str(value)), + ] + ) + description["oml:run"]["oml:output_data"]["oml:evaluation"].append( + current + ) return description diff --git a/openml/runs/trace.py b/openml/runs/trace.py index 220a10c95..0c05b9dc8 100644 --- a/openml/runs/trace.py +++ b/openml/runs/trace.py @@ -8,13 +8,13 @@ import arff import xmltodict -PREFIX = 'parameter_' +PREFIX = "parameter_" REQUIRED_ATTRIBUTES = [ - 'repeat', - 'fold', - 'iteration', - 'evaluation', - 'selected', + "repeat", + "fold", + "iteration", + "evaluation", + "selected", ] @@ -55,15 +55,10 @@ def get_selected_iteration(self, fold: int, repeat: int) -> int: selected as the best 
iteration by the search procedure """ for (r, f, i) in self.trace_iterations: - if ( - r == repeat - and f == fold - and self.trace_iterations[(r, f, i)].selected is True - ): + if r == repeat and f == fold and self.trace_iterations[(r, f, i)].selected is True: return i raise ValueError( - 'Could not find the selected iteration for rep/fold %d/%d' % - (repeat, fold) + "Could not find the selected iteration for rep/fold %d/%d" % (repeat, fold) ) @classmethod @@ -89,26 +84,26 @@ def generate(cls, attributes, content): """ if content is None: - raise ValueError('Trace content not available.') + raise ValueError("Trace content not available.") elif attributes is None: - raise ValueError('Trace attributes not available.') + raise ValueError("Trace attributes not available.") elif len(content) == 0: - raise ValueError('Trace content is empty.') + raise ValueError("Trace content is empty.") elif len(attributes) != len(content[0]): raise ValueError( - 'Trace_attributes and trace_content not compatible:' - ' %s vs %s' % (attributes, content[0]) + "Trace_attributes and trace_content not compatible:" + " %s vs %s" % (attributes, content[0]) ) return cls._trace_from_arff_struct( attributes=attributes, content=content, - error_message='setup_string not allowed when constructing a ' - 'trace object from run results.' + error_message="setup_string not allowed when constructing a " + "trace object from run results.", ) @classmethod - def _from_filesystem(cls, file_path: str) -> 'OpenMLRunTrace': + def _from_filesystem(cls, file_path: str) -> "OpenMLRunTrace": """ Logic to deserialize the trace from the filesystem. @@ -122,17 +117,17 @@ def _from_filesystem(cls, file_path: str) -> 'OpenMLRunTrace': OpenMLRunTrace """ if not os.path.isfile(file_path): - raise ValueError('Trace file doesn\'t exist') + raise ValueError("Trace file doesn't exist") - with open(file_path, 'r') as fp: + with open(file_path, "r") as fp: trace_arff = arff.load(fp) - for trace_idx in range(len(trace_arff['data'])): + for trace_idx in range(len(trace_arff["data"])): # iterate over first three entrees of a trace row # (fold, repeat, trace_iteration) these should be int for line_idx in range(3): - trace_arff['data'][trace_idx][line_idx] = int( - trace_arff['data'][trace_idx][line_idx] + trace_arff["data"][trace_idx][line_idx] = int( + trace_arff["data"][trace_idx][line_idx] ) return cls.trace_from_arff(trace_arff) @@ -149,7 +144,7 @@ def _to_filesystem(self, file_path): """ trace_arff = arff.dumps(self.trace_to_arff()) - with open(os.path.join(file_path, 'trace.arff'), 'w') as f: + with open(os.path.join(file_path, "trace.arff"), "w") as f: f.write(trace_arff) def trace_to_arff(self): @@ -168,16 +163,18 @@ def trace_to_arff(self): # attributes that will be in trace arff trace_attributes = [ - ('repeat', 'NUMERIC'), - ('fold', 'NUMERIC'), - ('iteration', 'NUMERIC'), - ('evaluation', 'NUMERIC'), - ('selected', ['true', 'false']), + ("repeat", "NUMERIC"), + ("fold", "NUMERIC"), + ("iteration", "NUMERIC"), + ("evaluation", "NUMERIC"), + ("selected", ["true", "false"]), ] - trace_attributes.extend([ - (PREFIX + parameter, 'STRING') for parameter in - next(iter(self.trace_iterations.values())).get_parameters() - ]) + trace_attributes.extend( + [ + (PREFIX + parameter, "STRING") + for parameter in next(iter(self.trace_iterations.values())).get_parameters() + ] + ) arff_dict = OrderedDict() data = [] @@ -185,23 +182,23 @@ def trace_to_arff(self): tmp_list = [] for attr, _ in trace_attributes: if attr.startswith(PREFIX): - attr = 
attr[len(PREFIX):] + attr = attr[len(PREFIX) :] value = trace_iteration.get_parameters()[attr] else: value = getattr(trace_iteration, attr) - if attr == 'selected': + if attr == "selected": if value: - tmp_list.append('true') + tmp_list.append("true") else: - tmp_list.append('false') + tmp_list.append("false") else: tmp_list.append(value) data.append(tmp_list) - arff_dict['attributes'] = trace_attributes - arff_dict['data'] = data + arff_dict["attributes"] = trace_attributes + arff_dict["data"] = data # TODO allow to pass a trace description when running a flow - arff_dict['relation'] = "Trace" + arff_dict["relation"] = "Trace" return arff_dict @classmethod @@ -220,12 +217,12 @@ def trace_from_arff(cls, arff_obj): ------- OpenMLRunTrace """ - attributes = arff_obj['attributes'] - content = arff_obj['data'] + attributes = arff_obj["attributes"] + content = arff_obj["data"] return cls._trace_from_arff_struct( attributes=attributes, content=content, - error_message='setup_string not supported for arff serialization' + error_message="setup_string not supported for arff serialization", ) @classmethod @@ -235,10 +232,8 @@ def _trace_from_arff_struct(cls, attributes, content, error_message): for required_attribute in REQUIRED_ATTRIBUTES: if required_attribute not in attribute_idx: - raise ValueError( - 'arff misses required attribute: %s' % required_attribute - ) - if 'setup_string' in attribute_idx: + raise ValueError("arff misses required attribute: %s" % required_attribute) + if "setup_string" in attribute_idx: raise ValueError(error_message) # note that the required attributes can not be duplicated because @@ -247,36 +242,35 @@ def _trace_from_arff_struct(cls, attributes, content, error_message): for attribute in attribute_idx: if attribute in REQUIRED_ATTRIBUTES: continue - elif attribute == 'setup_string': + elif attribute == "setup_string": continue elif not attribute.startswith(PREFIX): raise ValueError( - 'Encountered unknown attribute %s that does not start ' - 'with prefix %s' % (attribute, PREFIX) + "Encountered unknown attribute %s that does not start " + "with prefix %s" % (attribute, PREFIX) ) else: parameter_attributes.append(attribute) for itt in content: - repeat = int(itt[attribute_idx['repeat']]) - fold = int(itt[attribute_idx['fold']]) - iteration = int(itt[attribute_idx['iteration']]) - evaluation = float(itt[attribute_idx['evaluation']]) - selected_value = itt[attribute_idx['selected']] - if selected_value == 'true': + repeat = int(itt[attribute_idx["repeat"]]) + fold = int(itt[attribute_idx["fold"]]) + iteration = int(itt[attribute_idx["iteration"]]) + evaluation = float(itt[attribute_idx["evaluation"]]) + selected_value = itt[attribute_idx["selected"]] + if selected_value == "true": selected = True - elif selected_value == 'false': + elif selected_value == "false": selected = False else: raise ValueError( 'expected {"true", "false"} value for selected field, ' - 'received: %s' % selected_value + "received: %s" % selected_value ) - parameters = OrderedDict([ - (attribute, itt[attribute_idx[attribute]]) - for attribute in parameter_attributes - ]) + parameters = OrderedDict( + [(attribute, itt[attribute_idx[attribute]]) for attribute in parameter_attributes] + ) current = OpenMLTraceIteration( repeat=repeat, @@ -309,64 +303,58 @@ def trace_from_xml(cls, xml): Object containing the run id and a dict containing the trace iterations. 
""" - result_dict = xmltodict.parse( - xml, force_list=('oml:trace_iteration',) - )['oml:trace'] + result_dict = xmltodict.parse(xml, force_list=("oml:trace_iteration",))["oml:trace"] - run_id = result_dict['oml:run_id'] + run_id = result_dict["oml:run_id"] trace = OrderedDict() - if 'oml:trace_iteration' not in result_dict: - raise ValueError('Run does not contain valid trace. ') - if not isinstance(result_dict['oml:trace_iteration'], list): - raise TypeError(type(result_dict['oml:trace_iteration'])) - - for itt in result_dict['oml:trace_iteration']: - repeat = int(itt['oml:repeat']) - fold = int(itt['oml:fold']) - iteration = int(itt['oml:iteration']) - setup_string = json.loads(itt['oml:setup_string']) - evaluation = float(itt['oml:evaluation']) - selected_value = itt['oml:selected'] - if selected_value == 'true': + if "oml:trace_iteration" not in result_dict: + raise ValueError("Run does not contain valid trace. ") + if not isinstance(result_dict["oml:trace_iteration"], list): + raise TypeError(type(result_dict["oml:trace_iteration"])) + + for itt in result_dict["oml:trace_iteration"]: + repeat = int(itt["oml:repeat"]) + fold = int(itt["oml:fold"]) + iteration = int(itt["oml:iteration"]) + setup_string = json.loads(itt["oml:setup_string"]) + evaluation = float(itt["oml:evaluation"]) + selected_value = itt["oml:selected"] + if selected_value == "true": selected = True - elif selected_value == 'false': + elif selected_value == "false": selected = False else: raise ValueError( 'expected {"true", "false"} value for ' - 'selected field, received: %s' % selected_value + "selected field, received: %s" % selected_value ) current = OpenMLTraceIteration( - repeat, - fold, - iteration, - setup_string, - evaluation, - selected, + repeat, fold, iteration, setup_string, evaluation, selected, ) trace[(repeat, fold, iteration)] = current return cls(run_id, trace) @classmethod - def merge_traces(cls, traces: List['OpenMLRunTrace']) -> 'OpenMLRunTrace': + def merge_traces(cls, traces: List["OpenMLRunTrace"]) -> "OpenMLRunTrace": - merged_trace = OrderedDict() # type: OrderedDict[Tuple[int, int, int], OpenMLTraceIteration] # noqa E501 + merged_trace = ( + OrderedDict() + ) # type: OrderedDict[Tuple[int, int, int], OpenMLTraceIteration] # noqa E501 previous_iteration = None for trace in traces: for iteration in trace: key = (iteration.repeat, iteration.fold, iteration.iteration) if previous_iteration is not None: - if ( - list(merged_trace[previous_iteration].parameters.keys()) - != list(iteration.parameters.keys()) + if list(merged_trace[previous_iteration].parameters.keys()) != list( + iteration.parameters.keys() ): raise ValueError( - 'Cannot merge traces because the parameters are not equal: {} vs {}'. 
- format( + "Cannot merge traces because the parameters are not equal: " + "{} vs {}".format( list(merged_trace[previous_iteration].parameters.keys()), list(iteration.parameters.keys()), ) @@ -383,9 +371,8 @@ def merge_traces(cls, traces: List['OpenMLRunTrace']) -> 'OpenMLRunTrace': return cls(None, merged_trace) def __repr__(self): - return '[Run id: {}, {} trace iterations]'.format( - -1 if self.run_id is None else self.run_id, - len(self.trace_iterations), + return "[Run id: {}, {} trace iterations]".format( + -1 if self.run_id is None else self.run_id, len(self.trace_iterations), ) def __iter__(self): @@ -423,31 +410,20 @@ class OpenMLTraceIteration(object): """ def __init__( - self, - repeat, - fold, - iteration, - setup_string, - evaluation, - selected, - parameters=None, + self, repeat, fold, iteration, setup_string, evaluation, selected, parameters=None, ): if not isinstance(selected, bool): raise TypeError(type(selected)) if setup_string and parameters: raise ValueError( - 'Can only be instantiated with either ' - 'setup_string or parameters argument.' + "Can only be instantiated with either " "setup_string or parameters argument." ) elif not setup_string and not parameters: - raise ValueError( - 'Either setup_string or parameters needs to be passed as ' - 'argument.' - ) + raise ValueError("Either setup_string or parameters needs to be passed as " "argument.") if parameters is not None and not isinstance(parameters, OrderedDict): raise TypeError( - 'argument parameters is not an instance of OrderedDict, but %s' + "argument parameters is not an instance of OrderedDict, but %s" % str(type(parameters)) ) @@ -465,19 +441,19 @@ def get_parameters(self): if self.setup_string: for param in self.setup_string: - key = param[len(PREFIX):] + key = param[len(PREFIX) :] value = self.setup_string[param] result[key] = json.loads(value) else: for param, value in self.parameters.items(): - result[param[len(PREFIX):]] = value + result[param[len(PREFIX) :]] = value return result def __repr__(self): """ tmp string representation, will be changed in the near future """ - return '[(%d,%d,%d): %f (%r)]' % ( + return "[(%d,%d,%d): %f (%r)]" % ( self.repeat, self.fold, self.iteration, diff --git a/openml/setups/__init__.py b/openml/setups/__init__.py index 4f0be9571..31f4f503f 100644 --- a/openml/setups/__init__.py +++ b/openml/setups/__init__.py @@ -3,5 +3,11 @@ from .setup import OpenMLSetup, OpenMLParameter from .functions import get_setup, list_setups, setup_exists, initialize_model -__all__ = ['OpenMLSetup', 'OpenMLParameter', 'get_setup', 'list_setups', - 'setup_exists', 'initialize_model'] +__all__ = [ + "OpenMLSetup", + "OpenMLParameter", + "get_setup", + "list_setups", + "setup_exists", + "initialize_model", +] diff --git a/openml/setups/functions.py b/openml/setups/functions.py index 5f3b796c8..b418a6106 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -35,25 +35,23 @@ def setup_exists(flow) -> int: # sadly, this api call relies on a run object openml.flows.functions._check_flow_for_server_id(flow) if flow.model is None: - raise ValueError('Flow should have model field set with the actual model.') + raise ValueError("Flow should have model field set with the actual model.") if flow.extension is None: - raise ValueError('Flow should have model field set with the correct extension.') + raise ValueError("Flow should have model field set with the correct extension.") # checks whether the flow exists on the server and flow ids align exists = flow_exists(flow.name, 
flow.external_version) if exists != flow.flow_id: - raise ValueError('This should not happen!') + raise ValueError("This should not happen!") openml_param_settings = flow.extension.obtain_parameter_values(flow) - description = xmltodict.unparse(_to_dict(flow.flow_id, - openml_param_settings), - pretty=True) - file_elements = {'description': ('description.arff', description)} - result = openml._api_calls._perform_api_call('/setup/exists/', - 'post', - file_elements=file_elements) + description = xmltodict.unparse(_to_dict(flow.flow_id, openml_param_settings), pretty=True) + file_elements = {"description": ("description.arff", description)} + result = openml._api_calls._perform_api_call( + "/setup/exists/", "post", file_elements=file_elements + ) result_dict = xmltodict.parse(result) - setup_id = int(result_dict['oml:setup_exists']['oml:id']) + setup_id = int(result_dict["oml:setup_exists"]["oml:id"]) if setup_id > 0: return setup_id else: @@ -66,14 +64,15 @@ def _get_cached_setup(setup_id): setup_cache_dir = os.path.join(cache_dir, "setups", str(setup_id)) try: setup_file = os.path.join(setup_cache_dir, "description.xml") - with io.open(setup_file, encoding='utf8') as fh: + with io.open(setup_file, encoding="utf8") as fh: setup_xml = xmltodict.parse(fh.read()) - setup = _create_setup_from_xml(setup_xml, output_format='object') + setup = _create_setup_from_xml(setup_xml, output_format="object") return setup except (OSError, IOError): raise openml.exceptions.OpenMLCacheException( - "Setup file for setup id %d not cached" % setup_id) + "Setup file for setup id %d not cached" % setup_id + ) def get_setup(setup_id): @@ -90,9 +89,7 @@ def get_setup(setup_id): ------- dict or OpenMLSetup(an initialized openml setup object) """ - setup_dir = os.path.join(config.get_cache_directory(), - "setups", - str(setup_id)) + setup_dir = os.path.join(config.get_cache_directory(), "setups", str(setup_id)) setup_file = os.path.join(setup_dir, "description.xml") if not os.path.exists(setup_dir): @@ -101,13 +98,13 @@ def get_setup(setup_id): try: return _get_cached_setup(setup_id) except (openml.exceptions.OpenMLCacheException): - url_suffix = '/setup/%d' % setup_id - setup_xml = openml._api_calls._perform_api_call(url_suffix, 'get') - with io.open(setup_file, "w", encoding='utf8') as fh: + url_suffix = "/setup/%d" % setup_id + setup_xml = openml._api_calls._perform_api_call(url_suffix, "get") + with io.open(setup_file, "w", encoding="utf8") as fh: fh.write(setup_xml) result_dict = xmltodict.parse(setup_xml) - return _create_setup_from_xml(result_dict, output_format='object') + return _create_setup_from_xml(result_dict, output_format="object") def list_setups( @@ -116,7 +113,7 @@ def list_setups( flow: Optional[int] = None, tag: Optional[str] = None, setup: Optional[List] = None, - output_format: str = 'object' + output_format: str = "object", ) -> Union[Dict, pd.DataFrame]: """ List all setups matching all of the given filters. @@ -138,22 +135,25 @@ def list_setups( ------- dict or dataframe """ - if output_format not in ['dataframe', 'dict', 'object']: - raise ValueError("Invalid output format selected. " - "Only 'dict', 'object', or 'dataframe' applicable.") + if output_format not in ["dataframe", "dict", "object"]: + raise ValueError( + "Invalid output format selected. " "Only 'dict', 'object', or 'dataframe' applicable." 
+ ) batch_size = 1000 # batch size for setups is lower - return openml.utils._list_all(output_format=output_format, - listing_call=_list_setups, - offset=offset, - size=size, - flow=flow, - tag=tag, - setup=setup, - batch_size=batch_size) - - -def _list_setups(setup=None, output_format='object', **kwargs): + return openml.utils._list_all( + output_format=output_format, + listing_call=_list_setups, + offset=offset, + size=size, + flow=flow, + tag=tag, + setup=setup, + batch_size=batch_size, + ) + + +def _list_setups(setup=None, output_format="object", **kwargs): """ Perform API call `/setup/list/{filters}` @@ -179,7 +179,7 @@ def _list_setups(setup=None, output_format='object', **kwargs): api_call = "setup/list" if setup is not None: - api_call += "/setup/%s" % ','.join([str(int(i)) for i in setup]) + api_call += "/setup/%s" % ",".join([str(int(i)) for i in setup]) if kwargs is not None: for operator, value in kwargs.items(): api_call += "/%s/%s" % (operator, value) @@ -187,40 +187,43 @@ def _list_setups(setup=None, output_format='object', **kwargs): return __list_setups(api_call=api_call, output_format=output_format) -def __list_setups(api_call, output_format='object'): +def __list_setups(api_call, output_format="object"): """Helper function to parse API calls which are lists of setups""" - xml_string = openml._api_calls._perform_api_call(api_call, 'get') - setups_dict = xmltodict.parse(xml_string, force_list=('oml:setup',)) - openml_uri = 'http://openml.org/openml' + xml_string = openml._api_calls._perform_api_call(api_call, "get") + setups_dict = xmltodict.parse(xml_string, force_list=("oml:setup",)) + openml_uri = "http://openml.org/openml" # Minimalistic check if the XML is useful - if 'oml:setups' not in setups_dict: - raise ValueError('Error in return XML, does not contain "oml:setups":' - ' %s' % str(setups_dict)) - elif '@xmlns:oml' not in setups_dict['oml:setups']: - raise ValueError('Error in return XML, does not contain ' - '"oml:setups"/@xmlns:oml: %s' - % str(setups_dict)) - elif setups_dict['oml:setups']['@xmlns:oml'] != openml_uri: - raise ValueError('Error in return XML, value of ' - '"oml:seyups"/@xmlns:oml is not ' - '"%s": %s' - % (openml_uri, str(setups_dict))) - - assert type(setups_dict['oml:setups']['oml:setup']) == list, \ - type(setups_dict['oml:setups']) + if "oml:setups" not in setups_dict: + raise ValueError( + 'Error in return XML, does not contain "oml:setups":' " %s" % str(setups_dict) + ) + elif "@xmlns:oml" not in setups_dict["oml:setups"]: + raise ValueError( + "Error in return XML, does not contain " + '"oml:setups"/@xmlns:oml: %s' % str(setups_dict) + ) + elif setups_dict["oml:setups"]["@xmlns:oml"] != openml_uri: + raise ValueError( + "Error in return XML, value of " + '"oml:seyups"/@xmlns:oml is not ' + '"%s": %s' % (openml_uri, str(setups_dict)) + ) + + assert type(setups_dict["oml:setups"]["oml:setup"]) == list, type(setups_dict["oml:setups"]) setups = dict() - for setup_ in setups_dict['oml:setups']['oml:setup']: + for setup_ in setups_dict["oml:setups"]["oml:setup"]: # making it a dict to give it the right format - current = _create_setup_from_xml({'oml:setup_parameters': setup_}, - output_format=output_format) - if output_format == 'object': + current = _create_setup_from_xml( + {"oml:setup_parameters": setup_}, output_format=output_format + ) + if output_format == "object": setups[current.setup_id] = current else: - setups[current['setup_id']] = current + setups[current["setup_id"]] = current - if output_format == 'dataframe': - setups = 
pd.DataFrame.from_dict(setups, orient='index') + if output_format == "dataframe": + setups = pd.DataFrame.from_dict(setups, orient="index") return setups @@ -246,13 +249,12 @@ def initialize_model(setup_id: int) -> Any: # OpenMLFlow objects default parameter value so we can utilize the # Extension.flow_to_model() function to reinitialize the flow with the set defaults. for hyperparameter in setup.parameters.values(): - structure = flow.get_structure('flow_id') + structure = flow.get_structure("flow_id") if len(structure[hyperparameter.flow_id]) > 0: subflow = flow.get_subflow(structure[hyperparameter.flow_id]) else: subflow = flow - subflow.parameters[hyperparameter.parameter_name] = \ - hyperparameter.value + subflow.parameters[hyperparameter.parameter_name] = hyperparameter.value model = flow.extension.flow_to_model(flow) return model @@ -261,63 +263,70 @@ def initialize_model(setup_id: int) -> Any: def _to_dict(flow_id, openml_parameter_settings): # for convenience, this function (ab)uses the run object. xml = OrderedDict() - xml['oml:run'] = OrderedDict() - xml['oml:run']['@xmlns:oml'] = 'http://openml.org/openml' - xml['oml:run']['oml:flow_id'] = flow_id - xml['oml:run']['oml:parameter_setting'] = openml_parameter_settings + xml["oml:run"] = OrderedDict() + xml["oml:run"]["@xmlns:oml"] = "http://openml.org/openml" + xml["oml:run"]["oml:flow_id"] = flow_id + xml["oml:run"]["oml:parameter_setting"] = openml_parameter_settings return xml -def _create_setup_from_xml(result_dict, output_format='object'): +def _create_setup_from_xml(result_dict, output_format="object"): """ Turns an API xml result into a OpenMLSetup object (or dict) """ - setup_id = int(result_dict['oml:setup_parameters']['oml:setup_id']) - flow_id = int(result_dict['oml:setup_parameters']['oml:flow_id']) + setup_id = int(result_dict["oml:setup_parameters"]["oml:setup_id"]) + flow_id = int(result_dict["oml:setup_parameters"]["oml:flow_id"]) parameters = {} - if 'oml:parameter' not in result_dict['oml:setup_parameters']: + if "oml:parameter" not in result_dict["oml:setup_parameters"]: parameters = None else: # basically all others - xml_parameters = result_dict['oml:setup_parameters']['oml:parameter'] + xml_parameters = result_dict["oml:setup_parameters"]["oml:parameter"] if isinstance(xml_parameters, dict): - id = int(xml_parameters['oml:id']) - parameters[id] = _create_setup_parameter_from_xml(result_dict=xml_parameters, - output_format=output_format) + id = int(xml_parameters["oml:id"]) + parameters[id] = _create_setup_parameter_from_xml( + result_dict=xml_parameters, output_format=output_format + ) elif isinstance(xml_parameters, list): for xml_parameter in xml_parameters: - id = int(xml_parameter['oml:id']) - parameters[id] = \ - _create_setup_parameter_from_xml(result_dict=xml_parameter, - output_format=output_format) + id = int(xml_parameter["oml:id"]) + parameters[id] = _create_setup_parameter_from_xml( + result_dict=xml_parameter, output_format=output_format + ) else: - raise ValueError('Expected None, list or dict, received ' - 'something else: %s' % str(type(xml_parameters))) - - if output_format in ['dataframe', 'dict']: - return_dict = {'setup_id': setup_id, 'flow_id': flow_id} - return_dict['parameters'] = parameters - return(return_dict) + raise ValueError( + "Expected None, list or dict, received " + "something else: %s" % str(type(xml_parameters)) + ) + + if output_format in ["dataframe", "dict"]: + return_dict = {"setup_id": setup_id, "flow_id": flow_id} + return_dict["parameters"] = parameters + return 
return_dict return OpenMLSetup(setup_id, flow_id, parameters) -def _create_setup_parameter_from_xml(result_dict, output_format='object'): - if output_format == 'object': - return OpenMLParameter(input_id=int(result_dict['oml:id']), - flow_id=int(result_dict['oml:flow_id']), - flow_name=result_dict['oml:flow_name'], - full_name=result_dict['oml:full_name'], - parameter_name=result_dict['oml:parameter_name'], - data_type=result_dict['oml:data_type'], - default_value=result_dict['oml:default_value'], - value=result_dict['oml:value']) +def _create_setup_parameter_from_xml(result_dict, output_format="object"): + if output_format == "object": + return OpenMLParameter( + input_id=int(result_dict["oml:id"]), + flow_id=int(result_dict["oml:flow_id"]), + flow_name=result_dict["oml:flow_name"], + full_name=result_dict["oml:full_name"], + parameter_name=result_dict["oml:parameter_name"], + data_type=result_dict["oml:data_type"], + default_value=result_dict["oml:default_value"], + value=result_dict["oml:value"], + ) else: - return({'input_id': int(result_dict['oml:id']), - 'flow_id': int(result_dict['oml:flow_id']), - 'flow_name': result_dict['oml:flow_name'], - 'full_name': result_dict['oml:full_name'], - 'parameter_name': result_dict['oml:parameter_name'], - 'data_type': result_dict['oml:data_type'], - 'default_value': result_dict['oml:default_value'], - 'value': result_dict['oml:value']}) + return { + "input_id": int(result_dict["oml:id"]), + "flow_id": int(result_dict["oml:flow_id"]), + "flow_name": result_dict["oml:flow_name"], + "full_name": result_dict["oml:full_name"], + "parameter_name": result_dict["oml:parameter_name"], + "data_type": result_dict["oml:data_type"], + "default_value": result_dict["oml:default_value"], + "value": result_dict["oml:value"], + } diff --git a/openml/setups/setup.py b/openml/setups/setup.py index 36bddb11f..44919fd09 100644 --- a/openml/setups/setup.py +++ b/openml/setups/setup.py @@ -18,12 +18,12 @@ class OpenMLSetup(object): def __init__(self, setup_id, flow_id, parameters): if not isinstance(setup_id, int): - raise ValueError('setup id should be int') + raise ValueError("setup id should be int") if not isinstance(flow_id, int): - raise ValueError('flow id should be int') + raise ValueError("flow id should be int") if parameters is not None: if not isinstance(parameters, dict): - raise ValueError('parameters should be dict') + raise ValueError("parameters should be dict") self.setup_id = setup_id self.flow_id = flow_id @@ -31,12 +31,14 @@ def __init__(self, setup_id, flow_id, parameters): def __repr__(self): header = "OpenML Setup" - header = '{}\n{}\n'.format(header, '=' * len(header)) + header = "{}\n{}\n".format(header, "=" * len(header)) - fields = {"Setup ID": self.setup_id, - "Flow ID": self.flow_id, - "Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id), - "# of Parameters": len(self.parameters)} + fields = { + "Setup ID": self.setup_id, + "Flow ID": self.flow_id, + "Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id), + "# of Parameters": len(self.parameters), + } # determines the order in which the information will be printed order = ["Setup ID", "Flow ID", "Flow URL", "# of Parameters"] @@ -44,7 +46,7 @@ def __repr__(self): longest_field_name_length = max(len(name) for name, value in fields) field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length) - body = '\n'.join(field_line_format.format(name, value) for name, value in fields) + body = "\n".join(field_line_format.format(name, value) for name, value in fields) return 
header + body @@ -72,8 +74,18 @@ class OpenMLParameter(object): value : str If the parameter was set, the value that it was set to. """ - def __init__(self, input_id, flow_id, flow_name, full_name, parameter_name, - data_type, default_value, value): + + def __init__( + self, + input_id, + flow_id, + flow_name, + full_name, + parameter_name, + data_type, + default_value, + value, + ): self.id = input_id self.flow_id = flow_id self.flow_name = flow_name @@ -85,14 +97,16 @@ def __init__(self, input_id, flow_id, flow_name, full_name, parameter_name, def __repr__(self): header = "OpenML Parameter" - header = '{}\n{}\n'.format(header, '=' * len(header)) - - fields = {"ID": self.id, - "Flow ID": self.flow_id, - # "Flow Name": self.flow_name, - "Flow Name": self.full_name, - "Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id), - "Parameter Name": self.parameter_name} + header = "{}\n{}\n".format(header, "=" * len(header)) + + fields = { + "ID": self.id, + "Flow ID": self.flow_id, + # "Flow Name": self.flow_name, + "Flow Name": self.full_name, + "Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id), + "Parameter Name": self.parameter_name, + } # indented prints for parameter attributes # indention = 2 spaces + 1 | + 2 underscores indent = "{}|{}".format(" " * 2, "_" * 2) @@ -104,11 +118,19 @@ def __repr__(self): fields[parameter_value] = self.value # determines the order in which the information will be printed - order = ["ID", "Flow ID", "Flow Name", "Flow URL", "Parameter Name", - parameter_data_type, parameter_default, parameter_value] + order = [ + "ID", + "Flow ID", + "Flow Name", + "Flow URL", + "Parameter Name", + parameter_data_type, + parameter_default, + parameter_value, + ] fields = [(key, fields[key]) for key in order if key in fields] longest_field_name_length = max(len(name) for name, value in fields) field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length) - body = '\n'.join(field_line_format.format(name, value) for name, value in fields) + body = "\n".join(field_line_format.format(name, value) for name, value in fields) return header + body diff --git a/openml/study/__init__.py b/openml/study/__init__.py index 8fe308a8c..030ee05c2 100644 --- a/openml/study/__init__.py +++ b/openml/study/__init__.py @@ -20,20 +20,20 @@ __all__ = [ - 'OpenMLStudy', - 'OpenMLBenchmarkSuite', - 'attach_to_study', - 'attach_to_suite', - 'create_benchmark_suite', - 'create_study', - 'delete_study', - 'delete_suite', - 'detach_from_study', - 'detach_from_suite', - 'get_study', - 'get_suite', - 'list_studies', - 'list_suites', - 'update_suite_status', - 'update_study_status', + "OpenMLStudy", + "OpenMLBenchmarkSuite", + "attach_to_study", + "attach_to_suite", + "create_benchmark_suite", + "create_study", + "delete_study", + "delete_suite", + "detach_from_study", + "detach_from_suite", + "get_study", + "get_suite", + "list_studies", + "list_suites", + "update_suite_status", + "update_study_status", ] diff --git a/openml/study/functions.py b/openml/study/functions.py index 015b5c19a..632581022 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -26,13 +26,12 @@ def get_suite(suite_id: Union[int, str]) -> OpenMLBenchmarkSuite: OpenMLSuite The OpenML suite object """ - suite = cast(OpenMLBenchmarkSuite, _get_study(suite_id, entity_type='task')) + suite = cast(OpenMLBenchmarkSuite, _get_study(suite_id, entity_type="task")) return suite def get_study( - study_id: Union[int, str], - arg_for_backwards_compat: Optional[str] = None, + study_id: Union[int, str], 
arg_for_backwards_compat: Optional[str] = None, ) -> OpenMLStudy: # noqa F401 """ Retrieves all relevant information of an OpenML study from the server. @@ -53,86 +52,89 @@ def get_study( OpenMLStudy The OpenML study object """ - if study_id == 'OpenML100': + if study_id == "OpenML100": message = ( "It looks like you are running code from the OpenML100 paper. It still works, but lots " "of things have changed since then. Please use `get_suite('OpenML100')` instead." ) warnings.warn(message, DeprecationWarning) openml.config.logger.warn(message) - study = _get_study(study_id, entity_type='task') + study = _get_study(study_id, entity_type="task") return cast(OpenMLBenchmarkSuite, study) # type: ignore else: - study = cast(OpenMLStudy, _get_study(study_id, entity_type='run')) + study = cast(OpenMLStudy, _get_study(study_id, entity_type="run")) return study def _get_study(id_: Union[int, str], entity_type) -> BaseStudy: call_suffix = "study/{}".format(str(id_)) - xml_string = openml._api_calls._perform_api_call(call_suffix, 'get') + xml_string = openml._api_calls._perform_api_call(call_suffix, "get") force_list_tags = ( - 'oml:data_id', 'oml:flow_id', 'oml:task_id', 'oml:setup_id', - 'oml:run_id', - 'oml:tag' # legacy. + "oml:data_id", + "oml:flow_id", + "oml:task_id", + "oml:setup_id", + "oml:run_id", + "oml:tag", # legacy. ) - result_dict = xmltodict.parse(xml_string, force_list=force_list_tags)['oml:study'] - study_id = int(result_dict['oml:id']) - alias = result_dict['oml:alias'] if 'oml:alias' in result_dict else None - main_entity_type = result_dict['oml:main_entity_type'] + result_dict = xmltodict.parse(xml_string, force_list=force_list_tags)["oml:study"] + study_id = int(result_dict["oml:id"]) + alias = result_dict["oml:alias"] if "oml:alias" in result_dict else None + main_entity_type = result_dict["oml:main_entity_type"] if entity_type != main_entity_type: raise ValueError( "Unexpected entity type '{}' reported by the server, expected '{}'".format( main_entity_type, entity_type, ) ) - benchmark_suite = result_dict['oml:benchmark_suite'] \ - if 'oml:benchmark_suite' in result_dict else None - name = result_dict['oml:name'] - description = result_dict['oml:description'] - status = result_dict['oml:status'] - creation_date = result_dict['oml:creation_date'] + benchmark_suite = ( + result_dict["oml:benchmark_suite"] if "oml:benchmark_suite" in result_dict else None + ) + name = result_dict["oml:name"] + description = result_dict["oml:description"] + status = result_dict["oml:status"] + creation_date = result_dict["oml:creation_date"] creation_date_as_date = dateutil.parser.parse(creation_date) - creator = result_dict['oml:creator'] + creator = result_dict["oml:creator"] # tags is legacy. remove once no longer needed. 
tags = [] - if 'oml:tag' in result_dict: - for tag in result_dict['oml:tag']: - current_tag = {'name': tag['oml:name'], - 'write_access': tag['oml:write_access']} - if 'oml:window_start' in tag: - current_tag['window_start'] = tag['oml:window_start'] + if "oml:tag" in result_dict: + for tag in result_dict["oml:tag"]: + current_tag = {"name": tag["oml:name"], "write_access": tag["oml:write_access"]} + if "oml:window_start" in tag: + current_tag["window_start"] = tag["oml:window_start"] tags.append(current_tag) - if 'oml:data' in result_dict: - datasets = [int(x) for x in result_dict['oml:data']['oml:data_id']] + if "oml:data" in result_dict: + datasets = [int(x) for x in result_dict["oml:data"]["oml:data_id"]] else: - raise ValueError('No datasets attached to study {}!'.format(id_)) - if 'oml:tasks' in result_dict: - tasks = [int(x) for x in result_dict['oml:tasks']['oml:task_id']] + raise ValueError("No datasets attached to study {}!".format(id_)) + if "oml:tasks" in result_dict: + tasks = [int(x) for x in result_dict["oml:tasks"]["oml:task_id"]] else: - raise ValueError('No tasks attached to study {}!'.format(id_)) + raise ValueError("No tasks attached to study {}!".format(id_)) - if main_entity_type in ['runs', 'run']: + if main_entity_type in ["runs", "run"]: - if 'oml:flows' in result_dict: - flows = [int(x) for x in result_dict['oml:flows']['oml:flow_id']] + if "oml:flows" in result_dict: + flows = [int(x) for x in result_dict["oml:flows"]["oml:flow_id"]] else: - raise ValueError('No flows attached to study {}!'.format(id_)) - if 'oml:setups' in result_dict: - setups = [int(x) for x in result_dict['oml:setups']['oml:setup_id']] + raise ValueError("No flows attached to study {}!".format(id_)) + if "oml:setups" in result_dict: + setups = [int(x) for x in result_dict["oml:setups"]["oml:setup_id"]] else: - raise ValueError('No setups attached to study {}!'.format(id_)) - if 'oml:runs' in result_dict: + raise ValueError("No setups attached to study {}!".format(id_)) + if "oml:runs" in result_dict: runs = [ - int(x) for x in result_dict['oml:runs']['oml:run_id'] + int(x) for x in result_dict["oml:runs"]["oml:run_id"] ] # type: Optional[List[int]] else: - if creation_date_as_date < dateutil.parser.parse('2019-01-01'): + if creation_date_as_date < dateutil.parser.parse("2019-01-01"): # Legacy studies did not require runs runs = None else: - raise ValueError('No runs attached to study {}!'.format(id_)) + raise ValueError("No runs attached to study {}!".format(id_)) study = OpenMLStudy( study_id=study_id, @@ -151,7 +153,7 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy: runs=runs, ) # type: BaseStudy - elif main_entity_type in ['tasks', 'task']: + elif main_entity_type in ["tasks", "task"]: study = OpenMLBenchmarkSuite( suite_id=study_id, @@ -163,11 +165,11 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy: creator=creator, tags=tags, data=datasets, - tasks=tasks + tasks=tasks, ) else: - raise ValueError('Unknown entity type {}'.format(main_entity_type)) + raise ValueError("Unknown entity type {}".format(main_entity_type)) return study @@ -221,10 +223,7 @@ def create_study( def create_benchmark_suite( - name: str, - description: str, - task_ids: List[int], - alias: Optional[str], + name: str, description: str, task_ids: List[int], alias: Optional[str], ) -> OpenMLBenchmarkSuite: """ Creates an OpenML benchmark suite (collection of entity types, where @@ -285,20 +284,17 @@ def update_study_status(study_id: int, status: str) -> None: status : str, 'active' or 
'deactivated' """ - legal_status = {'active', 'deactivated'} + legal_status = {"active", "deactivated"} if status not in legal_status: - raise ValueError('Illegal status value. ' - 'Legal values: %s' % legal_status) - data = {'study_id': study_id, 'status': status} - result_xml = openml._api_calls._perform_api_call("study/status/update", - 'post', - data=data) + raise ValueError("Illegal status value. " "Legal values: %s" % legal_status) + data = {"study_id": study_id, "status": status} + result_xml = openml._api_calls._perform_api_call("study/status/update", "post", data=data) result = xmltodict.parse(result_xml) - server_study_id = result['oml:study_status_update']['oml:id'] - server_status = result['oml:study_status_update']['oml:status'] + server_study_id = result["oml:study_status_update"]["oml:id"] + server_status = result["oml:study_status_update"]["oml:status"] if status != server_status or int(study_id) != int(server_study_id): # This should never happen - raise ValueError('Study id/status does not collide') + raise ValueError("Study id/status does not collide") def delete_suite(suite_id: int) -> bool: @@ -330,7 +326,7 @@ def delete_study(study_id: int) -> bool: bool True iff the deletion was successful. False otherwise """ - return openml.utils._delete_entity('study', study_id) + return openml.utils._delete_entity("study", study_id) def attach_to_suite(suite_id: int, task_ids: List[int]) -> int: @@ -370,11 +366,11 @@ def attach_to_study(study_id: int, run_ids: List[int]) -> int: """ # Interestingly, there's no need to tell the server about the entity type, it knows by itself - uri = 'study/%d/attach' % study_id - post_variables = {'ids': ','.join(str(x) for x in run_ids)} - result_xml = openml._api_calls._perform_api_call(uri, 'post', post_variables) - result = xmltodict.parse(result_xml)['oml:study_attach'] - return int(result['oml:linked_entities']) + uri = "study/%d/attach" % study_id + post_variables = {"ids": ",".join(str(x) for x in run_ids)} + result_xml = openml._api_calls._perform_api_call(uri, "post", post_variables) + result = xmltodict.parse(result_xml)["oml:study_attach"] + return int(result["oml:linked_entities"]) def detach_from_suite(suite_id: int, task_ids: List[int]) -> int: @@ -413,11 +409,11 @@ def detach_from_study(study_id: int, run_ids: List[int]) -> int: """ # Interestingly, there's no need to tell the server about the entity type, it knows by itself - uri = 'study/%d/detach' % study_id - post_variables = {'ids': ','.join(str(x) for x in run_ids)} - result_xml = openml._api_calls._perform_api_call(uri, 'post', post_variables) - result = xmltodict.parse(result_xml)['oml:study_detach'] - return int(result['oml:linked_entities']) + uri = "study/%d/detach" % study_id + post_variables = {"ids": ",".join(str(x) for x in run_ids)} + result_xml = openml._api_calls._perform_api_call(uri, "post", post_variables) + result = xmltodict.parse(result_xml)["oml:study_detach"] + return int(result["oml:linked_entities"]) def list_suites( @@ -425,7 +421,7 @@ def list_suites( size: Optional[int] = None, status: Optional[str] = None, uploader: Optional[List[int]] = None, - output_format: str = 'dict' + output_format: str = "dict", ) -> Union[Dict, pd.DataFrame]: """ Return a list of all suites which are on OpenML. @@ -469,17 +465,20 @@ def list_suites( - creator - creation_date """ - if output_format not in ['dataframe', 'dict']: - raise ValueError("Invalid output format selected. 
" - "Only 'dict' or 'dataframe' applicable.") + if output_format not in ["dataframe", "dict"]: + raise ValueError( + "Invalid output format selected. " "Only 'dict' or 'dataframe' applicable." + ) - return openml.utils._list_all(output_format=output_format, - listing_call=_list_studies, - offset=offset, - size=size, - main_entity_type='task', - status=status, - uploader=uploader,) + return openml.utils._list_all( + output_format=output_format, + listing_call=_list_studies, + offset=offset, + size=size, + main_entity_type="task", + status=status, + uploader=uploader, + ) def list_studies( @@ -488,7 +487,7 @@ def list_studies( status: Optional[str] = None, uploader: Optional[List[str]] = None, benchmark_suite: Optional[int] = None, - output_format: str = 'dict' + output_format: str = "dict", ) -> Union[Dict, pd.DataFrame]: """ Return a list of all studies which are on OpenML. @@ -539,21 +538,24 @@ def list_studies( If qualities are calculated for the dataset, some of these are also returned. """ - if output_format not in ['dataframe', 'dict']: - raise ValueError("Invalid output format selected. " - "Only 'dict' or 'dataframe' applicable.") + if output_format not in ["dataframe", "dict"]: + raise ValueError( + "Invalid output format selected. " "Only 'dict' or 'dataframe' applicable." + ) - return openml.utils._list_all(output_format=output_format, - listing_call=_list_studies, - offset=offset, - size=size, - main_entity_type='run', - status=status, - uploader=uploader, - benchmark_suite=benchmark_suite) + return openml.utils._list_all( + output_format=output_format, + listing_call=_list_studies, + offset=offset, + size=size, + main_entity_type="run", + status=status, + uploader=uploader, + benchmark_suite=benchmark_suite, + ) -def _list_studies(output_format='dict', **kwargs) -> Union[Dict, pd.DataFrame]: +def _list_studies(output_format="dict", **kwargs) -> Union[Dict, pd.DataFrame]: """ Perform api call to return a list of studies. 
@@ -578,37 +580,39 @@ def _list_studies(output_format='dict', **kwargs) -> Union[Dict, pd.DataFrame]: return __list_studies(api_call=api_call, output_format=output_format) -def __list_studies(api_call, output_format='object') -> Union[Dict, pd.DataFrame]: - xml_string = openml._api_calls._perform_api_call(api_call, 'get') - study_dict = xmltodict.parse(xml_string, force_list=('oml:study',)) +def __list_studies(api_call, output_format="object") -> Union[Dict, pd.DataFrame]: + xml_string = openml._api_calls._perform_api_call(api_call, "get") + study_dict = xmltodict.parse(xml_string, force_list=("oml:study",)) # Minimalistic check if the XML is useful - assert type(study_dict['oml:study_list']['oml:study']) == list, \ - type(study_dict['oml:study_list']) - assert study_dict['oml:study_list']['@xmlns:oml'] == \ - 'http://openml.org/openml', study_dict['oml:study_list']['@xmlns:oml'] + assert type(study_dict["oml:study_list"]["oml:study"]) == list, type( + study_dict["oml:study_list"] + ) + assert study_dict["oml:study_list"]["@xmlns:oml"] == "http://openml.org/openml", study_dict[ + "oml:study_list" + ]["@xmlns:oml"] studies = dict() - for study_ in study_dict['oml:study_list']['oml:study']: + for study_ in study_dict["oml:study_list"]["oml:study"]: # maps from xml name to a tuple of (dict name, casting fn) expected_fields = { - 'oml:id': ('id', int), - 'oml:alias': ('alias', str), - 'oml:main_entity_type': ('main_entity_type', str), - 'oml:benchmark_suite': ('benchmark_suite', int), - 'oml:name': ('name', str), - 'oml:status': ('status', str), - 'oml:creation_date': ('creation_date', str), - 'oml:creator': ('creator', int), + "oml:id": ("id", int), + "oml:alias": ("alias", str), + "oml:main_entity_type": ("main_entity_type", str), + "oml:benchmark_suite": ("benchmark_suite", int), + "oml:name": ("name", str), + "oml:status": ("status", str), + "oml:creation_date": ("creation_date", str), + "oml:creator": ("creator", int), } - study_id = int(study_['oml:id']) + study_id = int(study_["oml:id"]) current_study = dict() for oml_field_name, (real_field_name, cast_fn) in expected_fields.items(): if oml_field_name in study_: current_study[real_field_name] = cast_fn(study_[oml_field_name]) - current_study['id'] = int(current_study['id']) + current_study["id"] = int(current_study["id"]) studies[study_id] = current_study - if output_format == 'dataframe': - studies = pd.DataFrame.from_dict(studies, orient='index') + if output_format == "dataframe": + studies = pd.DataFrame.from_dict(studies, orient="index") return studies diff --git a/openml/study/study.py b/openml/study/study.py index 483804e03..2b00bb05c 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -54,6 +54,7 @@ class BaseStudy(OpenMLBase): setups : list a list of setup ids associated with this study """ + def __init__( self, study_id: Optional[int], @@ -91,7 +92,7 @@ def __init__( @classmethod def _entity_letter(cls) -> str: - return 's' + return "s" @property def id(self) -> Optional[int]: @@ -99,16 +100,18 @@ def id(self) -> Optional[int]: def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: """ Collect all information to display in the __repr__ body. 
""" - fields = {"Name": self.name, - "Status": self.status, - "Main Entity Type": self.main_entity_type} # type: Dict[str, Any] + fields = { + "Name": self.name, + "Status": self.status, + "Main Entity Type": self.main_entity_type, + } # type: Dict[str, Any] if self.study_id is not None: fields["ID"] = self.study_id fields["Study URL"] = self.openml_url if self.creator is not None: fields["Creator"] = "{}/u/{}".format(openml.config.get_server_base_url(), self.creator) if self.creation_date is not None: - fields["Upload Time"] = self.creation_date.replace('T', ' ') + fields["Upload Time"] = self.creation_date.replace("T", " ") if self.data is not None: fields["# of Data"] = len(self.data) if self.tasks is not None: @@ -119,31 +122,41 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: fields["# of Runs"] = len(self.runs) # determines the order in which the information will be printed - order = ["ID", "Name", "Status", "Main Entity Type", "Study URL", - "# of Data", "# of Tasks", "# of Flows", "# of Runs", - "Creator", "Upload Time"] + order = [ + "ID", + "Name", + "Status", + "Main Entity Type", + "Study URL", + "# of Data", + "# of Tasks", + "# of Flows", + "# of Runs", + "Creator", + "Upload Time", + ] return [(key, fields[key]) for key in order if key in fields] def _parse_publish_response(self, xml_response: Dict): """ Parse the id from the xml_response and assign it to self. """ - self.study_id = int(xml_response['oml:study_upload']['oml:id']) + self.study_id = int(xml_response["oml:study_upload"]["oml:id"]) - def _to_dict(self) -> 'OrderedDict[str, OrderedDict]': + def _to_dict(self) -> "OrderedDict[str, OrderedDict]": """ Creates a dictionary representation of self. """ # some can not be uploaded, e.g., id, creator, creation_date - simple_props = ['alias', 'main_entity_type', 'name', 'description'] + simple_props = ["alias", "main_entity_type", "name", "description"] # maps from attribute name (which is used as outer tag name) to immer # tag name (e.g., self.tasks -> 1987 # ) complex_props = { - 'tasks': 'task_id', - 'runs': 'run_id', + "tasks": "task_id", + "runs": "run_id", } study_container = OrderedDict() # type: 'OrderedDict' - namespace_list = [('@xmlns:oml', 'http://openml.org/openml')] + namespace_list = [("@xmlns:oml", "http://openml.org/openml")] study_dict = OrderedDict(namespace_list) # type: 'OrderedDict' - study_container['oml:study'] = study_dict + study_container["oml:study"] = study_dict for prop_name in simple_props: content = getattr(self, prop_name, None) @@ -152,9 +165,7 @@ def _to_dict(self) -> 'OrderedDict[str, OrderedDict]': for prop_name, inner_name in complex_props.items(): content = getattr(self, prop_name, None) if content is not None: - sub_dict = { - 'oml:' + inner_name: content - } + sub_dict = {"oml:" + inner_name: content} study_dict["oml:" + prop_name] = sub_dict return study_container @@ -210,6 +221,7 @@ class OpenMLStudy(BaseStudy): setups : list a list of setup ids associated with this study """ + def __init__( self, study_id: Optional[int], @@ -230,7 +242,7 @@ def __init__( super().__init__( study_id=study_id, alias=alias, - main_entity_type='run', + main_entity_type="run", benchmark_suite=benchmark_suite, name=name, description=description, @@ -302,7 +314,7 @@ def __init__( super().__init__( study_id=suite_id, alias=alias, - main_entity_type='task', + main_entity_type="task", benchmark_suite=None, name=name, description=description, diff --git a/openml/tasks/__init__.py b/openml/tasks/__init__.py index 
2bd319637..f5e046f37 100644 --- a/openml/tasks/__init__.py +++ b/openml/tasks/__init__.py @@ -18,16 +18,16 @@ ) __all__ = [ - 'OpenMLTask', - 'OpenMLSupervisedTask', - 'OpenMLClusteringTask', - 'OpenMLRegressionTask', - 'OpenMLClassificationTask', - 'OpenMLLearningCurveTask', - 'create_task', - 'get_task', - 'get_tasks', - 'list_tasks', - 'OpenMLSplit', - 'TaskTypeEnum' + "OpenMLTask", + "OpenMLSupervisedTask", + "OpenMLClusteringTask", + "OpenMLRegressionTask", + "OpenMLClassificationTask", + "OpenMLLearningCurveTask", + "create_task", + "get_task", + "get_tasks", + "list_tasks", + "OpenMLSplit", + "TaskTypeEnum", ] diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index a386dec17..a82ce4a12 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -18,13 +18,13 @@ TaskTypeEnum, OpenMLRegressionTask, OpenMLSupervisedTask, - OpenMLTask + OpenMLTask, ) import openml.utils import openml._api_calls -TASKS_CACHE_DIR_NAME = 'tasks' +TASKS_CACHE_DIR_NAME = "tasks" def _get_cached_tasks(): @@ -65,20 +65,14 @@ def _get_cached_task(tid: int) -> OpenMLTask: ------- OpenMLTask """ - tid_cache_dir = openml.utils._create_cache_directory_for_id( - TASKS_CACHE_DIR_NAME, - tid - ) + tid_cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, tid) try: - with io.open(os.path.join(tid_cache_dir, "task.xml"), encoding='utf8')\ - as fh: + with io.open(os.path.join(tid_cache_dir, "task.xml"), encoding="utf8") as fh: return _create_task_from_xml(fh.read()) except (OSError, IOError): - openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, - tid_cache_dir) - raise OpenMLCacheException("Task file for tid %d not " - "cached" % tid) + openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir) + raise OpenMLCacheException("Task file for tid %d not " "cached" % tid) def _get_estimation_procedure_list(): @@ -91,34 +85,33 @@ def _get_estimation_procedure_list(): name, type, repeats, folds, stratified. """ url_suffix = "estimationprocedure/list" - xml_string = openml._api_calls._perform_api_call(url_suffix, - 'get') + xml_string = openml._api_calls._perform_api_call(url_suffix, "get") procs_dict = xmltodict.parse(xml_string) # Minimalistic check if the XML is useful - if 'oml:estimationprocedures' not in procs_dict: - raise ValueError('Error in return XML, does not contain tag ' - 'oml:estimationprocedures.') - elif '@xmlns:oml' not in procs_dict['oml:estimationprocedures']: - raise ValueError('Error in return XML, does not contain tag ' - '@xmlns:oml as a child of oml:estimationprocedures.') - elif procs_dict['oml:estimationprocedures']['@xmlns:oml'] != \ - 'http://openml.org/openml': - raise ValueError('Error in return XML, value of ' - 'oml:estimationprocedures/@xmlns:oml is not ' - 'http://openml.org/openml, but %s' % - str(procs_dict['oml:estimationprocedures'][ - '@xmlns:oml'])) + if "oml:estimationprocedures" not in procs_dict: + raise ValueError("Error in return XML, does not contain tag " "oml:estimationprocedures.") + elif "@xmlns:oml" not in procs_dict["oml:estimationprocedures"]: + raise ValueError( + "Error in return XML, does not contain tag " + "@xmlns:oml as a child of oml:estimationprocedures." 
+ ) + elif procs_dict["oml:estimationprocedures"]["@xmlns:oml"] != "http://openml.org/openml": + raise ValueError( + "Error in return XML, value of " + "oml:estimationprocedures/@xmlns:oml is not " + "http://openml.org/openml, but %s" + % str(procs_dict["oml:estimationprocedures"]["@xmlns:oml"]) + ) procs = [] - for proc_ in procs_dict['oml:estimationprocedures'][ - 'oml:estimationprocedure']: + for proc_ in procs_dict["oml:estimationprocedures"]["oml:estimationprocedure"]: procs.append( { - 'id': int(proc_['oml:id']), - 'task_type_id': int(proc_['oml:ttid']), - 'name': proc_['oml:name'], - 'type': proc_['oml:type'], + "id": int(proc_["oml:id"]), + "task_type_id": int(proc_["oml:ttid"]), + "name": proc_["oml:name"], + "type": proc_["oml:type"], } ) @@ -130,7 +123,7 @@ def list_tasks( offset: Optional[int] = None, size: Optional[int] = None, tag: Optional[str] = None, - output_format: str = 'dict', + output_format: str = "dict", **kwargs ) -> Union[Dict, pd.DataFrame]: """ @@ -179,19 +172,22 @@ def list_tasks( as columns: task id, dataset id, task_type and status. If qualities are calculated for the associated dataset, some of these are also returned. """ - if output_format not in ['dataframe', 'dict']: - raise ValueError("Invalid output format selected. " - "Only 'dict' or 'dataframe' applicable.") - return openml.utils._list_all(output_format=output_format, - listing_call=_list_tasks, - task_type_id=task_type_id, - offset=offset, - size=size, - tag=tag, - **kwargs) - - -def _list_tasks(task_type_id=None, output_format='dict', **kwargs): + if output_format not in ["dataframe", "dict"]: + raise ValueError( + "Invalid output format selected. " "Only 'dict' or 'dataframe' applicable." + ) + return openml.utils._list_all( + output_format=output_format, + listing_call=_list_tasks, + task_type_id=task_type_id, + offset=offset, + size=size, + tag=tag, + **kwargs + ) + + +def _list_tasks(task_type_id=None, output_format="dict", **kwargs): """ Perform the api call to return a number of tasks having the given filters. 
Parameters @@ -228,81 +224,75 @@ def _list_tasks(task_type_id=None, output_format='dict', **kwargs): api_call += "/type/%d" % int(task_type_id) if kwargs is not None: for operator, value in kwargs.items(): - if operator == 'task_id': - value = ','.join([str(int(i)) for i in value]) + if operator == "task_id": + value = ",".join([str(int(i)) for i in value]) api_call += "/%s/%s" % (operator, value) return __list_tasks(api_call=api_call, output_format=output_format) -def __list_tasks(api_call, output_format='dict'): - xml_string = openml._api_calls._perform_api_call(api_call, 'get') - tasks_dict = xmltodict.parse(xml_string, force_list=('oml:task', - 'oml:input')) +def __list_tasks(api_call, output_format="dict"): + xml_string = openml._api_calls._perform_api_call(api_call, "get") + tasks_dict = xmltodict.parse(xml_string, force_list=("oml:task", "oml:input")) # Minimalistic check if the XML is useful - if 'oml:tasks' not in tasks_dict: - raise ValueError('Error in return XML, does not contain "oml:runs": %s' - % str(tasks_dict)) - elif '@xmlns:oml' not in tasks_dict['oml:tasks']: - raise ValueError('Error in return XML, does not contain ' - '"oml:runs"/@xmlns:oml: %s' - % str(tasks_dict)) - elif tasks_dict['oml:tasks']['@xmlns:oml'] != 'http://openml.org/openml': - raise ValueError('Error in return XML, value of ' - '"oml:runs"/@xmlns:oml is not ' - '"http://openml.org/openml": %s' - % str(tasks_dict)) - - assert type(tasks_dict['oml:tasks']['oml:task']) == list, \ - type(tasks_dict['oml:tasks']) + if "oml:tasks" not in tasks_dict: + raise ValueError('Error in return XML, does not contain "oml:runs": %s' % str(tasks_dict)) + elif "@xmlns:oml" not in tasks_dict["oml:tasks"]: + raise ValueError( + "Error in return XML, does not contain " '"oml:runs"/@xmlns:oml: %s' % str(tasks_dict) + ) + elif tasks_dict["oml:tasks"]["@xmlns:oml"] != "http://openml.org/openml": + raise ValueError( + "Error in return XML, value of " + '"oml:runs"/@xmlns:oml is not ' + '"http://openml.org/openml": %s' % str(tasks_dict) + ) + + assert type(tasks_dict["oml:tasks"]["oml:task"]) == list, type(tasks_dict["oml:tasks"]) tasks = dict() procs = _get_estimation_procedure_list() - proc_dict = dict((x['id'], x) for x in procs) + proc_dict = dict((x["id"], x) for x in procs) - for task_ in tasks_dict['oml:tasks']['oml:task']: + for task_ in tasks_dict["oml:tasks"]["oml:task"]: tid = None try: - tid = int(task_['oml:task_id']) - task = {'tid': tid, - 'ttid': int(task_['oml:task_type_id']), - 'did': int(task_['oml:did']), - 'name': task_['oml:name'], - 'task_type': task_['oml:task_type'], - 'status': task_['oml:status']} + tid = int(task_["oml:task_id"]) + task = { + "tid": tid, + "ttid": int(task_["oml:task_type_id"]), + "did": int(task_["oml:did"]), + "name": task_["oml:name"], + "task_type": task_["oml:task_type"], + "status": task_["oml:status"], + } # Other task inputs - for input in task_.get('oml:input', list()): - if input['@name'] == 'estimation_procedure': - task[input['@name']] = \ - proc_dict[int(input['#text'])]['name'] + for input in task_.get("oml:input", list()): + if input["@name"] == "estimation_procedure": + task[input["@name"]] = proc_dict[int(input["#text"])]["name"] else: - value = input.get('#text') - task[input['@name']] = value + value = input.get("#text") + task[input["@name"]] = value # The number of qualities can range from 0 to infinity - for quality in task_.get('oml:quality', list()): - if '#text' not in quality: + for quality in task_.get("oml:quality", list()): + if "#text" not in quality: 
quality_value = 0.0 else: - quality['#text'] = float(quality['#text']) - if abs(int(quality['#text']) - quality['#text']) \ - < 0.0000001: - quality['#text'] = int(quality['#text']) - quality_value = quality['#text'] - task[quality['@name']] = quality_value + quality["#text"] = float(quality["#text"]) + if abs(int(quality["#text"]) - quality["#text"]) < 0.0000001: + quality["#text"] = int(quality["#text"]) + quality_value = quality["#text"] + task[quality["@name"]] = quality_value tasks[tid] = task except KeyError as e: if tid is not None: - raise KeyError( - "Invalid xml for task %d: %s\nFrom %s" % ( - tid, e, task_ - ) - ) + raise KeyError("Invalid xml for task %d: %s\nFrom %s" % (tid, e, task_)) else: - raise KeyError('Could not find key %s in %s!' % (e, task_)) + raise KeyError("Could not find key %s in %s!" % (e, task_)) - if output_format == 'dataframe': - tasks = pd.DataFrame.from_dict(tasks, orient='index') + if output_format == "dataframe": + tasks = pd.DataFrame.from_dict(tasks, orient="index") return tasks @@ -351,12 +341,9 @@ def get_task(task_id: int, download_data: bool = True) -> OpenMLTask: try: task_id = int(task_id) except (ValueError, TypeError): - raise ValueError("Dataset ID is neither an Integer nor can be " - "cast to an Integer.") + raise ValueError("Dataset ID is neither an Integer nor can be " "cast to an Integer.") - tid_cache_dir = openml.utils._create_cache_directory_for_id( - TASKS_CACHE_DIR_NAME, task_id, - ) + tid_cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id,) try: task = _get_task_description(task_id) @@ -365,8 +352,7 @@ def get_task(task_id: int, download_data: bool = True) -> OpenMLTask: # Including class labels as part of task meta data handles # the case where data download was initially disabled if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)): - task.class_labels = \ - dataset.retrieve_class_labels(task.target_name) + task.class_labels = dataset.retrieve_class_labels(task.target_name) # Clustering tasks do not have class labels # and do not offer download_split if download_data: @@ -374,8 +360,7 @@ def get_task(task_id: int, download_data: bool = True) -> OpenMLTask: task.download_split() except Exception as e: openml.utils._remove_cache_dir_for_id( - TASKS_CACHE_DIR_NAME, - tid_cache_dir, + TASKS_CACHE_DIR_NAME, tid_cache_dir, ) raise e @@ -388,16 +373,11 @@ def _get_task_description(task_id): return _get_cached_task(task_id) except OpenMLCacheException: xml_file = os.path.join( - openml.utils._create_cache_directory_for_id( - TASKS_CACHE_DIR_NAME, - task_id, - ), - "task.xml", + openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id,), "task.xml", ) - task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, - 'get') + task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, "get") - with io.open(xml_file, "w", encoding='utf8') as fh: + with io.open(xml_file, "w", encoding="utf8") as fh: fh.write(task_xml) return _create_task_from_xml(task_xml) @@ -432,40 +412,40 @@ def _create_task_from_xml(xml): inputs[name] = dic["oml:input"] evaluation_measures = None - if 'evaluation_measures' in inputs: - evaluation_measures = inputs["evaluation_measures"][ - "oml:evaluation_measures"]["oml:evaluation_measure"] + if "evaluation_measures" in inputs: + evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][ + "oml:evaluation_measure" + ] task_type_id = int(dic["oml:task_type_id"]) common_kwargs = { - 'task_id': dic["oml:task_id"], - 
'task_type': dic["oml:task_type"], - 'task_type_id': dic["oml:task_type_id"], - 'data_set_id': inputs["source_data"][ - "oml:data_set"]["oml:data_set_id"], - 'evaluation_measure': evaluation_measures, + "task_id": dic["oml:task_id"], + "task_type": dic["oml:task_type"], + "task_type_id": dic["oml:task_type_id"], + "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"], + "evaluation_measure": evaluation_measures, } if task_type_id in ( TaskTypeEnum.SUPERVISED_CLASSIFICATION, TaskTypeEnum.SUPERVISED_REGRESSION, - TaskTypeEnum.LEARNING_CURVE + TaskTypeEnum.LEARNING_CURVE, ): # Convert some more parameters - for parameter in \ - inputs["estimation_procedure"]["oml:estimation_procedure"][ - "oml:parameter"]: + for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][ + "oml:parameter" + ]: name = parameter["@name"] text = parameter.get("#text", "") estimation_parameters[name] = text - common_kwargs['estimation_procedure_type'] = inputs[ - "estimation_procedure"][ - "oml:estimation_procedure"]["oml:type"] - common_kwargs['estimation_parameters'] = estimation_parameters - common_kwargs['target_name'] = inputs[ - "source_data"]["oml:data_set"]["oml:target_feature"] - common_kwargs['data_splits_url'] = inputs["estimation_procedure"][ - "oml:estimation_procedure"]["oml:data_splits_url"] + common_kwargs["estimation_procedure_type"] = inputs["estimation_procedure"][ + "oml:estimation_procedure" + ]["oml:type"] + common_kwargs["estimation_parameters"] = estimation_parameters + common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"]["oml:target_feature"] + common_kwargs["data_splits_url"] = inputs["estimation_procedure"][ + "oml:estimation_procedure" + ]["oml:data_splits_url"] cls = { TaskTypeEnum.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, @@ -474,21 +454,19 @@ def _create_task_from_xml(xml): TaskTypeEnum.LEARNING_CURVE: OpenMLLearningCurveTask, }.get(task_type_id) if cls is None: - raise NotImplementedError('Task type %s not supported.' % - common_kwargs['task_type']) + raise NotImplementedError("Task type %s not supported." % common_kwargs["task_type"]) return cls(**common_kwargs) def create_task( - task_type_id: int, - dataset_id: int, - estimation_procedure_id: int, - target_name: Optional[str] = None, - evaluation_measure: Optional[str] = None, - **kwargs + task_type_id: int, + dataset_id: int, + estimation_procedure_id: int, + target_name: Optional[str] = None, + evaluation_measure: Optional[str] = None, + **kwargs ) -> Union[ - OpenMLClassificationTask, OpenMLRegressionTask, - OpenMLLearningCurveTask, OpenMLClusteringTask + OpenMLClassificationTask, OpenMLRegressionTask, OpenMLLearningCurveTask, OpenMLClusteringTask ]: """Create a task based on different given attributes. 
@@ -530,9 +508,7 @@ def create_task( }.get(task_type_id) if task_cls is None: - raise NotImplementedError( - 'Task type {0:d} not supported.'.format(task_type_id) - ) + raise NotImplementedError("Task type {0:d} not supported.".format(task_type_id)) else: return task_cls( task_type_id=task_type_id, diff --git a/openml/tasks/split.py b/openml/tasks/split.py index ad6170a62..515be895a 100644 --- a/openml/tasks/split.py +++ b/openml/tasks/split.py @@ -33,42 +33,45 @@ def __init__(self, name, description, split): for fold in split[repetition]: self.split[repetition][fold] = OrderedDict() for sample in split[repetition][fold]: - self.split[repetition][fold][sample] = split[ - repetition][fold][sample] + self.split[repetition][fold][sample] = split[repetition][fold][sample] self.repeats = len(self.split) - if any([len(self.split[0]) != len(self.split[i]) - for i in range(self.repeats)]): - raise ValueError('') + if any([len(self.split[0]) != len(self.split[i]) for i in range(self.repeats)]): + raise ValueError("") self.folds = len(self.split[0]) self.samples = len(self.split[0][0]) def __eq__(self, other): - if (type(self) != type(other) - or self.name != other.name - or self.description != other.description - or self.split.keys() != other.split.keys()): + if ( + type(self) != type(other) + or self.name != other.name + or self.description != other.description + or self.split.keys() != other.split.keys() + ): return False - if any(self.split[repetition].keys() != other.split[repetition].keys() - for repetition in self.split): + if any( + self.split[repetition].keys() != other.split[repetition].keys() + for repetition in self.split + ): return False - samples = [(repetition, fold, sample) - for repetition in self.split - for fold in self.split[repetition] - for sample in self.split[repetition][fold]] + samples = [ + (repetition, fold, sample) + for repetition in self.split + for fold in self.split[repetition] + for sample in self.split[repetition][fold] + ] for repetition, fold, sample in samples: self_train, self_test = self.split[repetition][fold][sample] other_train, other_test = other.split[repetition][fold][sample] - if not (np.all(self_train == other_train) - and np.all(self_test == other_test)): + if not (np.all(self_train == other_train) and np.all(self_test == other_test)): return False return True @classmethod - def _from_arff_file(cls, filename: str) -> 'OpenMLSplit': + def _from_arff_file(cls, filename: str) -> "OpenMLSplit": repetitions = None @@ -84,25 +87,19 @@ def _from_arff_file(cls, filename: str) -> 'OpenMLSplit': if repetitions is None: # Faster than liac-arff and sufficient in this situation! if not os.path.exists(filename): - raise FileNotFoundError( - 'Split arff %s does not exist!' % filename - ) + raise FileNotFoundError("Split arff %s does not exist!" 
% filename) file_data = arff.load(open(filename), return_type=arff.DENSE_GEN) - splits = file_data['data'] - name = file_data['relation'] - attrnames = [attr[0] for attr in file_data['attributes']] + splits = file_data["data"] + name = file_data["relation"] + attrnames = [attr[0] for attr in file_data["attributes"]] repetitions = OrderedDict() - type_idx = attrnames.index('type') - rowid_idx = attrnames.index('rowid') - repeat_idx = attrnames.index('repeat') - fold_idx = attrnames.index('fold') - sample_idx = ( - attrnames.index('sample') - if 'sample' in attrnames - else None - ) + type_idx = attrnames.index("type") + rowid_idx = attrnames.index("rowid") + repeat_idx = attrnames.index("repeat") + fold_idx = attrnames.index("fold") + sample_idx = attrnames.index("sample") if "sample" in attrnames else None for line in splits: # A line looks like type, rowid, repeat, fold @@ -121,9 +118,9 @@ def _from_arff_file(cls, filename: str) -> 'OpenMLSplit': split = repetitions[repetition][fold][sample] type_ = line[type_idx] - if type_ == 'TRAIN': + if type_ == "TRAIN": split[0].append(line[rowid_idx]) - elif type_ == 'TEST': + elif type_ == "TEST": split[1].append(line[rowid_idx]) else: raise ValueError(type_) @@ -132,16 +129,14 @@ def _from_arff_file(cls, filename: str) -> 'OpenMLSplit': for fold in repetitions[repetition]: for sample in repetitions[repetition][fold]: repetitions[repetition][fold][sample] = Split( - np.array(repetitions[repetition][fold][sample][0], - dtype=np.int32), - np.array(repetitions[repetition][fold][sample][1], - dtype=np.int32)) + np.array(repetitions[repetition][fold][sample][0], dtype=np.int32), + np.array(repetitions[repetition][fold][sample][1], dtype=np.int32), + ) with open(pkl_filename, "wb") as fh: - pickle.dump({"name": name, "repetitions": repetitions}, fh, - protocol=2) + pickle.dump({"name": name, "repetitions": repetitions}, fh, protocol=2) - return cls(name, '', repetitions) + return cls(name, "", repetitions) def from_dataset(self, X, Y, folds, repeats): raise NotImplementedError() diff --git a/openml/tasks/task.py b/openml/tasks/task.py index 72c12bab5..b5d95d6d1 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -32,17 +32,18 @@ class OpenMLTask(OpenMLBase): estimation_procedure_id: int Refers to the type of estimates used. 
""" + def __init__( - self, - task_id: Optional[int], - task_type_id: int, - task_type: str, - data_set_id: int, - estimation_procedure_id: int = 1, - estimation_procedure_type: Optional[str] = None, - estimation_parameters: Optional[Dict[str, str]] = None, - evaluation_measure: Optional[str] = None, - data_splits_url: Optional[str] = None, + self, + task_id: Optional[int], + task_type_id: int, + task_type: str, + data_set_id: int, + estimation_procedure_id: int = 1, + estimation_procedure_type: Optional[str] = None, + estimation_parameters: Optional[Dict[str, str]] = None, + evaluation_measure: Optional[str] = None, + data_splits_url: Optional[str] = None, ): self.task_id = int(task_id) if task_id is not None else None @@ -50,7 +51,9 @@ def __init__( self.task_type = task_type self.dataset_id = int(data_set_id) self.evaluation_measure = evaluation_measure - self.estimation_procedure = dict() # type: Dict[str, Optional[Union[str, Dict]]] # noqa E501 + self.estimation_procedure = ( + dict() + ) # type: Dict[str, Optional[Union[str, Dict]]] # noqa E501 self.estimation_procedure["type"] = estimation_procedure_type self.estimation_procedure["parameters"] = estimation_parameters self.estimation_procedure["data_splits_url"] = data_splits_url @@ -59,7 +62,7 @@ def __init__( @classmethod def _entity_letter(cls) -> str: - return 't' + return "t" @property def id(self) -> Optional[int]: @@ -67,25 +70,36 @@ def id(self) -> Optional[int]: def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: """ Collect all information to display in the __repr__ body. """ - fields = {"Task Type Description": '{}/tt/{}'.format( - openml.config.get_server_base_url(), self.task_type_id)} # type: Dict[str, Any] + fields = { + "Task Type Description": "{}/tt/{}".format( + openml.config.get_server_base_url(), self.task_type_id + ) + } # type: Dict[str, Any] if self.task_id is not None: fields["Task ID"] = self.task_id fields["Task URL"] = self.openml_url if self.evaluation_measure is not None: fields["Evaluation Measure"] = self.evaluation_measure if self.estimation_procedure is not None: - fields["Estimation Procedure"] = self.estimation_procedure['type'] - if getattr(self, 'target_name', None) is not None: - fields["Target Feature"] = getattr(self, 'target_name') - if hasattr(self, 'class_labels'): - fields["# of Classes"] = len(getattr(self, 'class_labels')) - if hasattr(self, 'cost_matrix'): + fields["Estimation Procedure"] = self.estimation_procedure["type"] + if getattr(self, "target_name", None) is not None: + fields["Target Feature"] = getattr(self, "target_name") + if hasattr(self, "class_labels"): + fields["# of Classes"] = len(getattr(self, "class_labels")) + if hasattr(self, "cost_matrix"): fields["Cost Matrix"] = "Available" # determines the order in which the information will be printed - order = ["Task Type Description", "Task ID", "Task URL", "Estimation Procedure", - "Evaluation Measure", "Target Feature", "# of Classes", "Cost Matrix"] + order = [ + "Task Type Description", + "Task ID", + "Task URL", + "Estimation Procedure", + "Evaluation Measure", + "Target Feature", + "# of Classes", + "Cost Matrix", + ] return [(key, fields[key]) for key in order if key in fields] def get_dataset(self) -> datasets.OpenMLDataset: @@ -93,40 +107,31 @@ def get_dataset(self) -> datasets.OpenMLDataset: return datasets.get_dataset(self.dataset_id) def get_train_test_split_indices( - self, - fold: int = 0, - repeat: int = 0, - sample: int = 0, + self, fold: int = 0, repeat: int = 0, sample: int = 0, 
) -> Tuple[np.ndarray, np.ndarray]: # Replace with retrieve from cache if self.split is None: self.split = self.download_split() - train_indices, test_indices = self.split.get( - repeat=repeat, - fold=fold, - sample=sample, - ) + train_indices, test_indices = self.split.get(repeat=repeat, fold=fold, sample=sample,) return train_indices, test_indices def _download_split(self, cache_file: str): try: - with io.open(cache_file, encoding='utf8'): + with io.open(cache_file, encoding="utf8"): pass except (OSError, IOError): split_url = self.estimation_procedure["data_splits_url"] openml._api_calls._download_text_file( - source=str(split_url), - output_path=cache_file, + source=str(split_url), output_path=cache_file, ) def download_split(self) -> OpenMLSplit: """Download the OpenML split for a given task. """ cached_split_file = os.path.join( - _create_cache_directory_for_id('tasks', self.task_id), - "datasplits.arff", + _create_cache_directory_for_id("tasks", self.task_id), "datasplits.arff", ) try: @@ -145,44 +150,37 @@ def get_split_dimensions(self) -> Tuple[int, int, int]: return self.split.repeats, self.split.folds, self.split.samples - def _to_dict(self) -> 'OrderedDict[str, OrderedDict]': + def _to_dict(self) -> "OrderedDict[str, OrderedDict]": """ Creates a dictionary representation of self. """ task_container = OrderedDict() # type: OrderedDict[str, OrderedDict] - task_dict = OrderedDict([ - ('@xmlns:oml', 'http://openml.org/openml') - ]) # type: OrderedDict[str, Union[List, str, int]] + task_dict = OrderedDict( + [("@xmlns:oml", "http://openml.org/openml")] + ) # type: OrderedDict[str, Union[List, str, int]] - task_container['oml:task_inputs'] = task_dict - task_dict['oml:task_type_id'] = self.task_type_id + task_container["oml:task_inputs"] = task_dict + task_dict["oml:task_type_id"] = self.task_type_id # having task_inputs and adding a type annotation # solves wrong warnings task_inputs = [ - OrderedDict([ - ('@name', 'source_data'), - ('#text', str(self.dataset_id)) - ]), - OrderedDict([ - ('@name', 'estimation_procedure'), - ('#text', str(self.estimation_procedure_id)) - ]) + OrderedDict([("@name", "source_data"), ("#text", str(self.dataset_id))]), + OrderedDict( + [("@name", "estimation_procedure"), ("#text", str(self.estimation_procedure_id))] + ), ] # type: List[OrderedDict] if self.evaluation_measure is not None: task_inputs.append( - OrderedDict([ - ('@name', 'evaluation_measures'), - ('#text', self.evaluation_measure) - ]) + OrderedDict([("@name", "evaluation_measures"), ("#text", self.evaluation_measure)]) ) - task_dict['oml:input'] = task_inputs + task_dict["oml:input"] = task_inputs return task_container def _parse_publish_response(self, xml_response: Dict): """ Parse the id from the xml_response and assign it to self. """ - self.task_id = int(xml_response['oml:upload_task']['oml:id']) + self.task_id = int(xml_response["oml:upload_task"]["oml:id"]) class OpenMLSupervisedTask(OpenMLTask, ABC): @@ -195,18 +193,19 @@ class OpenMLSupervisedTask(OpenMLTask, ABC): target_name : str Name of the target feature (the class variable). 
""" + def __init__( - self, - task_type_id: int, - task_type: str, - data_set_id: int, - target_name: str, - estimation_procedure_id: int = 1, - estimation_procedure_type: Optional[str] = None, - estimation_parameters: Optional[Dict[str, str]] = None, - evaluation_measure: Optional[str] = None, - data_splits_url: Optional[str] = None, - task_id: Optional[int] = None, + self, + task_type_id: int, + task_type: str, + data_set_id: int, + target_name: str, + estimation_procedure_id: int = 1, + estimation_procedure_type: Optional[str] = None, + estimation_parameters: Optional[Dict[str, str]] = None, + evaluation_measure: Optional[str] = None, + data_splits_url: Optional[str] = None, + task_id: Optional[int] = None, ): super(OpenMLSupervisedTask, self).__init__( task_id=task_id, @@ -223,11 +222,9 @@ def __init__( self.target_name = target_name def get_X_and_y( - self, - dataset_format: str = 'array', + self, dataset_format: str = "array", ) -> Tuple[ - Union[np.ndarray, pd.DataFrame, scipy.sparse.spmatrix], - Union[np.ndarray, pd.Series] + Union[np.ndarray, pd.DataFrame, scipy.sparse.spmatrix], Union[np.ndarray, pd.Series] ]: """Get data associated with the current task. @@ -245,21 +242,16 @@ def get_X_and_y( dataset = self.get_dataset() if self.task_type_id not in (1, 2, 3): raise NotImplementedError(self.task_type) - X, y, _, _ = dataset.get_data( - dataset_format=dataset_format, target=self.target_name, - ) + X, y, _, _ = dataset.get_data(dataset_format=dataset_format, target=self.target_name,) return X, y - def _to_dict(self) -> 'OrderedDict[str, OrderedDict]': + def _to_dict(self) -> "OrderedDict[str, OrderedDict]": task_container = super(OpenMLSupervisedTask, self)._to_dict() - task_dict = task_container['oml:task_inputs'] + task_dict = task_container["oml:task_inputs"] - task_dict['oml:input'].append( - OrderedDict([ - ('@name', 'target_feature'), - ('#text', self.target_name) - ]) + task_dict["oml:input"].append( + OrderedDict([("@name", "target_feature"), ("#text", self.target_name)]) ) return task_container @@ -271,7 +263,7 @@ def estimation_parameters(self): "The estimation_parameters attribute will be " "deprecated in the future, please use " "estimation_procedure['parameters'] instead", - PendingDeprecationWarning + PendingDeprecationWarning, ) return self.estimation_procedure["parameters"] @@ -291,20 +283,21 @@ class OpenMLClassificationTask(OpenMLSupervisedTask): class_labels : List of str (optional) cost_matrix: array (optional) """ + def __init__( - self, - task_type_id: int, - task_type: str, - data_set_id: int, - target_name: str, - estimation_procedure_id: int = 1, - estimation_procedure_type: Optional[str] = None, - estimation_parameters: Optional[Dict[str, str]] = None, - evaluation_measure: Optional[str] = None, - data_splits_url: Optional[str] = None, - task_id: Optional[int] = None, - class_labels: Optional[List[str]] = None, - cost_matrix: Optional[np.ndarray] = None, + self, + task_type_id: int, + task_type: str, + data_set_id: int, + target_name: str, + estimation_procedure_id: int = 1, + estimation_procedure_type: Optional[str] = None, + estimation_parameters: Optional[Dict[str, str]] = None, + evaluation_measure: Optional[str] = None, + data_splits_url: Optional[str] = None, + task_id: Optional[int] = None, + class_labels: Optional[List[str]] = None, + cost_matrix: Optional[np.ndarray] = None, ): super(OpenMLClassificationTask, self).__init__( @@ -331,18 +324,19 @@ class OpenMLRegressionTask(OpenMLSupervisedTask): Inherited from :class:`openml.OpenMLSupervisedTask` 
""" + def __init__( - self, - task_type_id: int, - task_type: str, - data_set_id: int, - target_name: str, - estimation_procedure_id: int = 7, - estimation_procedure_type: Optional[str] = None, - estimation_parameters: Optional[Dict[str, str]] = None, - data_splits_url: Optional[str] = None, - task_id: Optional[int] = None, - evaluation_measure: Optional[str] = None, + self, + task_type_id: int, + task_type: str, + data_set_id: int, + target_name: str, + estimation_procedure_id: int = 7, + estimation_procedure_type: Optional[str] = None, + estimation_parameters: Optional[Dict[str, str]] = None, + data_splits_url: Optional[str] = None, + task_id: Optional[int] = None, + evaluation_measure: Optional[str] = None, ): super(OpenMLRegressionTask, self).__init__( task_id=task_id, @@ -369,18 +363,19 @@ class OpenMLClusteringTask(OpenMLTask): Name of the target feature (class) that is not part of the feature set for the clustering task. """ + def __init__( - self, - task_type_id: int, - task_type: str, - data_set_id: int, - estimation_procedure_id: int = 17, - task_id: Optional[int] = None, - estimation_procedure_type: Optional[str] = None, - estimation_parameters: Optional[Dict[str, str]] = None, - data_splits_url: Optional[str] = None, - evaluation_measure: Optional[str] = None, - target_name: Optional[str] = None, + self, + task_type_id: int, + task_type: str, + data_set_id: int, + estimation_procedure_id: int = 17, + task_id: Optional[int] = None, + estimation_procedure_type: Optional[str] = None, + estimation_parameters: Optional[Dict[str, str]] = None, + data_splits_url: Optional[str] = None, + evaluation_measure: Optional[str] = None, + target_name: Optional[str] = None, ): super(OpenMLClusteringTask, self).__init__( task_id=task_id, @@ -397,8 +392,7 @@ def __init__( self.target_name = target_name def get_X( - self, - dataset_format: str = 'array', + self, dataset_format: str = "array", ) -> Union[np.ndarray, pd.DataFrame, scipy.sparse.spmatrix]: """Get data associated with the current task. @@ -414,12 +408,10 @@ def get_X( """ dataset = self.get_dataset() - data, *_ = dataset.get_data( - dataset_format=dataset_format, target=None, - ) + data, *_ = dataset.get_data(dataset_format=dataset_format, target=None,) return data - def _to_dict(self) -> 'OrderedDict[str, OrderedDict]': + def _to_dict(self) -> "OrderedDict[str, OrderedDict]": task_container = super(OpenMLClusteringTask, self)._to_dict() @@ -427,7 +419,7 @@ def _to_dict(self) -> 'OrderedDict[str, OrderedDict]': # Uncomment if it is supported on the server # in the future. 
# https://github.com/openml/OpenML/issues/925 - ''' + """ task_dict = task_container['oml:task_inputs'] if self.target_name is not None: task_dict['oml:input'].append( @@ -436,7 +428,7 @@ def _to_dict(self) -> 'OrderedDict[str, OrderedDict]': ('#text', self.target_name) ]) ) - ''' + """ return task_container @@ -445,20 +437,21 @@ class OpenMLLearningCurveTask(OpenMLClassificationTask): Inherited from :class:`openml.OpenMLClassificationTask` """ + def __init__( - self, - task_type_id: int, - task_type: str, - data_set_id: int, - target_name: str, - estimation_procedure_id: int = 13, - estimation_procedure_type: Optional[str] = None, - estimation_parameters: Optional[Dict[str, str]] = None, - data_splits_url: Optional[str] = None, - task_id: Optional[int] = None, - evaluation_measure: Optional[str] = None, - class_labels: Optional[List[str]] = None, - cost_matrix: Optional[np.ndarray] = None, + self, + task_type_id: int, + task_type: str, + data_set_id: int, + target_name: str, + estimation_procedure_id: int = 13, + estimation_procedure_type: Optional[str] = None, + estimation_parameters: Optional[Dict[str, str]] = None, + data_splits_url: Optional[str] = None, + task_id: Optional[int] = None, + evaluation_measure: Optional[str] = None, + class_labels: Optional[List[str]] = None, + cost_matrix: Optional[np.ndarray] = None, ): super(OpenMLLearningCurveTask, self).__init__( task_id=task_id, diff --git a/openml/testing.py b/openml/testing.py index 7ebf37541..e4338effd 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -30,8 +30,15 @@ class TestBase(unittest.TestCase): Currently hard-codes a read-write key. Hopefully soon allows using a test server, not the production server. """ - publish_tracker = {'run': [], 'data': [], 'flow': [], 'task': [], - 'study': [], 'user': []} # type: dict + + publish_tracker = { + "run": [], + "data": [], + "flow": [], + "task": [], + "study": [], + "user": [], + } # type: dict test_server = "https://test.openml.org/api/v1/xml" # amueller's read/write key that he will throw away later apikey = "610344db6388d9ba34f6db45a3cf71de" @@ -65,14 +72,14 @@ def setUp(self, n_levels: int = 1): abspath_this_file = os.path.abspath(inspect.getfile(self.__class__)) static_cache_dir = os.path.dirname(abspath_this_file) for _ in range(n_levels): - static_cache_dir = os.path.abspath(os.path.join(static_cache_dir, '..')) + static_cache_dir = os.path.abspath(os.path.join(static_cache_dir, "..")) content = os.listdir(static_cache_dir) - if 'files' in content: - self.static_cache_dir = os.path.join(static_cache_dir, 'files') + if "files" in content: + self.static_cache_dir = os.path.join(static_cache_dir, "files") if self.static_cache_dir is None: raise ValueError( - 'Cannot find test cache dir, expected it to be {}!'.format(static_cache_dir) + "Cannot find test cache dir, expected it to be {}!".format(static_cache_dir) ) self.cwd = os.getcwd() @@ -93,10 +100,10 @@ def setUp(self, n_levels: int = 1): # If we're on travis, we save the api key in the config file to allow # the notebook tests to read them. 
- if os.environ.get('TRAVIS') or os.environ.get('APPVEYOR'): - with lockutils.external_lock('config', lock_path=self.workdir): - with open(openml.config.config_file, 'w') as fh: - fh.write('apikey = %s' % openml.config.apikey) + if os.environ.get("TRAVIS") or os.environ.get("APPVEYOR"): + with lockutils.external_lock("config", lock_path=self.workdir): + with open(openml.config.config_file, "w") as fh: + fh.write("apikey = %s" % openml.config.apikey) # Increase the number of retries to avoid spurious server failures self.connection_n_retries = openml.config.connection_n_retries @@ -107,7 +114,7 @@ def tearDown(self): try: shutil.rmtree(self.workdir) except PermissionError: - if os.name == 'nt': + if os.name == "nt": # one of the files may still be used by another process pass else: @@ -139,14 +146,18 @@ def _delete_entity_from_tracker(self, entity_type, entity): if entity_type in TestBase.publish_tracker: # removes duplicate entries TestBase.publish_tracker[entity_type] = list(set(TestBase.publish_tracker[entity_type])) - if entity_type == 'flow': - delete_index = [i for i, (id_, _) in - enumerate(TestBase.publish_tracker[entity_type]) - if id_ == entity][0] + if entity_type == "flow": + delete_index = [ + i + for i, (id_, _) in enumerate(TestBase.publish_tracker[entity_type]) + if id_ == entity + ][0] else: - delete_index = [i for i, id_ in - enumerate(TestBase.publish_tracker[entity_type]) - if id_ == entity][0] + delete_index = [ + i + for i, id_ in enumerate(TestBase.publish_tracker[entity_type]) + if id_ == entity + ][0] TestBase.publish_tracker[entity_type].pop(delete_index) def _get_sentinel(self, sentinel=None): @@ -155,10 +166,10 @@ def _get_sentinel(self, sentinel=None): # is identified by its name and external version online. Having a # unique name allows us to publish the same flow in each test run. md5 = hashlib.md5() - md5.update(str(time.time()).encode('utf-8')) - md5.update(str(os.getpid()).encode('utf-8')) + md5.update(str(time.time()).encode("utf-8")) + md5.update(str(os.getpid()).encode("utf-8")) sentinel = md5.hexdigest()[:10] - sentinel = 'TEST%s' % sentinel + sentinel = "TEST%s" % sentinel return sentinel def _add_sentinel_to_flow_name(self, flow, sentinel=None): @@ -167,7 +178,7 @@ def _add_sentinel_to_flow_name(self, flow, sentinel=None): flows_to_visit.append(flow) while len(flows_to_visit) > 0: current_flow = flows_to_visit.pop() - current_flow.name = '%s%s' % (sentinel, current_flow.name) + current_flow.name = "%s%s" % (sentinel, current_flow.name) for subflow in current_flow.components.values(): flows_to_visit.append(subflow) @@ -176,12 +187,11 @@ def _add_sentinel_to_flow_name(self, flow, sentinel=None): def _check_dataset(self, dataset): self.assertEqual(type(dataset), dict) self.assertGreaterEqual(len(dataset), 2) - self.assertIn('did', dataset) - self.assertIsInstance(dataset['did'], int) - self.assertIn('status', dataset) - self.assertIsInstance(dataset['status'], str) - self.assertIn(dataset['status'], ['in_preparation', 'active', - 'deactivated']) + self.assertIn("did", dataset) + self.assertIsInstance(dataset["did"], int) + self.assertIn("status", dataset) + self.assertIsInstance(dataset["status"], str) + self.assertIn(dataset["status"], ["in_preparation", "active", "deactivated"]) def _check_fold_timing_evaluations( self, @@ -206,26 +216,25 @@ def _check_fold_timing_evaluations( # maximum allowed value check_measures = { # should take at least one millisecond (?) 
- 'usercpu_time_millis_testing': (0, max_time_allowed), - 'usercpu_time_millis_training': (0, max_time_allowed), - 'usercpu_time_millis': (0, max_time_allowed), - 'wall_clock_time_millis_training': (0, max_time_allowed), - 'wall_clock_time_millis_testing': (0, max_time_allowed), - 'wall_clock_time_millis': (0, max_time_allowed), + "usercpu_time_millis_testing": (0, max_time_allowed), + "usercpu_time_millis_training": (0, max_time_allowed), + "usercpu_time_millis": (0, max_time_allowed), + "wall_clock_time_millis_training": (0, max_time_allowed), + "wall_clock_time_millis_testing": (0, max_time_allowed), + "wall_clock_time_millis": (0, max_time_allowed), } if check_scores: if task_type in (TaskTypeEnum.SUPERVISED_CLASSIFICATION, TaskTypeEnum.LEARNING_CURVE): - check_measures['predictive_accuracy'] = (0, 1.) + check_measures["predictive_accuracy"] = (0, 1.0) elif task_type == TaskTypeEnum.SUPERVISED_REGRESSION: - check_measures['mean_absolute_error'] = (0, float("inf")) + check_measures["mean_absolute_error"] = (0, float("inf")) self.assertIsInstance(fold_evaluations, dict) if sys.version_info[:2] >= (3, 3): # this only holds if we are allowed to record time (otherwise some # are missing) - self.assertEqual(set(fold_evaluations.keys()), - set(check_measures.keys())) + self.assertEqual(set(fold_evaluations.keys()), set(check_measures.keys())) for measure in check_measures.keys(): if measure in fold_evaluations: @@ -249,4 +258,4 @@ def _check_fold_timing_evaluations( from sklearn.preprocessing import Imputer as SimpleImputer -__all__ = ['TestBase', 'SimpleImputer'] +__all__ = ["TestBase", "SimpleImputer"] diff --git a/openml/utils.py b/openml/utils.py index 2815f1afd..a402564f9 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -25,6 +25,7 @@ with warnings.catch_warnings(): warnings.simplefilter("ignore") from oslo_concurrency import lockutils + oslo_installed = True except ImportError: pass @@ -58,33 +59,34 @@ def extract_xml_tags(xml_tag_name, node, allow_none=True): elif isinstance(node[xml_tag_name], list): rval = node[xml_tag_name] else: - raise ValueError('Received not string and non list as tag item') + raise ValueError("Received not string and non list as tag item") return rval else: if allow_none: return None else: - raise ValueError("Could not find tag '%s' in node '%s'" % - (xml_tag_name, str(node))) + raise ValueError("Could not find tag '%s' in node '%s'" % (xml_tag_name, str(node))) -def _get_rest_api_type_alias(oml_object: 'OpenMLBase') -> str: +def _get_rest_api_type_alias(oml_object: "OpenMLBase") -> str: """ Return the alias of the openml entity as it is defined for the REST API. 
""" rest_api_mapping = [ - (openml.datasets.OpenMLDataset, 'data'), - (openml.flows.OpenMLFlow, 'flow'), - (openml.tasks.OpenMLTask, 'task'), - (openml.runs.OpenMLRun, 'run'), - ((openml.study.OpenMLStudy, openml.study.OpenMLBenchmarkSuite), 'study') + (openml.datasets.OpenMLDataset, "data"), + (openml.flows.OpenMLFlow, "flow"), + (openml.tasks.OpenMLTask, "task"), + (openml.runs.OpenMLRun, "run"), + ((openml.study.OpenMLStudy, openml.study.OpenMLBenchmarkSuite), "study"), ] # type: List[Tuple[Union[Type, Tuple], str]] - _, api_type_alias = [(python_type, api_alias) - for (python_type, api_alias) in rest_api_mapping - if isinstance(oml_object, python_type)][0] + _, api_type_alias = [ + (python_type, api_alias) + for (python_type, api_alias) in rest_api_mapping + if isinstance(oml_object, python_type) + ][0] return api_type_alias -def _tag_openml_base(oml_object: 'OpenMLBase', tag: str, untag: bool = False): +def _tag_openml_base(oml_object: "OpenMLBase", tag: str, untag: bool = False): api_type_alias = _get_rest_api_type_alias(oml_object) _tag_entity(api_type_alias, oml_object.id, tag, untag) @@ -115,25 +117,23 @@ def _tag_entity(entity_type, entity_id, tag, untag=False): tags : list List of tags that the entity is (still) tagged with """ - legal_entities = {'data', 'task', 'flow', 'setup', 'run'} + legal_entities = {"data", "task", "flow", "setup", "run"} if entity_type not in legal_entities: - raise ValueError('Can\'t tag a %s' % entity_type) + raise ValueError("Can't tag a %s" % entity_type) - uri = '%s/tag' % entity_type - main_tag = 'oml:%s_tag' % entity_type + uri = "%s/tag" % entity_type + main_tag = "oml:%s_tag" % entity_type if untag: - uri = '%s/untag' % entity_type - main_tag = 'oml:%s_untag' % entity_type + uri = "%s/untag" % entity_type + main_tag = "oml:%s_untag" % entity_type - post_variables = {'%s_id' % entity_type: entity_id, 'tag': tag} - result_xml = openml._api_calls._perform_api_call(uri, - 'post', - post_variables) + post_variables = {"%s_id" % entity_type: entity_id, "tag": tag} + result_xml = openml._api_calls._perform_api_call(uri, "post", post_variables) - result = xmltodict.parse(result_xml, force_list={'oml:tag'})[main_tag] + result = xmltodict.parse(result_xml, force_list={"oml:tag"})[main_tag] - if 'oml:tag' in result: - return result['oml:tag'] + if "oml:tag" in result: + return result["oml:tag"] else: # no tags, return empty list return [] @@ -160,27 +160,26 @@ def _delete_entity(entity_type, entity_id): True iff the deletion was successful. False otherwse """ legal_entities = { - 'data', - 'flow', - 'task', - 'run', - 'study', - 'user', + "data", + "flow", + "task", + "run", + "study", + "user", } if entity_type not in legal_entities: - raise ValueError('Can\'t delete a %s' % entity_type) + raise ValueError("Can't delete a %s" % entity_type) - url_suffix = '%s/%d' % (entity_type, entity_id) - result_xml = openml._api_calls._perform_api_call(url_suffix, - 'delete') + url_suffix = "%s/%d" % (entity_type, entity_id) + result_xml = openml._api_calls._perform_api_call(url_suffix, "delete") result = xmltodict.parse(result_xml) - if 'oml:%s_delete' % entity_type in result: + if "oml:%s_delete" % entity_type in result: return True else: return False -def _list_all(listing_call, output_format='dict', *args, **filters): +def _list_all(listing_call, output_format="dict", *args, **filters): """Helper to handle paged listing requests. 
Example usage: @@ -207,34 +206,33 @@ def _list_all(listing_call, output_format='dict', *args, **filters): """ # eliminate filters that have a None value - active_filters = {key: value for key, value in filters.items() - if value is not None} + active_filters = {key: value for key, value in filters.items() if value is not None} page = 0 result = collections.OrderedDict() - if output_format == 'dataframe': + if output_format == "dataframe": result = pd.DataFrame() # Default batch size per paging. # This one can be set in filters (batch_size), but should not be # changed afterwards. The derived batch_size can be changed. BATCH_SIZE_ORIG = 10000 - if 'batch_size' in active_filters: - BATCH_SIZE_ORIG = active_filters['batch_size'] - del active_filters['batch_size'] + if "batch_size" in active_filters: + BATCH_SIZE_ORIG = active_filters["batch_size"] + del active_filters["batch_size"] # max number of results to be shown LIMIT = None offset = 0 - if 'size' in active_filters: - LIMIT = active_filters['size'] - del active_filters['size'] + if "size" in active_filters: + LIMIT = active_filters["size"] + del active_filters["size"] if LIMIT is not None and BATCH_SIZE_ORIG > LIMIT: BATCH_SIZE_ORIG = LIMIT - if 'offset' in active_filters: - offset = active_filters['offset'] - del active_filters['offset'] + if "offset" in active_filters: + offset = active_filters["offset"] + del active_filters["offset"] batch_size = BATCH_SIZE_ORIG while True: @@ -250,7 +248,7 @@ def _list_all(listing_call, output_format='dict', *args, **filters): except openml.exceptions.OpenMLServerNoResult: # we want to return an empty dict in this case break - if output_format == 'dataframe': + if output_format == "dataframe": if len(result) == 0: result = new_batch else: @@ -305,13 +303,11 @@ def _create_cache_directory_for_id(key, id_): str Path of the created dataset cache directory. """ - cache_dir = os.path.join( - _create_cache_directory(key), str(id_) - ) + cache_dir = os.path.join(_create_cache_directory(key), str(id_)) if os.path.exists(cache_dir) and os.path.isdir(cache_dir): pass elif os.path.exists(cache_dir) and not os.path.isdir(cache_dir): - raise ValueError('%s cache dir exists but is not a directory!' % key) + raise ValueError("%s cache dir exists but is not a directory!" % key) else: os.makedirs(cache_dir) return cache_dir @@ -331,35 +327,41 @@ def _remove_cache_dir_for_id(key, cache_dir): try: shutil.rmtree(cache_dir) except (OSError, IOError): - raise ValueError('Cannot remove faulty %s cache directory %s.' - 'Please do this manually!' % (key, cache_dir)) + raise ValueError( + "Cannot remove faulty %s cache directory %s." + "Please do this manually!" % (key, cache_dir) + ) def thread_safe_if_oslo_installed(func): if oslo_installed: + @wraps(func) def safe_func(*args, **kwargs): # Lock directories use the id that is passed as either positional or keyword argument. - id_parameters = [parameter_name for parameter_name in kwargs if '_id' in parameter_name] + id_parameters = [parameter_name for parameter_name in kwargs if "_id" in parameter_name] if len(id_parameters) == 1: id_ = kwargs[id_parameters[0]] elif len(args) > 0: id_ = args[0] else: - raise RuntimeError("An id must be specified for {}, was passed: ({}, {}).".format( - func.__name__, args, kwargs - )) + raise RuntimeError( + "An id must be specified for {}, was passed: ({}, {}).".format( + func.__name__, args, kwargs + ) + ) # The [7:] gets rid of the 'openml.' 
prefix lock_name = "{}.{}:{}".format(func.__module__[7:], func.__name__, id_) with lockutils.external_lock(name=lock_name, lock_path=_create_lockfiles_dir()): return func(*args, **kwargs) + return safe_func else: return func def _create_lockfiles_dir(): - dir = os.path.join(config.get_cache_directory(), 'locks') + dir = os.path.join(config.get_cache_directory(), "locks") try: os.makedirs(dir) except OSError: diff --git a/setup.py b/setup.py index c55888b19..f1f7a5871 100644 --- a/setup.py +++ b/setup.py @@ -11,86 +11,90 @@ if sys.version_info < (3, 6): raise ValueError( - 'Unsupported Python version {}.{}.{} found. OpenML requires Python 3.6 or higher.' - .format(sys.version_info.major, sys.version_info.minor, sys.version_info.micro) + "Unsupported Python version {}.{}.{} found. OpenML requires Python 3.6 or higher.".format( + sys.version_info.major, sys.version_info.minor, sys.version_info.micro + ) ) with open(os.path.join("README.md")) as fid: README = fid.read() -setuptools.setup(name="openml", - author="Matthias Feurer, Jan van Rijn, Arlind Kadra, Pieter Gijsbers, " - "Neeratyoy Mallik, Sahithya Ravi, Andreas Müller, Joaquin Vanschoren " - "and Frank Hutter", - author_email="feurerm@informatik.uni-freiburg.de", - maintainer="Matthias Feurer", - maintainer_email="feurerm@informatik.uni-freiburg.de", - description="Python API for OpenML", - long_description=README, - long_description_content_type='text/markdown', - license="BSD 3-clause", - url="http://openml.org/", - project_urls={ - "Documentation": "https://openml.github.io/openml-python/", - "Source Code": "https://github.com/openml/openml-python" - }, - version=version, - # Make sure to remove stale files such as the egg-info before updating this: - # https://stackoverflow.com/a/26547314 - packages=setuptools.find_packages( - include=['openml.*', 'openml'], - exclude=["*.tests", "*.tests.*", "tests.*", "tests"], - ), - package_data={'': ['*.txt', '*.md']}, - python_requires=">=3.6", - install_requires=[ - 'liac-arff>=2.4.0', - 'xmltodict', - 'requests', - 'scikit-learn>=0.18', - 'python-dateutil', # Installed through pandas anyway. 
- 'pandas>=1.0.0', - 'scipy>=0.13.3', - 'numpy>=1.6.2', - ], - extras_require={ - 'test': [ - 'nbconvert', - 'jupyter_client', - 'matplotlib', - 'pytest', - 'pytest-xdist', - 'pytest-timeout', - 'nbformat', - 'oslo.concurrency', - 'flaky', - 'pyarrow' - ], - 'examples': [ - 'matplotlib', - 'jupyter', - 'notebook', - 'nbconvert', - 'nbformat', - 'jupyter_client', - 'ipython', - 'ipykernel', - 'seaborn' - ], - 'examples_unix': [ - 'fanova', - ] - }, - test_suite="pytest", - classifiers=['Intended Audience :: Science/Research', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: BSD License', - 'Programming Language :: Python', - 'Topic :: Software Development', - 'Topic :: Scientific/Engineering', - 'Operating System :: POSIX', - 'Operating System :: Unix', - 'Operating System :: MacOS', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7']) +setuptools.setup( + name="openml", + author="Matthias Feurer, Jan van Rijn, Arlind Kadra, Pieter Gijsbers, " + "Neeratyoy Mallik, Sahithya Ravi, Andreas Müller, Joaquin Vanschoren " + "and Frank Hutter", + author_email="feurerm@informatik.uni-freiburg.de", + maintainer="Matthias Feurer", + maintainer_email="feurerm@informatik.uni-freiburg.de", + description="Python API for OpenML", + long_description=README, + long_description_content_type="text/markdown", + license="BSD 3-clause", + url="http://openml.org/", + project_urls={ + "Documentation": "https://openml.github.io/openml-python/", + "Source Code": "https://github.com/openml/openml-python", + }, + version=version, + # Make sure to remove stale files such as the egg-info before updating this: + # https://stackoverflow.com/a/26547314 + packages=setuptools.find_packages( + include=["openml.*", "openml"], exclude=["*.tests", "*.tests.*", "tests.*", "tests"], + ), + package_data={"": ["*.txt", "*.md"]}, + python_requires=">=3.6", + install_requires=[ + "liac-arff>=2.4.0", + "xmltodict", + "requests", + "scikit-learn>=0.18", + "python-dateutil", # Installed through pandas anyway. 
+ "pandas>=1.0.0", + "scipy>=0.13.3", + "numpy>=1.6.2", + ], + extras_require={ + "test": [ + "nbconvert", + "jupyter_client", + "matplotlib", + "pytest", + "pytest-xdist", + "pytest-timeout", + "nbformat", + "oslo.concurrency", + "flaky", + "pyarrow", + "pre-commit", + "pytest-cov", + ], + "examples": [ + "matplotlib", + "jupyter", + "notebook", + "nbconvert", + "nbformat", + "jupyter_client", + "ipython", + "ipykernel", + "seaborn", + ], + "examples_unix": ["fanova",], + }, + test_suite="pytest", + classifiers=[ + "Intended Audience :: Science/Research", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python", + "Topic :: Software Development", + "Topic :: Scientific/Engineering", + "Operating System :: POSIX", + "Operating System :: Unix", + "Operating System :: MacOS", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + ], +) diff --git a/tests/__init__.py b/tests/__init__.py index b71163cb2..245c252db 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -2,4 +2,4 @@ # Dummy to allow mock classes in the test files to have a version number for # their parent module -__version__ = '0.1' +__version__ = "0.1" diff --git a/tests/conftest.py b/tests/conftest.py index ae8f0dfa9..59fa33aca 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,4 @@ -'''This file is recognized by pytest for defining specified behaviour +"""This file is recognized by pytest for defining specified behaviour 'conftest.py' files are directory-scope files that are shared by all sub-directories from where this file is placed. pytest recognises @@ -18,7 +18,7 @@ Possible Future: class TestBase from openml/testing.py can be included under this file and there would not be any requirements to import testing.py in each of the unit test modules. -''' +""" # License: BSD 3-Clause @@ -42,32 +42,32 @@ logger.info("static directory: {}".format(static_dir)) print("static directory: {}".format(static_dir)) while True: - if 'openml' in os.listdir(static_dir): + if "openml" in os.listdir(static_dir): break - static_dir = os.path.join(static_dir, '..') + static_dir = os.path.join(static_dir, "..") def worker_id() -> str: - ''' Returns the name of the worker process owning this function call. + """ Returns the name of the worker process owning this function call. 
:return: str Possible outputs from the set of {'master', 'gw0', 'gw1', ..., 'gw(n-1)'} where n is the number of workers being used by pytest-xdist - ''' + """ vars_ = list(os.environ.keys()) - if 'PYTEST_XDIST_WORKER' in vars_ or 'PYTEST_XDIST_WORKER_COUNT' in vars_: - return os.environ['PYTEST_XDIST_WORKER'] + if "PYTEST_XDIST_WORKER" in vars_ or "PYTEST_XDIST_WORKER_COUNT" in vars_: + return os.environ["PYTEST_XDIST_WORKER"] else: - return 'master' + return "master" def read_file_list() -> List[str]: - '''Returns a list of paths to all files that currently exist in 'openml/tests/files/' + """Returns a list of paths to all files that currently exist in 'openml/tests/files/' :return: List[str] - ''' - directory = os.path.join(static_dir, 'tests/files/') - if worker_id() == 'master': + """ + directory = os.path.join(static_dir, "tests/files/") + if worker_id() == "master": logger.info("Collecting file lists from: {}".format(directory)) files = os.walk(directory) file_list = [] @@ -78,12 +78,12 @@ def read_file_list() -> List[str]: def compare_delete_files(old_list, new_list) -> None: - '''Deletes files that are there in the new_list but not in the old_list + """Deletes files that are there in the new_list but not in the old_list :param old_list: List[str] :param new_list: List[str] :return: None - ''' + """ file_list = list(set(new_list) - set(old_list)) for file in file_list: os.remove(file) @@ -91,7 +91,7 @@ def compare_delete_files(old_list, new_list) -> None: def delete_remote_files(tracker) -> None: - '''Function that deletes the entities passed as input, from the OpenML test server + """Function that deletes the entities passed as input, from the OpenML test server The TestBase class in openml/testing.py has an attribute called publish_tracker. This function expects the dictionary of the same structure. @@ -103,21 +103,23 @@ def delete_remote_files(tracker) -> None: :param tracker: Dict :return: None - ''' + """ openml.config.server = TestBase.test_server openml.config.apikey = TestBase.apikey # reordering to delete sub flows at the end of flows # sub-flows have shorter names, hence, sorting by descending order of flow name length - if 'flow' in tracker: - flow_deletion_order = [entity_id for entity_id, _ in - sorted(tracker['flow'], key=lambda x: len(x[1]), reverse=True)] - tracker['flow'] = flow_deletion_order + if "flow" in tracker: + flow_deletion_order = [ + entity_id + for entity_id, _ in sorted(tracker["flow"], key=lambda x: len(x[1]), reverse=True) + ] + tracker["flow"] = flow_deletion_order # deleting all collected entities published to test server # 'run's are deleted first to prevent dependency issue of entities on deletion - logger.info("Entity Types: {}".format(['run', 'data', 'flow', 'task', 'study'])) - for entity_type in ['run', 'data', 'flow', 'task', 'study']: + logger.info("Entity Types: {}".format(["run", "data", "flow", "task", "study"])) + for entity_type in ["run", "data", "flow", "task", "study"]: logger.info("Deleting {}s...".format(entity_type)) for i, entity in enumerate(tracker[entity_type]): try: @@ -128,7 +130,7 @@ def delete_remote_files(tracker) -> None: def pytest_sessionstart() -> None: - '''pytest hook that is executed before any unit test starts + """pytest hook that is executed before any unit test starts This function will be called by each of the worker processes, along with the master process when they are spawned. This happens even before the collection of unit tests. 
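# --- Editor's illustrative sketch (not part of the patch above) -----------------------
# The cleanup hooks in this conftest.py hand a tracker dictionary to
# delete_remote_files(); its shape mirrors the TestBase.publish_tracker attribute shown
# earlier in openml/testing.py. The ids and flow names below are made up purely for
# illustration: flows are tracked as (id, name) pairs so that parent flows (longer
# names, e.g. pipelines) are deleted before their sub-flows (shorter names).
example_tracker = {
    "run": [101, 102],
    "data": [55],
    "flow": [
        (301, "sklearn.pipeline.Pipeline(imputer=...,classifier=...)"),
        (302, "sklearn.tree.DecisionTreeClassifier"),
    ],
    "task": [12],
    "study": [],
    "user": [],
}
# delete_remote_files(example_tracker) would then remove these entities from the OpenML
# test server in the order run -> data -> flow -> task -> study, as in the loop above.
# ---------------------------------------------------------------------------------------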
@@ -141,16 +143,16 @@ def pytest_sessionstart() -> None: store a list of strings of paths of all files in the directory (pre-unit test snapshot). :return: None - ''' + """ # file_list is global to maintain the directory snapshot during tear down global file_list worker = worker_id() - if worker == 'master': + if worker == "master": file_list = read_file_list() def pytest_sessionfinish() -> None: - '''pytest hook that is executed after all unit tests of a worker ends + """pytest hook that is executed after all unit tests of a worker ends This function will be called by each of the worker processes, along with the master process when they are done with the unit tests allocated to them. @@ -164,7 +166,7 @@ def pytest_sessionfinish() -> None: * Iterates over the list of entities uploaded to test server and deletes them remotely :return: None - ''' + """ # allows access to the file_list read in the set up phase global file_list worker = worker_id() @@ -174,7 +176,7 @@ def pytest_sessionfinish() -> None: logger.info("Deleting files uploaded to test server for worker {}".format(worker)) delete_remote_files(TestBase.publish_tracker) - if worker == 'master': + if worker == "master": # Local file deletion new_file_list = read_file_list() compare_delete_files(file_list, new_file_list) diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index 986dca4c1..fcc6eddc7 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -34,100 +34,96 @@ def setUp(self): def test_repr(self): # create a bare-bones dataset as would be returned by # create_dataset - data = openml.datasets.OpenMLDataset(name="somename", - description="a description") + data = openml.datasets.OpenMLDataset(name="somename", description="a description") str(data) def test_init_string_validation(self): with pytest.raises(ValueError, match="Invalid symbols in name"): - openml.datasets.OpenMLDataset(name="some name", - description="a description") + openml.datasets.OpenMLDataset(name="some name", description="a description") with pytest.raises(ValueError, match="Invalid symbols in description"): - openml.datasets.OpenMLDataset(name="somename", - description="a descriptïon") + openml.datasets.OpenMLDataset(name="somename", description="a descriptïon") with pytest.raises(ValueError, match="Invalid symbols in citation"): - openml.datasets.OpenMLDataset(name="somename", - description="a description", - citation="Something by Müller") + openml.datasets.OpenMLDataset( + name="somename", description="a description", citation="Something by Müller" + ) def test_get_data_array(self): # Basic usage - rval, _, categorical, attribute_names = self.dataset.get_data(dataset_format='array') + rval, _, categorical, attribute_names = self.dataset.get_data(dataset_format="array") self.assertIsInstance(rval, np.ndarray) self.assertEqual(rval.dtype, np.float32) self.assertEqual((898, 39), rval.shape) self.assertEqual(len(categorical), 39) self.assertTrue(all([isinstance(cat, bool) for cat in categorical])) self.assertEqual(len(attribute_names), 39) - self.assertTrue(all([isinstance(att, str) - for att in attribute_names])) + self.assertTrue(all([isinstance(att, str) for att in attribute_names])) self.assertIsNone(_) # check that an error is raised when the dataset contains string err_msg = "PyOpenML cannot handle string when returning numpy arrays" with pytest.raises(PyOpenMLError, match=err_msg): - self.titanic.get_data(dataset_format='array') + self.titanic.get_data(dataset_format="array") def 
test_get_data_pandas(self): - data, _, _, _ = self.titanic.get_data(dataset_format='dataframe') + data, _, _, _ = self.titanic.get_data(dataset_format="dataframe") self.assertTrue(isinstance(data, pd.DataFrame)) self.assertEqual(data.shape[1], len(self.titanic.features)) self.assertEqual(data.shape[0], 1309) col_dtype = { - 'pclass': 'float64', - 'survived': 'category', - 'name': 'object', - 'sex': 'category', - 'age': 'float64', - 'sibsp': 'float64', - 'parch': 'float64', - 'ticket': 'object', - 'fare': 'float64', - 'cabin': 'object', - 'embarked': 'category', - 'boat': 'object', - 'body': 'float64', - 'home.dest': 'object' + "pclass": "float64", + "survived": "category", + "name": "object", + "sex": "category", + "age": "float64", + "sibsp": "float64", + "parch": "float64", + "ticket": "object", + "fare": "float64", + "cabin": "object", + "embarked": "category", + "boat": "object", + "body": "float64", + "home.dest": "object", } for col_name in data.columns: self.assertTrue(data[col_name].dtype.name == col_dtype[col_name]) X, y, _, _ = self.titanic.get_data( - dataset_format='dataframe', - target=self.titanic.default_target_attribute) + dataset_format="dataframe", target=self.titanic.default_target_attribute + ) self.assertTrue(isinstance(X, pd.DataFrame)) self.assertTrue(isinstance(y, pd.Series)) self.assertEqual(X.shape, (1309, 13)) self.assertEqual(y.shape, (1309,)) for col_name in X.columns: self.assertTrue(X[col_name].dtype.name == col_dtype[col_name]) - self.assertTrue(y.dtype.name == col_dtype['survived']) + self.assertTrue(y.dtype.name == col_dtype["survived"]) def test_get_data_boolean_pandas(self): # test to check that we are converting properly True and False even # with some inconsistency when dumping the data on openml data, _, _, _ = self.jm1.get_data() - self.assertTrue(data['defects'].dtype.name == 'category') - self.assertTrue(set(data['defects'].cat.categories) == {True, False}) + self.assertTrue(data["defects"].dtype.name == "category") + self.assertTrue(set(data["defects"].cat.categories) == {True, False}) data, _, _, _ = self.pc4.get_data() - self.assertTrue(data['c'].dtype.name == 'category') - self.assertTrue(set(data['c'].cat.categories) == {True, False}) + self.assertTrue(data["c"].dtype.name == "category") + self.assertTrue(set(data["c"].cat.categories) == {True, False}) def test_get_data_no_str_data_for_nparrays(self): # check that an error is raised when the dataset contains string err_msg = "PyOpenML cannot handle string when returning numpy arrays" with pytest.raises(PyOpenMLError, match=err_msg): - self.titanic.get_data(dataset_format='array') + self.titanic.get_data(dataset_format="array") def test_get_data_with_rowid(self): self.dataset.row_id_attribute = "condition" rval, _, categorical, _ = self.dataset.get_data(include_row_id=True) self.assertIsInstance(rval, pd.DataFrame) for (dtype, is_cat) in zip(rval.dtypes, categorical): - expected_type = 'category' if is_cat else 'float64' + expected_type = "category" if is_cat else "float64" self.assertEqual(dtype.name, expected_type) self.assertEqual(rval.shape, (898, 39)) self.assertEqual(len(categorical), 39) @@ -135,18 +131,18 @@ def test_get_data_with_rowid(self): rval, _, categorical, _ = self.dataset.get_data() self.assertIsInstance(rval, pd.DataFrame) for (dtype, is_cat) in zip(rval.dtypes, categorical): - expected_type = 'category' if is_cat else 'float64' + expected_type = "category" if is_cat else "float64" self.assertEqual(dtype.name, expected_type) self.assertEqual(rval.shape, (898, 38)) 
self.assertEqual(len(categorical), 38) def test_get_data_with_target_array(self): - X, y, _, attribute_names = self.dataset.get_data(dataset_format='array', target="class") + X, y, _, attribute_names = self.dataset.get_data(dataset_format="array", target="class") self.assertIsInstance(X, np.ndarray) self.assertEqual(X.dtype, np.float32) self.assertEqual(X.shape, (898, 38)) self.assertIn(y.dtype, [np.int32, np.int64]) - self.assertEqual(y.shape, (898, )) + self.assertEqual(y.shape, (898,)) self.assertEqual(len(attribute_names), 38) self.assertNotIn("class", attribute_names) @@ -154,14 +150,14 @@ def test_get_data_with_target_pandas(self): X, y, categorical, attribute_names = self.dataset.get_data(target="class") self.assertIsInstance(X, pd.DataFrame) for (dtype, is_cat) in zip(X.dtypes, categorical): - expected_type = 'category' if is_cat else 'float64' + expected_type = "category" if is_cat else "float64" self.assertEqual(dtype.name, expected_type) self.assertIsInstance(y, pd.Series) - self.assertEqual(y.dtype.name, 'category') + self.assertEqual(y.dtype.name, "category") self.assertEqual(X.shape, (898, 38)) self.assertEqual(len(attribute_names), 38) - self.assertEqual(y.shape, (898, )) + self.assertEqual(y.shape, (898,)) self.assertNotIn("class", attribute_names) @@ -173,20 +169,20 @@ def test_get_data_rowid_and_ignore_and_target(self): self.assertEqual(len(categorical), 36) cats = [True] * 3 + [False, True, True, False] + [True] * 23 + [False] * 3 + [True] * 3 self.assertListEqual(categorical, cats) - self.assertEqual(y.shape, (898, )) + self.assertEqual(y.shape, (898,)) def test_get_data_with_ignore_attributes(self): self.dataset.ignore_attribute = ["condition"] rval, _, categorical, _ = self.dataset.get_data(include_ignore_attribute=True) for (dtype, is_cat) in zip(rval.dtypes, categorical): - expected_type = 'category' if is_cat else 'float64' + expected_type = "category" if is_cat else "float64" self.assertEqual(dtype.name, expected_type) self.assertEqual(rval.shape, (898, 39)) self.assertEqual(len(categorical), 39) rval, _, categorical, _ = self.dataset.get_data(include_ignore_attribute=False) for (dtype, is_cat) in zip(rval.dtypes, categorical): - expected_type = 'category' if is_cat else 'float64' + expected_type = "category" if is_cat else "float64" self.assertEqual(dtype.name, expected_type) self.assertEqual(rval.shape, (898, 38)) self.assertEqual(len(categorical), 38) @@ -194,22 +190,18 @@ def test_get_data_with_ignore_attributes(self): def test_dataset_format_constructor(self): with catch_warnings(): - filterwarnings('error') + filterwarnings("error") self.assertRaises( - DeprecationWarning, - openml.OpenMLDataset, - 'Test', - 'Test', - format='arff' + DeprecationWarning, openml.OpenMLDataset, "Test", "Test", format="arff" ) def test_get_data_with_nonexisting_class(self): # This class is using the anneal dataset with labels [1, 2, 3, 4, 5, 'U']. However, # label 4 does not exist and we test that the features 5 and 'U' are correctly mapped to # indices 4 and 5, and that nothing is mapped to index 3. 
- _, y, _, _ = self.dataset.get_data('class', dataset_format='dataframe') - self.assertEqual(list(y.dtype.categories), ['1', '2', '3', '4', '5', 'U']) - _, y, _, _ = self.dataset.get_data('class', dataset_format='array') + _, y, _, _ = self.dataset.get_data("class", dataset_format="dataframe") + self.assertEqual(list(y.dtype.categories), ["1", "2", "3", "4", "5", "U"]) + _, y, _, _ = self.dataset.get_data("class", dataset_format="array") self.assertEqual(np.min(y), 0) self.assertEqual(np.max(y), 5) # Check that no label is mapped to 3, since it is reserved for label '4'. @@ -258,7 +250,7 @@ def setUp(self): def test_get_sparse_dataset_with_target(self): X, y, _, attribute_names = self.sparse_dataset.get_data( - dataset_format='array', target="class" + dataset_format="array", target="class" ) self.assertTrue(sparse.issparse(X)) @@ -267,13 +259,13 @@ def test_get_sparse_dataset_with_target(self): self.assertIsInstance(y, np.ndarray) self.assertIn(y.dtype, [np.int32, np.int64]) - self.assertEqual(y.shape, (600, )) + self.assertEqual(y.shape, (600,)) self.assertEqual(len(attribute_names), 20000) self.assertNotIn("class", attribute_names) def test_get_sparse_dataset(self): - rval, _, categorical, attribute_names = self.sparse_dataset.get_data(dataset_format='array') + rval, _, categorical, attribute_names = self.sparse_dataset.get_data(dataset_format="array") self.assertTrue(sparse.issparse(rval)) self.assertEqual(rval.dtype, np.float32) self.assertEqual((600, 20001), rval.shape) @@ -288,13 +280,14 @@ def test_get_sparse_dataframe(self): rval, *_ = self.sparse_dataset.get_data() self.assertIsInstance(rval, pd.DataFrame) np.testing.assert_array_equal( - [pd.SparseDtype(np.float32, fill_value=0.0)] * len(rval.dtypes), rval.dtypes) + [pd.SparseDtype(np.float32, fill_value=0.0)] * len(rval.dtypes), rval.dtypes + ) self.assertEqual((600, 20001), rval.shape) def test_get_sparse_dataset_with_rowid(self): self.sparse_dataset.row_id_attribute = ["V256"] rval, _, categorical, _ = self.sparse_dataset.get_data( - dataset_format='array', include_row_id=True + dataset_format="array", include_row_id=True ) self.assertTrue(sparse.issparse(rval)) self.assertEqual(rval.dtype, np.float32) @@ -302,7 +295,7 @@ def test_get_sparse_dataset_with_rowid(self): self.assertEqual(len(categorical), 20001) rval, _, categorical, _ = self.sparse_dataset.get_data( - dataset_format='array', include_row_id=False + dataset_format="array", include_row_id=False ) self.assertTrue(sparse.issparse(rval)) self.assertEqual(rval.dtype, np.float32) @@ -312,7 +305,7 @@ def test_get_sparse_dataset_with_rowid(self): def test_get_sparse_dataset_with_ignore_attributes(self): self.sparse_dataset.ignore_attribute = ["V256"] rval, _, categorical, _ = self.sparse_dataset.get_data( - dataset_format='array', include_ignore_attribute=True + dataset_format="array", include_ignore_attribute=True ) self.assertTrue(sparse.issparse(rval)) self.assertEqual(rval.dtype, np.float32) @@ -320,7 +313,7 @@ def test_get_sparse_dataset_with_ignore_attributes(self): self.assertEqual(len(categorical), 20001) rval, _, categorical, _ = self.sparse_dataset.get_data( - dataset_format='array', include_ignore_attribute=False + dataset_format="array", include_ignore_attribute=False ) self.assertTrue(sparse.issparse(rval)) self.assertEqual(rval.dtype, np.float32) @@ -332,7 +325,7 @@ def test_get_sparse_dataset_rowid_and_ignore_and_target(self): self.sparse_dataset.ignore_attribute = ["V256"] self.sparse_dataset.row_id_attribute = ["V512"] X, y, categorical, _ = 
self.sparse_dataset.get_data( - dataset_format='array', + dataset_format="array", target="class", include_row_id=False, include_ignore_attribute=False, @@ -344,29 +337,29 @@ def test_get_sparse_dataset_rowid_and_ignore_and_target(self): self.assertEqual(len(categorical), 19998) self.assertListEqual(categorical, [False] * 19998) - self.assertEqual(y.shape, (600, )) + self.assertEqual(y.shape, (600,)) def test_get_sparse_categorical_data_id_395(self): dataset = openml.datasets.get_dataset(395, download_data=True) feature = dataset.features[3758] self.assertTrue(isinstance(dataset, OpenMLDataset)) self.assertTrue(isinstance(feature, OpenMLDataFeature)) - self.assertEqual(dataset.name, 're1.wc') - self.assertEqual(feature.name, 'CLASS_LABEL') - self.assertEqual(feature.data_type, 'nominal') + self.assertEqual(dataset.name, "re1.wc") + self.assertEqual(feature.name, "CLASS_LABEL") + self.assertEqual(feature.data_type, "nominal") self.assertEqual(len(feature.nominal_values), 25) class OpenMLDatasetQualityTest(TestBase): def test__check_qualities(self): - qualities = [{'oml:name': 'a', 'oml:value': '0.5'}] + qualities = [{"oml:name": "a", "oml:value": "0.5"}] qualities = openml.datasets.dataset._check_qualities(qualities) - self.assertEqual(qualities['a'], 0.5) + self.assertEqual(qualities["a"], 0.5) - qualities = [{'oml:name': 'a', 'oml:value': 'null'}] + qualities = [{"oml:name": "a", "oml:value": "null"}] qualities = openml.datasets.dataset._check_qualities(qualities) - self.assertNotEqual(qualities['a'], qualities['a']) + self.assertNotEqual(qualities["a"], qualities["a"]) - qualities = [{'oml:name': 'a', 'oml:value': None}] + qualities = [{"oml:name": "a", "oml:value": None}] qualities = openml.datasets.dataset._check_qualities(qualities) - self.assertNotEqual(qualities['a'], qualities['a']) + self.assertNotEqual(qualities["a"], qualities["a"]) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 9c01c57e7..958d28d94 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -16,23 +16,24 @@ import openml from openml import OpenMLDataset -from openml.exceptions import OpenMLCacheException, OpenMLHashException, \ - OpenMLPrivateDatasetError +from openml.exceptions import OpenMLCacheException, OpenMLHashException, OpenMLPrivateDatasetError from openml.testing import TestBase from openml.utils import _tag_entity, _create_cache_directory_for_id -from openml.datasets.functions import (create_dataset, - attributes_arff_from_df, - _get_cached_dataset, - _get_cached_dataset_features, - _get_cached_dataset_qualities, - _get_cached_datasets, - _get_dataset_arff, - _get_dataset_description, - _get_dataset_features, - _get_dataset_qualities, - _get_online_dataset_arff, - _get_online_dataset_format, - DATASETS_CACHE_DIR_NAME) +from openml.datasets.functions import ( + create_dataset, + attributes_arff_from_df, + _get_cached_dataset, + _get_cached_dataset_features, + _get_cached_dataset_qualities, + _get_cached_datasets, + _get_dataset_arff, + _get_dataset_description, + _get_dataset_features, + _get_dataset_qualities, + _get_online_dataset_arff, + _get_online_dataset_format, + DATASETS_CACHE_DIR_NAME, +) class TestOpenMLDataset(TestBase): @@ -46,14 +47,14 @@ def tearDown(self): super(TestOpenMLDataset, self).tearDown() def _remove_pickle_files(self): - self.lock_path = os.path.join(openml.config.get_cache_directory(), 'locks') - for did in ['-1', '2']: + self.lock_path = 
os.path.join(openml.config.get_cache_directory(), "locks") + for did in ["-1", "2"]: with lockutils.external_lock( - name='datasets.functions.get_dataset:%s' % did, - lock_path=self.lock_path, + name="datasets.functions.get_dataset:%s" % did, lock_path=self.lock_path, ): - pickle_path = os.path.join(openml.config.get_cache_directory(), 'datasets', - did, 'dataset.pkl.py3') + pickle_path = os.path.join( + openml.config.get_cache_directory(), "datasets", did, "dataset.pkl.py3" + ) try: os.remove(pickle_path) except (OSError, FileNotFoundError): @@ -63,19 +64,19 @@ def _remove_pickle_files(self): def _get_empty_param_for_dataset(self): return { - 'name': None, - 'description': None, - 'creator': None, - 'contributor': None, - 'collection_date': None, - 'language': None, - 'licence': None, - 'default_target_attribute': None, - 'row_id_attribute': None, - 'ignore_attribute': None, - 'citation': None, - 'attributes': None, - 'data': None + "name": None, + "description": None, + "creator": None, + "contributor": None, + "collection_date": None, + "language": None, + "licence": None, + "default_target_attribute": None, + "row_id_attribute": None, + "ignore_attribute": None, + "citation": None, + "attributes": None, + "data": None, } def test__list_cached_datasets(self): @@ -85,7 +86,7 @@ def test__list_cached_datasets(self): self.assertEqual(len(cached_datasets), 2) self.assertIsInstance(cached_datasets[0], int) - @mock.patch('openml.datasets.functions._list_cached_datasets') + @mock.patch("openml.datasets.functions._list_cached_datasets") def test__get_cached_datasets(self, _list_cached_datasets_mock): openml.config.cache_directory = self.static_cache_dir _list_cached_datasets_mock.return_value = [-1, 2] @@ -94,14 +95,14 @@ def test__get_cached_datasets(self, _list_cached_datasets_mock): self.assertEqual(len(datasets), 2) self.assertIsInstance(list(datasets.values())[0], OpenMLDataset) - def test__get_cached_dataset(self, ): + def test__get_cached_dataset(self,): openml.config.cache_directory = self.static_cache_dir dataset = _get_cached_dataset(2) features = _get_cached_dataset_features(2) qualities = _get_cached_dataset_qualities(2) self.assertIsInstance(dataset, OpenMLDataset) self.assertTrue(len(dataset.features) > 0) - self.assertTrue(len(dataset.features) == len(features['oml:feature'])) + self.assertTrue(len(dataset.features) == len(features["oml:feature"])) self.assertTrue(len(dataset.qualities) == len(qualities)) def test_get_cached_dataset_description(self): @@ -111,10 +112,12 @@ def test_get_cached_dataset_description(self): def test_get_cached_dataset_description_not_cached(self): openml.config.cache_directory = self.static_cache_dir - self.assertRaisesRegex(OpenMLCacheException, - "Dataset description for dataset id 3 not cached", - openml.datasets.functions._get_cached_dataset_description, - dataset_id=3) + self.assertRaisesRegex( + OpenMLCacheException, + "Dataset description for dataset id 3 not cached", + openml.datasets.functions._get_cached_dataset_description, + dataset_id=3, + ) def test_get_cached_dataset_arff(self): openml.config.cache_directory = self.static_cache_dir @@ -123,29 +126,31 @@ def test_get_cached_dataset_arff(self): def test_get_cached_dataset_arff_not_cached(self): openml.config.cache_directory = self.static_cache_dir - self.assertRaisesRegex(OpenMLCacheException, - "ARFF file for dataset id 3 not cached", - openml.datasets.functions._get_cached_dataset_arff, - dataset_id=3) + self.assertRaisesRegex( + OpenMLCacheException, + "ARFF file for dataset id 3 not 
cached", + openml.datasets.functions._get_cached_dataset_arff, + dataset_id=3, + ) def _check_dataset(self, dataset): self.assertEqual(type(dataset), dict) self.assertGreaterEqual(len(dataset), 2) - self.assertIn('did', dataset) - self.assertIsInstance(dataset['did'], int) - self.assertIn('status', dataset) - self.assertIsInstance(dataset['status'], str) - self.assertIn(dataset['status'], ['in_preparation', 'active', 'deactivated']) + self.assertIn("did", dataset) + self.assertIsInstance(dataset["did"], int) + self.assertIn("status", dataset) + self.assertIsInstance(dataset["status"], str) + self.assertIn(dataset["status"], ["in_preparation", "active", "deactivated"]) def _check_datasets(self, datasets): for did in datasets: self._check_dataset(datasets[did]) def test_tag_untag_dataset(self): - tag = 'test_tag_%d' % random.randint(1, 1000000) - all_tags = _tag_entity('data', 1, tag) + tag = "test_tag_%d" % random.randint(1, 1000000) + all_tags = _tag_entity("data", 1, tag) self.assertTrue(tag in all_tags) - all_tags = _tag_entity('data', 1, tag, untag=True) + all_tags = _tag_entity("data", 1, tag, untag=True) self.assertTrue(tag not in all_tags) def test_list_datasets(self): @@ -157,12 +162,12 @@ def test_list_datasets(self): self._check_datasets(datasets) def test_list_datasets_output_format(self): - datasets = openml.datasets.list_datasets(output_format='dataframe') + datasets = openml.datasets.list_datasets(output_format="dataframe") self.assertIsInstance(datasets, pd.DataFrame) self.assertGreaterEqual(len(datasets), 100) def test_list_datasets_by_tag(self): - datasets = openml.datasets.list_datasets(tag='study_14') + datasets = openml.datasets.list_datasets(tag="study_14") self.assertGreaterEqual(len(datasets), 100) self._check_datasets(datasets) @@ -192,9 +197,9 @@ def test_list_datasets_by_number_missing_values(self): self._check_datasets(datasets) def test_list_datasets_combined_filters(self): - datasets = openml.datasets.list_datasets(tag='study_14', - number_instances="100..1000", - number_missing_values="800..1000") + datasets = openml.datasets.list_datasets( + tag="study_14", number_instances="100..1000", number_missing_values="800..1000" + ) self.assertGreaterEqual(len(datasets), 1) self._check_datasets(datasets) @@ -207,9 +212,9 @@ def test_list_datasets_paginate(self): self._check_datasets(datasets) def test_list_datasets_empty(self): - datasets = openml.datasets.list_datasets(tag='NoOneWouldUseThisTagAnyway') + datasets = openml.datasets.list_datasets(tag="NoOneWouldUseThisTagAnyway") if len(datasets) > 0: - raise ValueError('UnitTest Outdated, tag was already used (please remove)') + raise ValueError("UnitTest Outdated, tag was already used (please remove)") self.assertIsInstance(datasets, dict) @@ -221,7 +226,7 @@ def test_check_datasets_active(self): self.assertFalse(active[17]) self.assertRaisesRegex( ValueError, - 'Could not find dataset 79 in OpenML dataset list.', + "Could not find dataset 79 in OpenML dataset list.", openml.datasets.check_datasets_active, [79], ) @@ -237,33 +242,53 @@ def _datasets_retrieved_successfully(self, dids, metadata_only=True): - absence of data arff if metadata_only, else it must be present too. 
""" for did in dids: - self.assertTrue(os.path.exists(os.path.join( - openml.config.get_cache_directory(), "datasets", str(did), "description.xml"))) - self.assertTrue(os.path.exists(os.path.join( - openml.config.get_cache_directory(), "datasets", str(did), "qualities.xml"))) - self.assertTrue(os.path.exists(os.path.join( - openml.config.get_cache_directory(), "datasets", str(did), "features.xml"))) + self.assertTrue( + os.path.exists( + os.path.join( + openml.config.get_cache_directory(), "datasets", str(did), "description.xml" + ) + ) + ) + self.assertTrue( + os.path.exists( + os.path.join( + openml.config.get_cache_directory(), "datasets", str(did), "qualities.xml" + ) + ) + ) + self.assertTrue( + os.path.exists( + os.path.join( + openml.config.get_cache_directory(), "datasets", str(did), "features.xml" + ) + ) + ) data_assert = self.assertFalse if metadata_only else self.assertTrue - data_assert(os.path.exists(os.path.join( - openml.config.get_cache_directory(), "datasets", str(did), "dataset.arff"))) + data_assert( + os.path.exists( + os.path.join( + openml.config.get_cache_directory(), "datasets", str(did), "dataset.arff" + ) + ) + ) def test__name_to_id_with_deactivated(self): """ Check that an activated dataset is returned if an earlier deactivated one exists. """ openml.config.server = self.production_server # /d/1 was deactivated - self.assertEqual(openml.datasets.functions._name_to_id('anneal'), 2) + self.assertEqual(openml.datasets.functions._name_to_id("anneal"), 2) openml.config.server = self.test_server def test__name_to_id_with_multiple_active(self): """ With multiple active datasets, retrieve the least recent active. """ openml.config.server = self.production_server - self.assertEqual(openml.datasets.functions._name_to_id('iris'), 61) + self.assertEqual(openml.datasets.functions._name_to_id("iris"), 61) def test__name_to_id_with_version(self): """ With multiple active datasets, retrieve the least recent active. """ openml.config.server = self.production_server - self.assertEqual(openml.datasets.functions._name_to_id('iris', version=3), 969) + self.assertEqual(openml.datasets.functions._name_to_id("iris", version=3), 969) def test__name_to_id_with_multiple_active_error(self): """ With multiple active datasets, retrieve the least recent active. 
""" @@ -272,8 +297,8 @@ def test__name_to_id_with_multiple_active_error(self): ValueError, "Multiple active datasets exist with name iris", openml.datasets.functions._name_to_id, - dataset_name='iris', - error_if_multiple=True + dataset_name="iris", + error_if_multiple=True, ) def test__name_to_id_name_does_not_exist(self): @@ -282,7 +307,7 @@ def test__name_to_id_name_does_not_exist(self): RuntimeError, "No active datasets exist with name does_not_exist", openml.datasets.functions._name_to_id, - dataset_name='does_not_exist' + dataset_name="does_not_exist", ) def test__name_to_id_version_does_not_exist(self): @@ -291,20 +316,20 @@ def test__name_to_id_version_does_not_exist(self): RuntimeError, "No active datasets exist with name iris and version 100000", openml.datasets.functions._name_to_id, - dataset_name='iris', - version=100000 + dataset_name="iris", + version=100000, ) def test_get_datasets_by_name(self): # did 1 and 2 on the test server: - dids = ['anneal', 'kr-vs-kp'] + dids = ["anneal", "kr-vs-kp"] datasets = openml.datasets.get_datasets(dids, download_data=False) self.assertEqual(len(datasets), 2) self._datasets_retrieved_successfully([1, 2]) def test_get_datasets_by_mixed(self): # did 1 and 2 on the test server: - dids = ['anneal', 2] + dids = ["anneal", 2] datasets = openml.datasets.get_datasets(dids, download_data=False) self.assertEqual(len(datasets), 2) self._datasets_retrieved_successfully([1, 2]) @@ -326,7 +351,7 @@ def test_get_datasets_lazy(self): self._datasets_retrieved_successfully([1, 2], metadata_only=False) def test_get_dataset_by_name(self): - dataset = openml.datasets.get_dataset('anneal') + dataset = openml.datasets.get_dataset("anneal") self.assertEqual(type(dataset), OpenMLDataset) self.assertEqual(dataset.dataset_id, 1) self._datasets_retrieved_successfully([1], metadata_only=False) @@ -342,7 +367,7 @@ def test_get_dataset(self): # This is the only non-lazy load to ensure default behaviour works. 
dataset = openml.datasets.get_dataset(1) self.assertEqual(type(dataset), OpenMLDataset) - self.assertEqual(dataset.name, 'anneal') + self.assertEqual(dataset.name, "anneal") self._datasets_retrieved_successfully([1], metadata_only=False) self.assertGreater(len(dataset.features), 1) @@ -355,7 +380,7 @@ def test_get_dataset(self): def test_get_dataset_lazy(self): dataset = openml.datasets.get_dataset(1, download_data=False) self.assertEqual(type(dataset), OpenMLDataset) - self.assertEqual(dataset.name, 'anneal') + self.assertEqual(dataset.name, "anneal") self._datasets_retrieved_successfully([1], metadata_only=True) self.assertGreater(len(dataset.features), 1) @@ -374,42 +399,48 @@ def test_get_dataset_lazy_all_functions(self): # We only tests functions as general integrity is tested by test_get_dataset_lazy def ensure_absence_of_real_data(): - self.assertFalse(os.path.exists(os.path.join( - openml.config.get_cache_directory(), "datasets", "1", "dataset.arff"))) + self.assertFalse( + os.path.exists( + os.path.join( + openml.config.get_cache_directory(), "datasets", "1", "dataset.arff" + ) + ) + ) - tag = 'test_lazy_tag_%d' % random.randint(1, 1000000) + tag = "test_lazy_tag_%d" % random.randint(1, 1000000) dataset.push_tag(tag) ensure_absence_of_real_data() dataset.remove_tag(tag) ensure_absence_of_real_data() - nominal_indices = dataset.get_features_by_type('nominal') + nominal_indices = dataset.get_features_by_type("nominal") + # fmt: off correct = [0, 1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 35, 36, 37, 38] + # fmt: on self.assertEqual(nominal_indices, correct) ensure_absence_of_real_data() classes = dataset.retrieve_class_labels() - self.assertEqual(classes, ['1', '2', '3', '4', '5', 'U']) + self.assertEqual(classes, ["1", "2", "3", "4", "5", "U"]) ensure_absence_of_real_data() def test_get_dataset_sparse(self): dataset = openml.datasets.get_dataset(102, download_data=False) - X, *_ = dataset.get_data(dataset_format='array') + X, *_ = dataset.get_data(dataset_format="array") self.assertIsInstance(X, scipy.sparse.csr_matrix) def test_download_rowid(self): # Smoke test which checks that the dataset has the row-id set correctly did = 44 dataset = openml.datasets.get_dataset(did, download_data=False) - self.assertEqual(dataset.row_id_attribute, 'Counter') + self.assertEqual(dataset.row_id_attribute, "Counter") def test__get_dataset_description(self): description = _get_dataset_description(self.workdir, 2) self.assertIsInstance(description, dict) - description_xml_path = os.path.join(self.workdir, - 'description.xml') + description_xml_path = os.path.join(self.workdir, "description.xml") self.assertTrue(os.path.exists(description_xml_path)) def test__getarff_path_dataset_arff(self): @@ -421,15 +452,15 @@ def test__getarff_path_dataset_arff(self): def test__getarff_md5_issue(self): description = { - 'oml:id': 5, - 'oml:md5_checksum': 'abc', - 'oml:url': 'https://www.openml.org/data/download/61', + "oml:id": 5, + "oml:md5_checksum": "abc", + "oml:url": "https://www.openml.org/data/download/61", } self.assertRaisesRegex( OpenMLHashException, - 'Checksum ad484452702105cbf3d30f8deaba39a9 of downloaded file ' - 'is unequal to the expected checksum abc. ' - 'Raised when downloading dataset 5.', + "Checksum ad484452702105cbf3d30f8deaba39a9 of downloaded file " + "is unequal to the expected checksum abc. 
" + "Raised when downloading dataset 5.", _get_dataset_arff, description, ) @@ -437,7 +468,7 @@ def test__getarff_md5_issue(self): def test__get_dataset_features(self): features = _get_dataset_features(self.workdir, 2) self.assertIsInstance(features, dict) - features_xml_path = os.path.join(self.workdir, 'features.xml') + features_xml_path = os.path.join(self.workdir, "features.xml") self.assertTrue(os.path.exists(features_xml_path)) def test__get_dataset_qualities(self): @@ -447,9 +478,7 @@ def test__get_dataset_qualities(self): def test_deletion_of_cache_dir(self): # Simple removal - did_cache_dir = _create_cache_directory_for_id( - DATASETS_CACHE_DIR_NAME, 1, - ) + did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, 1,) self.assertTrue(os.path.exists(did_cache_dir)) openml.utils._remove_cache_dir_for_id( DATASETS_CACHE_DIR_NAME, did_cache_dir, @@ -458,20 +487,19 @@ def test_deletion_of_cache_dir(self): # Use _get_dataset_arff to load the description, trigger an exception in the # test target and have a slightly higher coverage - @mock.patch('openml.datasets.functions._get_dataset_arff') + @mock.patch("openml.datasets.functions._get_dataset_arff") def test_deletion_of_cache_dir_faulty_download(self, patch): - patch.side_effect = Exception('Boom!') - self.assertRaisesRegex(Exception, 'Boom!', openml.datasets.get_dataset, dataset_id=1) - datasets_cache_dir = os.path.join( - self.workdir, 'org', 'openml', 'test', 'datasets' - ) + patch.side_effect = Exception("Boom!") + self.assertRaisesRegex(Exception, "Boom!", openml.datasets.get_dataset, dataset_id=1) + datasets_cache_dir = os.path.join(self.workdir, "org", "openml", "test", "datasets") self.assertEqual(len(os.listdir(datasets_cache_dir)), 0) def test_publish_dataset(self): # lazy loading not possible as we need the arff-file. 
openml.datasets.get_dataset(3) - file_path = os.path.join(openml.config.get_cache_directory(), - "datasets", "3", "dataset.arff") + file_path = os.path.join( + openml.config.get_cache_directory(), "datasets", "3", "dataset.arff" + ) dataset = OpenMLDataset( "anneal", "test", @@ -482,18 +510,20 @@ def test_publish_dataset(self): data_file=file_path, ) dataset.publish() - TestBase._mark_entity_for_removal('data', dataset.dataset_id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - dataset.dataset_id)) + TestBase._mark_entity_for_removal("data", dataset.dataset_id) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], dataset.dataset_id) + ) self.assertIsInstance(dataset.dataset_id, int) def test__retrieve_class_labels(self): openml.config.cache_directory = self.static_cache_dir labels = openml.datasets.get_dataset(2, download_data=False).retrieve_class_labels() - self.assertEqual(labels, ['1', '2', '3', '4', '5', 'U']) + self.assertEqual(labels, ["1", "2", "3", "4", "5", "U"]) labels = openml.datasets.get_dataset(2, download_data=False).retrieve_class_labels( - target_name='product-type') - self.assertEqual(labels, ['C', 'H', 'G']) + target_name="product-type" + ) + self.assertEqual(labels, ["C", "H", "G"]) def test_upload_dataset_with_url(self): @@ -505,81 +535,91 @@ def test_upload_dataset_with_url(self): url="https://www.openml.org/data/download/61/dataset_61_iris.arff", ) dataset.publish() - TestBase._mark_entity_for_removal('data', dataset.dataset_id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - dataset.dataset_id)) + TestBase._mark_entity_for_removal("data", dataset.dataset_id) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], dataset.dataset_id) + ) self.assertIsInstance(dataset.dataset_id, int) def test_data_status(self): dataset = OpenMLDataset( "%s-UploadTestWithURL" % self._get_sentinel(), - "test", "ARFF", + "test", + "ARFF", version=1, - url="https://www.openml.org/data/download/61/dataset_61_iris.arff") + url="https://www.openml.org/data/download/61/dataset_61_iris.arff", + ) dataset.publish() - TestBase._mark_entity_for_removal('data', dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - dataset.id)) + TestBase._mark_entity_for_removal("data", dataset.id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) did = dataset.id # admin key for test server (only adminds can activate datasets. 
# all users can deactivate their own datasets) - openml.config.apikey = 'd488d8afd93b32331cf6ea9d7003d4c3' + openml.config.apikey = "d488d8afd93b32331cf6ea9d7003d4c3" - openml.datasets.status_update(did, 'active') + openml.datasets.status_update(did, "active") # need to use listing fn, as this is immune to cache - result = openml.datasets.list_datasets(data_id=[did], status='all') + result = openml.datasets.list_datasets(data_id=[did], status="all") self.assertEqual(len(result), 1) - self.assertEqual(result[did]['status'], 'active') - openml.datasets.status_update(did, 'deactivated') + self.assertEqual(result[did]["status"], "active") + openml.datasets.status_update(did, "deactivated") # need to use listing fn, as this is immune to cache - result = openml.datasets.list_datasets(data_id=[did], status='all') + result = openml.datasets.list_datasets(data_id=[did], status="all") self.assertEqual(len(result), 1) - self.assertEqual(result[did]['status'], 'deactivated') - openml.datasets.status_update(did, 'active') + self.assertEqual(result[did]["status"], "deactivated") + openml.datasets.status_update(did, "active") # need to use listing fn, as this is immune to cache - result = openml.datasets.list_datasets(data_id=[did], status='all') + result = openml.datasets.list_datasets(data_id=[did], status="all") self.assertEqual(len(result), 1) - self.assertEqual(result[did]['status'], 'active') + self.assertEqual(result[did]["status"], "active") with self.assertRaises(ValueError): - openml.datasets.status_update(did, 'in_preparation') + openml.datasets.status_update(did, "in_preparation") # need to use listing fn, as this is immune to cache - result = openml.datasets.list_datasets(data_id=[did], status='all') + result = openml.datasets.list_datasets(data_id=[did], status="all") self.assertEqual(len(result), 1) - self.assertEqual(result[did]['status'], 'active') + self.assertEqual(result[did]["status"], "active") def test_attributes_arff_from_df(self): # DataFrame case df = pd.DataFrame( - [[1, 1.0, 'xxx', 'A', True], [2, 2.0, 'yyy', 'B', False]], - columns=['integer', 'floating', 'string', 'category', 'boolean'] + [[1, 1.0, "xxx", "A", True], [2, 2.0, "yyy", "B", False]], + columns=["integer", "floating", "string", "category", "boolean"], ) - df['category'] = df['category'].astype('category') + df["category"] = df["category"].astype("category") attributes = attributes_arff_from_df(df) - self.assertEqual(attributes, [('integer', 'INTEGER'), - ('floating', 'REAL'), - ('string', 'STRING'), - ('category', ['A', 'B']), - ('boolean', ['True', 'False'])]) + self.assertEqual( + attributes, + [ + ("integer", "INTEGER"), + ("floating", "REAL"), + ("string", "STRING"), + ("category", ["A", "B"]), + ("boolean", ["True", "False"]), + ], + ) # DataFrame with Sparse columns case - df = pd.DataFrame({"integer": pd.arrays.SparseArray([1, 2, 0], fill_value=0), - "floating": pd.arrays.SparseArray([1.0, 2.0, 0], fill_value=0.0)}) - df['integer'] = df['integer'].astype(np.int64) + df = pd.DataFrame( + { + "integer": pd.arrays.SparseArray([1, 2, 0], fill_value=0), + "floating": pd.arrays.SparseArray([1.0, 2.0, 0], fill_value=0.0), + } + ) + df["integer"] = df["integer"].astype(np.int64) attributes = attributes_arff_from_df(df) - self.assertEqual(attributes, [('integer', 'INTEGER'), - ('floating', 'REAL')]) + self.assertEqual(attributes, [("integer", "INTEGER"), ("floating", "REAL")]) def test_attributes_arff_from_df_numeric_column(self): # Test column names are automatically converted to str if needed (#819) - df = 
pd.DataFrame({0: [1, 2, 3], 0.5: [4, 5, 6], 'target': [0, 1, 1]}) + df = pd.DataFrame({0: [1, 2, 3], 0.5: [4, 5, 6], "target": [0, 1, 1]}) attributes = attributes_arff_from_df(df) - self.assertEqual(attributes, [('0', 'INTEGER'), ('0.5', 'INTEGER'), ('target', 'INTEGER')]) + self.assertEqual(attributes, [("0", "INTEGER"), ("0.5", "INTEGER"), ("target", "INTEGER")]) def test_attributes_arff_from_df_mixed_dtype_categories(self): # liac-arff imposed categorical attributes to be of sting dtype. We # raise an error if this is not the case. - df = pd.DataFrame([[1], ['2'], [3.]]) - df[0] = df[0].astype('category') + df = pd.DataFrame([[1], ["2"], [3.0]]) + df[0] = df[0].astype("category") err_msg = "The column '0' of the dataframe is of 'category' dtype." with pytest.raises(ValueError, match=err_msg): attributes_arff_from_df(df) @@ -588,253 +628,216 @@ def test_attributes_arff_from_df_unknown_dtype(self): # check that an error is raised when the dtype is not supptagorted by # liac-arff data = [ - [[1], ['2'], [3.]], - [pd.Timestamp('2012-05-01'), pd.Timestamp('2012-05-02')], - ] - dtype = [ - 'mixed-integer', - 'datetime64' + [[1], ["2"], [3.0]], + [pd.Timestamp("2012-05-01"), pd.Timestamp("2012-05-02")], ] + dtype = ["mixed-integer", "datetime64"] for arr, dt in zip(data, dtype): df = pd.DataFrame(arr) - err_msg = ("The dtype '{}' of the column '0' is not currently " - "supported by liac-arff".format(dt)) + err_msg = ( + "The dtype '{}' of the column '0' is not currently " + "supported by liac-arff".format(dt) + ) with pytest.raises(ValueError, match=err_msg): attributes_arff_from_df(df) def test_create_dataset_numpy(self): - data = np.array( - [ - [1, 2, 3], - [1.2, 2.5, 3.8], - [2, 5, 8], - [0, 1, 0] - ] - ).T + data = np.array([[1, 2, 3], [1.2, 2.5, 3.8], [2, 5, 8], [0, 1, 0]]).T - attributes = [('col_{}'.format(i), 'REAL') - for i in range(data.shape[1])] + attributes = [("col_{}".format(i), "REAL") for i in range(data.shape[1])] dataset = create_dataset( - name='%s-NumPy_testing_dataset' % self._get_sentinel(), - description='Synthetic dataset created from a NumPy array', - creator='OpenML tester', + name="%s-NumPy_testing_dataset" % self._get_sentinel(), + description="Synthetic dataset created from a NumPy array", + creator="OpenML tester", contributor=None, - collection_date='01-01-2018', - language='English', - licence='MIT', - default_target_attribute='col_{}'.format(data.shape[1] - 1), + collection_date="01-01-2018", + language="English", + licence="MIT", + default_target_attribute="col_{}".format(data.shape[1] - 1), row_id_attribute=None, ignore_attribute=None, - citation='None', + citation="None", attributes=attributes, data=data, - version_label='test', - original_data_url='http://openml.github.io/openml-python', - paper_url='http://openml.github.io/openml-python' + version_label="test", + original_data_url="http://openml.github.io/openml-python", + paper_url="http://openml.github.io/openml-python", ) dataset.publish() - TestBase._mark_entity_for_removal('data', dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - dataset.id)) + TestBase._mark_entity_for_removal("data", dataset.id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) self.assertEqual( _get_online_dataset_arff(dataset.id), dataset._dataset, - "Uploaded arff does not match original one" - ) - self.assertEqual( - _get_online_dataset_format(dataset.id), - 'arff', - "Wrong format for dataset" + "Uploaded arff does not match original one", ) 
+ self.assertEqual(_get_online_dataset_format(dataset.id), "arff", "Wrong format for dataset") def test_create_dataset_list(self): data = [ - ['a', 'sunny', 85.0, 85.0, 'FALSE', 'no'], - ['b', 'sunny', 80.0, 90.0, 'TRUE', 'no'], - ['c', 'overcast', 83.0, 86.0, 'FALSE', 'yes'], - ['d', 'rainy', 70.0, 96.0, 'FALSE', 'yes'], - ['e', 'rainy', 68.0, 80.0, 'FALSE', 'yes'], - ['f', 'rainy', 65.0, 70.0, 'TRUE', 'no'], - ['g', 'overcast', 64.0, 65.0, 'TRUE', 'yes'], - ['h', 'sunny', 72.0, 95.0, 'FALSE', 'no'], - ['i', 'sunny', 69.0, 70.0, 'FALSE', 'yes'], - ['j', 'rainy', 75.0, 80.0, 'FALSE', 'yes'], - ['k', 'sunny', 75.0, 70.0, 'TRUE', 'yes'], - ['l', 'overcast', 72.0, 90.0, 'TRUE', 'yes'], - ['m', 'overcast', 81.0, 75.0, 'FALSE', 'yes'], - ['n', 'rainy', 71.0, 91.0, 'TRUE', 'no'], + ["a", "sunny", 85.0, 85.0, "FALSE", "no"], + ["b", "sunny", 80.0, 90.0, "TRUE", "no"], + ["c", "overcast", 83.0, 86.0, "FALSE", "yes"], + ["d", "rainy", 70.0, 96.0, "FALSE", "yes"], + ["e", "rainy", 68.0, 80.0, "FALSE", "yes"], + ["f", "rainy", 65.0, 70.0, "TRUE", "no"], + ["g", "overcast", 64.0, 65.0, "TRUE", "yes"], + ["h", "sunny", 72.0, 95.0, "FALSE", "no"], + ["i", "sunny", 69.0, 70.0, "FALSE", "yes"], + ["j", "rainy", 75.0, 80.0, "FALSE", "yes"], + ["k", "sunny", 75.0, 70.0, "TRUE", "yes"], + ["l", "overcast", 72.0, 90.0, "TRUE", "yes"], + ["m", "overcast", 81.0, 75.0, "FALSE", "yes"], + ["n", "rainy", 71.0, 91.0, "TRUE", "no"], ] attributes = [ - ('rnd_str', 'STRING'), - ('outlook', ['sunny', 'overcast', 'rainy']), - ('temperature', 'REAL'), - ('humidity', 'REAL'), - ('windy', ['TRUE', 'FALSE']), - ('play', ['yes', 'no']), + ("rnd_str", "STRING"), + ("outlook", ["sunny", "overcast", "rainy"]), + ("temperature", "REAL"), + ("humidity", "REAL"), + ("windy", ["TRUE", "FALSE"]), + ("play", ["yes", "no"]), ] dataset = create_dataset( name="%s-ModifiedWeather" % self._get_sentinel(), - description=( - 'Testing dataset upload when the data is a list of lists' - ), - creator='OpenML test', + description=("Testing dataset upload when the data is a list of lists"), + creator="OpenML test", contributor=None, - collection_date='21-09-2018', - language='English', - licence='MIT', - default_target_attribute='play', + collection_date="21-09-2018", + language="English", + licence="MIT", + default_target_attribute="play", row_id_attribute=None, ignore_attribute=None, - citation='None', + citation="None", attributes=attributes, data=data, - version_label='test', - original_data_url='http://openml.github.io/openml-python', - paper_url='http://openml.github.io/openml-python' + version_label="test", + original_data_url="http://openml.github.io/openml-python", + paper_url="http://openml.github.io/openml-python", ) dataset.publish() - TestBase._mark_entity_for_removal('data', dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - dataset.id)) + TestBase._mark_entity_for_removal("data", dataset.id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) self.assertEqual( _get_online_dataset_arff(dataset.id), dataset._dataset, - "Uploaded ARFF does not match original one" - ) - self.assertEqual( - _get_online_dataset_format(dataset.id), - 'arff', - "Wrong format for dataset" + "Uploaded ARFF does not match original one", ) + self.assertEqual(_get_online_dataset_format(dataset.id), "arff", "Wrong format for dataset") def test_create_dataset_sparse(self): # test the scipy.sparse.coo_matrix - sparse_data = scipy.sparse.coo_matrix(( - [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0], - ([0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1]) - )) + sparse_data = scipy.sparse.coo_matrix( + ([0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], ([0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1])) + ) column_names = [ - ('input1', 'REAL'), - ('input2', 'REAL'), - ('y', 'REAL'), + ("input1", "REAL"), + ("input2", "REAL"), + ("y", "REAL"), ] xor_dataset = create_dataset( name="%s-XOR" % self._get_sentinel(), - description='Dataset representing the XOR operation', + description="Dataset representing the XOR operation", creator=None, contributor=None, collection_date=None, - language='English', + language="English", licence=None, - default_target_attribute='y', + default_target_attribute="y", row_id_attribute=None, ignore_attribute=None, citation=None, attributes=column_names, data=sparse_data, - version_label='test', + version_label="test", ) xor_dataset.publish() - TestBase._mark_entity_for_removal('data', xor_dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - xor_dataset.id)) + TestBase._mark_entity_for_removal("data", xor_dataset.id) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], xor_dataset.id) + ) self.assertEqual( _get_online_dataset_arff(xor_dataset.id), xor_dataset._dataset, - "Uploaded ARFF does not match original one" + "Uploaded ARFF does not match original one", ) self.assertEqual( - _get_online_dataset_format(xor_dataset.id), - 'sparse_arff', - "Wrong format for dataset" + _get_online_dataset_format(xor_dataset.id), "sparse_arff", "Wrong format for dataset" ) # test the list of dicts sparse representation - sparse_data = [ - {0: 0.0}, - {1: 1.0, 2: 1.0}, - {0: 1.0, 2: 1.0}, - {0: 1.0, 1: 1.0} - ] + sparse_data = [{0: 0.0}, {1: 1.0, 2: 1.0}, {0: 1.0, 2: 1.0}, {0: 1.0, 1: 1.0}] xor_dataset = create_dataset( name="%s-XOR" % self._get_sentinel(), - description='Dataset representing the XOR operation', + description="Dataset representing the XOR operation", creator=None, contributor=None, collection_date=None, - language='English', + language="English", licence=None, - default_target_attribute='y', + default_target_attribute="y", row_id_attribute=None, ignore_attribute=None, citation=None, attributes=column_names, data=sparse_data, - version_label='test', + version_label="test", ) xor_dataset.publish() - TestBase._mark_entity_for_removal('data', xor_dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - xor_dataset.id)) + TestBase._mark_entity_for_removal("data", xor_dataset.id) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], xor_dataset.id) + ) self.assertEqual( _get_online_dataset_arff(xor_dataset.id), xor_dataset._dataset, - "Uploaded ARFF does not match original one" + "Uploaded ARFF does not match original one", ) self.assertEqual( - _get_online_dataset_format(xor_dataset.id), - 'sparse_arff', - "Wrong format for dataset" + _get_online_dataset_format(xor_dataset.id), "sparse_arff", "Wrong format for dataset" ) def test_create_invalid_dataset(self): data = [ - 'sunny', - 'overcast', - 'overcast', - 'rainy', - 'rainy', - 'rainy', - 'overcast', - 'sunny', - 'sunny', - 'rainy', - 'sunny', - 'overcast', - 'overcast', - 'rainy', + "sunny", + "overcast", + "overcast", + "rainy", + "rainy", + "rainy", + "overcast", + "sunny", + "sunny", + "rainy", + "sunny", + "overcast", + "overcast", + "rainy", ] param = self._get_empty_param_for_dataset() - param['data'] = data + param["data"] = data - self.assertRaises( - ValueError, - create_dataset, - **param 
- ) + self.assertRaises(ValueError, create_dataset, **param) - param['data'] = data[0] - self.assertRaises( - ValueError, - create_dataset, - **param - ) + param["data"] = data[0] + self.assertRaises(ValueError, create_dataset, **param) def test_get_online_dataset_arff(self): dataset_id = 100 # Australian @@ -850,10 +853,9 @@ def test_get_online_dataset_arff(self): decoder.decode( _get_online_dataset_arff(dataset_id), encode_nominal=True, - return_type=arff.DENSE - if d_format == 'arff' else arff.COO + return_type=arff.DENSE if d_format == "arff" else arff.COO, ), - "ARFF files are not equal" + "ARFF files are not equal", ) def test_get_online_dataset_format(self): @@ -865,35 +867,34 @@ def test_get_online_dataset_format(self): self.assertEqual( (dataset.format).lower(), _get_online_dataset_format(dataset_id), - "The format of the ARFF files is different" + "The format of the ARFF files is different", ) def test_create_dataset_pandas(self): data = [ - ['a', 'sunny', 85.0, 85.0, 'FALSE', 'no'], - ['b', 'sunny', 80.0, 90.0, 'TRUE', 'no'], - ['c', 'overcast', 83.0, 86.0, 'FALSE', 'yes'], - ['d', 'rainy', 70.0, 96.0, 'FALSE', 'yes'], - ['e', 'rainy', 68.0, 80.0, 'FALSE', 'yes'] + ["a", "sunny", 85.0, 85.0, "FALSE", "no"], + ["b", "sunny", 80.0, 90.0, "TRUE", "no"], + ["c", "overcast", 83.0, 86.0, "FALSE", "yes"], + ["d", "rainy", 70.0, 96.0, "FALSE", "yes"], + ["e", "rainy", 68.0, 80.0, "FALSE", "yes"], ] - column_names = ['rnd_str', 'outlook', 'temperature', 'humidity', - 'windy', 'play'] + column_names = ["rnd_str", "outlook", "temperature", "humidity", "windy", "play"] df = pd.DataFrame(data, columns=column_names) # enforce the type of each column - df['outlook'] = df['outlook'].astype('category') - df['windy'] = df['windy'].astype('bool') - df['play'] = df['play'].astype('category') + df["outlook"] = df["outlook"].astype("category") + df["windy"] = df["windy"].astype("bool") + df["play"] = df["play"].astype("category") # meta-information - name = '%s-pandas_testing_dataset' % self._get_sentinel() - description = 'Synthetic dataset created from a Pandas DataFrame' - creator = 'OpenML tester' - collection_date = '01-01-2018' - language = 'English' - licence = 'MIT' - default_target_attribute = 'play' - citation = 'None' - original_data_url = 'http://openml.github.io/openml-python' - paper_url = 'http://openml.github.io/openml-python' + name = "%s-pandas_testing_dataset" % self._get_sentinel() + description = "Synthetic dataset created from a Pandas DataFrame" + creator = "OpenML tester" + collection_date = "01-01-2018" + language = "English" + licence = "MIT" + default_target_attribute = "play" + citation = "None" + original_data_url = "http://openml.github.io/openml-python" + paper_url = "http://openml.github.io/openml-python" dataset = openml.datasets.functions.create_dataset( name=name, description=description, @@ -906,31 +907,29 @@ def test_create_dataset_pandas(self): row_id_attribute=None, ignore_attribute=None, citation=citation, - attributes='auto', + attributes="auto", data=df, - version_label='test', + version_label="test", original_data_url=original_data_url, - paper_url=paper_url + paper_url=paper_url, ) dataset.publish() - TestBase._mark_entity_for_removal('data', dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - dataset.id)) + TestBase._mark_entity_for_removal("data", dataset.id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) self.assertEqual( _get_online_dataset_arff(dataset.id), 
dataset._dataset, - "Uploaded ARFF does not match original one" + "Uploaded ARFF does not match original one", ) # Check that DataFrame with Sparse columns are supported properly - sparse_data = scipy.sparse.coo_matrix(( - [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - ([0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1]) - )) - column_names = ['input1', 'input2', 'y'] + sparse_data = scipy.sparse.coo_matrix( + ([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], ([0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1])) + ) + column_names = ["input1", "input2", "y"] df = pd.DataFrame.sparse.from_spmatrix(sparse_data, columns=column_names) # meta-information - description = 'Synthetic dataset created from a Pandas DataFrame with Sparse columns' + description = "Synthetic dataset created from a Pandas DataFrame with Sparse columns" dataset = openml.datasets.functions.create_dataset( name=name, description=description, @@ -943,33 +942,30 @@ def test_create_dataset_pandas(self): row_id_attribute=None, ignore_attribute=None, citation=citation, - attributes='auto', + attributes="auto", data=df, - version_label='test', + version_label="test", original_data_url=original_data_url, - paper_url=paper_url + paper_url=paper_url, ) dataset.publish() - TestBase._mark_entity_for_removal('data', dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - dataset.id)) + TestBase._mark_entity_for_removal("data", dataset.id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) self.assertEqual( _get_online_dataset_arff(dataset.id), dataset._dataset, - "Uploaded ARFF does not match original one" + "Uploaded ARFF does not match original one", ) self.assertEqual( - _get_online_dataset_format(dataset.id), - 'sparse_arff', - "Wrong format for dataset" + _get_online_dataset_format(dataset.id), "sparse_arff", "Wrong format for dataset" ) # Check that we can overwrite the attributes - data = [['a'], ['b'], ['c'], ['d'], ['e']] - column_names = ['rnd_str'] + data = [["a"], ["b"], ["c"], ["d"], ["e"]] + column_names = ["rnd_str"] df = pd.DataFrame(data, columns=column_names) - df['rnd_str'] = df['rnd_str'].astype('category') - attributes = {'rnd_str': ['a', 'b', 'c', 'd', 'e', 'f', 'g']} + df["rnd_str"] = df["rnd_str"].astype("category") + attributes = {"rnd_str": ["a", "b", "c", "d", "e", "f", "g"]} dataset = openml.datasets.functions.create_dataset( name=name, description=description, @@ -984,49 +980,44 @@ def test_create_dataset_pandas(self): citation=citation, attributes=attributes, data=df, - version_label='test', + version_label="test", original_data_url=original_data_url, - paper_url=paper_url + paper_url=paper_url, ) dataset.publish() - TestBase._mark_entity_for_removal('data', dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - dataset.id)) + TestBase._mark_entity_for_removal("data", dataset.id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) downloaded_data = _get_online_dataset_arff(dataset.id) self.assertEqual( - downloaded_data, - dataset._dataset, - "Uploaded ARFF does not match original one" + downloaded_data, dataset._dataset, "Uploaded ARFF does not match original one" ) - self.assertTrue( - '@ATTRIBUTE rnd_str {a, b, c, d, e, f, g}' in downloaded_data) + self.assertTrue("@ATTRIBUTE rnd_str {a, b, c, d, e, f, g}" in downloaded_data) def test_ignore_attributes_dataset(self): data = [ - ['a', 'sunny', 85.0, 85.0, 'FALSE', 'no'], - ['b', 'sunny', 80.0, 90.0, 'TRUE', 'no'], - ['c', 
'overcast', 83.0, 86.0, 'FALSE', 'yes'], - ['d', 'rainy', 70.0, 96.0, 'FALSE', 'yes'], - ['e', 'rainy', 68.0, 80.0, 'FALSE', 'yes'] + ["a", "sunny", 85.0, 85.0, "FALSE", "no"], + ["b", "sunny", 80.0, 90.0, "TRUE", "no"], + ["c", "overcast", 83.0, 86.0, "FALSE", "yes"], + ["d", "rainy", 70.0, 96.0, "FALSE", "yes"], + ["e", "rainy", 68.0, 80.0, "FALSE", "yes"], ] - column_names = ['rnd_str', 'outlook', 'temperature', 'humidity', - 'windy', 'play'] + column_names = ["rnd_str", "outlook", "temperature", "humidity", "windy", "play"] df = pd.DataFrame(data, columns=column_names) # enforce the type of each column - df['outlook'] = df['outlook'].astype('category') - df['windy'] = df['windy'].astype('bool') - df['play'] = df['play'].astype('category') + df["outlook"] = df["outlook"].astype("category") + df["windy"] = df["windy"].astype("bool") + df["play"] = df["play"].astype("category") # meta-information - name = '%s-pandas_testing_dataset' % self._get_sentinel() - description = 'Synthetic dataset created from a Pandas DataFrame' - creator = 'OpenML tester' - collection_date = '01-01-2018' - language = 'English' - licence = 'MIT' - default_target_attribute = 'play' - citation = 'None' - original_data_url = 'http://openml.github.io/openml-python' - paper_url = 'http://openml.github.io/openml-python' + name = "%s-pandas_testing_dataset" % self._get_sentinel() + description = "Synthetic dataset created from a Pandas DataFrame" + creator = "OpenML tester" + collection_date = "01-01-2018" + language = "English" + licence = "MIT" + default_target_attribute = "play" + citation = "None" + original_data_url = "http://openml.github.io/openml-python" + paper_url = "http://openml.github.io/openml-python" # we use the create_dataset function which call the OpenMLDataset # constructor @@ -1041,18 +1032,18 @@ def test_ignore_attributes_dataset(self): licence=licence, default_target_attribute=default_target_attribute, row_id_attribute=None, - ignore_attribute='outlook', + ignore_attribute="outlook", citation=citation, - attributes='auto', + attributes="auto", data=df, - version_label='test', + version_label="test", original_data_url=original_data_url, - paper_url=paper_url + paper_url=paper_url, ) - self.assertEqual(dataset.ignore_attribute, ['outlook']) + self.assertEqual(dataset.ignore_attribute, ["outlook"]) # pass a list to ignore_attribute - ignore_attribute = ['outlook', 'windy'] + ignore_attribute = ["outlook", "windy"] dataset = openml.datasets.functions.create_dataset( name=name, description=description, @@ -1065,16 +1056,16 @@ def test_ignore_attributes_dataset(self): row_id_attribute=None, ignore_attribute=ignore_attribute, citation=citation, - attributes='auto', + attributes="auto", data=df, - version_label='test', + version_label="test", original_data_url=original_data_url, - paper_url=paper_url + paper_url=paper_url, ) self.assertEqual(dataset.ignore_attribute, ignore_attribute) # raise an error if unknown type - err_msg = 'Wrong data type for ignore_attribute. Should be list.' + err_msg = "Wrong data type for ignore_attribute. Should be list." 
with pytest.raises(ValueError, match=err_msg): openml.datasets.functions.create_dataset( name=name, @@ -1086,45 +1077,44 @@ def test_ignore_attributes_dataset(self): licence=licence, default_target_attribute=default_target_attribute, row_id_attribute=None, - ignore_attribute=tuple(['outlook', 'windy']), + ignore_attribute=tuple(["outlook", "windy"]), citation=citation, - attributes='auto', + attributes="auto", data=df, - version_label='test', + version_label="test", original_data_url=original_data_url, - paper_url=paper_url + paper_url=paper_url, ) def test_publish_fetch_ignore_attribute(self): """Test to upload and retrieve dataset and check ignore_attributes""" data = [ - ['a', 'sunny', 85.0, 85.0, 'FALSE', 'no'], - ['b', 'sunny', 80.0, 90.0, 'TRUE', 'no'], - ['c', 'overcast', 83.0, 86.0, 'FALSE', 'yes'], - ['d', 'rainy', 70.0, 96.0, 'FALSE', 'yes'], - ['e', 'rainy', 68.0, 80.0, 'FALSE', 'yes'] + ["a", "sunny", 85.0, 85.0, "FALSE", "no"], + ["b", "sunny", 80.0, 90.0, "TRUE", "no"], + ["c", "overcast", 83.0, 86.0, "FALSE", "yes"], + ["d", "rainy", 70.0, 96.0, "FALSE", "yes"], + ["e", "rainy", 68.0, 80.0, "FALSE", "yes"], ] - column_names = ['rnd_str', 'outlook', 'temperature', 'humidity', - 'windy', 'play'] + column_names = ["rnd_str", "outlook", "temperature", "humidity", "windy", "play"] df = pd.DataFrame(data, columns=column_names) # enforce the type of each column - df['outlook'] = df['outlook'].astype('category') - df['windy'] = df['windy'].astype('bool') - df['play'] = df['play'].astype('category') + df["outlook"] = df["outlook"].astype("category") + df["windy"] = df["windy"].astype("bool") + df["play"] = df["play"].astype("category") # meta-information - name = '%s-pandas_testing_dataset' % self._get_sentinel() - description = 'Synthetic dataset created from a Pandas DataFrame' - creator = 'OpenML tester' - collection_date = '01-01-2018' - language = 'English' - licence = 'MIT' - default_target_attribute = 'play' - citation = 'None' - original_data_url = 'http://openml.github.io/openml-python' - paper_url = 'http://openml.github.io/openml-python' + name = "%s-pandas_testing_dataset" % self._get_sentinel() + description = "Synthetic dataset created from a Pandas DataFrame" + creator = "OpenML tester" + collection_date = "01-01-2018" + language = "English" + licence = "MIT" + default_target_attribute = "play" + citation = "None" + original_data_url = "http://openml.github.io/openml-python" + paper_url = "http://openml.github.io/openml-python" # pass a list to ignore_attribute - ignore_attribute = ['outlook', 'windy'] + ignore_attribute = ["outlook", "windy"] dataset = openml.datasets.functions.create_dataset( name=name, description=description, @@ -1137,18 +1127,17 @@ def test_publish_fetch_ignore_attribute(self): row_id_attribute=None, ignore_attribute=ignore_attribute, citation=citation, - attributes='auto', + attributes="auto", data=df, - version_label='test', + version_label="test", original_data_url=original_data_url, - paper_url=paper_url + paper_url=paper_url, ) # publish dataset dataset.publish() - TestBase._mark_entity_for_removal('data', dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - dataset.id)) + TestBase._mark_entity_for_removal("data", dataset.id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) # test if publish was successful self.assertIsInstance(dataset.id, int) @@ -1174,26 +1163,22 @@ def test_publish_fetch_ignore_attribute(self): def 
test_create_dataset_row_id_attribute_error(self): # meta-information - name = '%s-pandas_testing_dataset' % self._get_sentinel() - description = 'Synthetic dataset created from a Pandas DataFrame' - creator = 'OpenML tester' - collection_date = '01-01-2018' - language = 'English' - licence = 'MIT' - default_target_attribute = 'target' - citation = 'None' - original_data_url = 'http://openml.github.io/openml-python' - paper_url = 'http://openml.github.io/openml-python' + name = "%s-pandas_testing_dataset" % self._get_sentinel() + description = "Synthetic dataset created from a Pandas DataFrame" + creator = "OpenML tester" + collection_date = "01-01-2018" + language = "English" + licence = "MIT" + default_target_attribute = "target" + citation = "None" + original_data_url = "http://openml.github.io/openml-python" + paper_url = "http://openml.github.io/openml-python" # Check that the index name is well inferred. - data = [['a', 1, 0], - ['b', 2, 1], - ['c', 3, 0], - ['d', 4, 1], - ['e', 5, 0]] - column_names = ['rnd_str', 'integer', 'target'] + data = [["a", 1, 0], ["b", 2, 1], ["c", 3, 0], ["d", 4, 1], ["e", 5, 0]] + column_names = ["rnd_str", "integer", "target"] df = pd.DataFrame(data, columns=column_names) # affecting row_id_attribute to an unknown column should raise an error - err_msg = ("should be one of the data attribute.") + err_msg = "should be one of the data attribute." with pytest.raises(ValueError, match=err_msg): openml.datasets.functions.create_dataset( name=name, @@ -1206,40 +1191,36 @@ def test_create_dataset_row_id_attribute_error(self): default_target_attribute=default_target_attribute, ignore_attribute=None, citation=citation, - attributes='auto', + attributes="auto", data=df, - row_id_attribute='unknown_row_id', - version_label='test', + row_id_attribute="unknown_row_id", + version_label="test", original_data_url=original_data_url, - paper_url=paper_url + paper_url=paper_url, ) def test_create_dataset_row_id_attribute_inference(self): # meta-information - name = '%s-pandas_testing_dataset' % self._get_sentinel() - description = 'Synthetic dataset created from a Pandas DataFrame' - creator = 'OpenML tester' - collection_date = '01-01-2018' - language = 'English' - licence = 'MIT' - default_target_attribute = 'target' - citation = 'None' - original_data_url = 'http://openml.github.io/openml-python' - paper_url = 'http://openml.github.io/openml-python' + name = "%s-pandas_testing_dataset" % self._get_sentinel() + description = "Synthetic dataset created from a Pandas DataFrame" + creator = "OpenML tester" + collection_date = "01-01-2018" + language = "English" + licence = "MIT" + default_target_attribute = "target" + citation = "None" + original_data_url = "http://openml.github.io/openml-python" + paper_url = "http://openml.github.io/openml-python" # Check that the index name is well inferred. 
- data = [['a', 1, 0], - ['b', 2, 1], - ['c', 3, 0], - ['d', 4, 1], - ['e', 5, 0]] - column_names = ['rnd_str', 'integer', 'target'] + data = [["a", 1, 0], ["b", 2, 1], ["c", 3, 0], ["d", 4, 1], ["e", 5, 0]] + column_names = ["rnd_str", "integer", "target"] df = pd.DataFrame(data, columns=column_names) - row_id_attr = [None, 'integer'] - df_index_name = [None, 'index_name'] - expected_row_id = [None, 'index_name', 'integer', 'integer'] - for output_row_id, (row_id, index_name) in zip(expected_row_id, - product(row_id_attr, - df_index_name)): + row_id_attr = [None, "integer"] + df_index_name = [None, "index_name"] + expected_row_id = [None, "index_name", "integer", "integer"] + for output_row_id, (row_id, index_name) in zip( + expected_row_id, product(row_id_attr, df_index_name) + ): df.index.name = index_name dataset = openml.datasets.functions.create_dataset( name=name, @@ -1252,20 +1233,21 @@ def test_create_dataset_row_id_attribute_inference(self): default_target_attribute=default_target_attribute, ignore_attribute=None, citation=citation, - attributes='auto', + attributes="auto", data=df, row_id_attribute=row_id, - version_label='test', + version_label="test", original_data_url=original_data_url, - paper_url=paper_url + paper_url=paper_url, ) self.assertEqual(dataset.row_id_attribute, output_row_id) dataset.publish() - TestBase._mark_entity_for_removal('data', dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - dataset.id)) + TestBase._mark_entity_for_removal("data", dataset.id) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], dataset.id) + ) arff_dataset = arff.loads(_get_online_dataset_arff(dataset.id)) - arff_data = np.array(arff_dataset['data'], dtype=object) + arff_data = np.array(arff_dataset["data"], dtype=object) # if we set the name of the index then the index will be added to # the data expected_shape = (5, 3) if index_name is None else (5, 4) @@ -1273,21 +1255,18 @@ def test_create_dataset_row_id_attribute_inference(self): def test_create_dataset_attributes_auto_without_df(self): # attributes cannot be inferred without passing a dataframe - data = np.array([[1, 2, 3], - [1.2, 2.5, 3.8], - [2, 5, 8], - [0, 1, 0]]).T - attributes = 'auto' - name = 'NumPy_testing_dataset' - description = 'Synthetic dataset created from a NumPy array' - creator = 'OpenML tester' - collection_date = '01-01-2018' - language = 'English' - licence = 'MIT' - default_target_attribute = 'col_{}'.format(data.shape[1] - 1) - citation = 'None' - original_data_url = 'http://openml.github.io/openml-python' - paper_url = 'http://openml.github.io/openml-python' + data = np.array([[1, 2, 3], [1.2, 2.5, 3.8], [2, 5, 8], [0, 1, 0]]).T + attributes = "auto" + name = "NumPy_testing_dataset" + description = "Synthetic dataset created from a NumPy array" + creator = "OpenML tester" + collection_date = "01-01-2018" + language = "English" + licence = "MIT" + default_target_attribute = "col_{}".format(data.shape[1] - 1) + citation = "None" + original_data_url = "http://openml.github.io/openml-python" + paper_url = "http://openml.github.io/openml-python" err_msg = "Automatically inferring attributes requires a pandas" with pytest.raises(ValueError, match=err_msg): openml.datasets.functions.create_dataset( @@ -1304,9 +1283,9 @@ def test_create_dataset_attributes_auto_without_df(self): citation=citation, attributes=attributes, data=data, - version_label='test', + version_label="test", original_data_url=original_data_url, - paper_url=paper_url + 
paper_url=paper_url, ) def test_list_qualities(self): @@ -1317,7 +1296,7 @@ def test_list_qualities(self): def test_get_dataset_cache_format_pickle(self): dataset = openml.datasets.get_dataset(1) self.assertEqual(type(dataset), OpenMLDataset) - self.assertEqual(dataset.name, 'anneal') + self.assertEqual(dataset.name, "anneal") self.assertGreater(len(dataset.features), 1) self.assertGreater(len(dataset.qualities), 4) @@ -1329,21 +1308,21 @@ def test_get_dataset_cache_format_pickle(self): def test_get_dataset_cache_format_feather(self): - dataset = openml.datasets.get_dataset(128, cache_format='feather') + dataset = openml.datasets.get_dataset(128, cache_format="feather") # Check if dataset is written to cache directory using feather cache_dir = openml.config.get_cache_directory() - cache_dir_for_id = os.path.join(cache_dir, 'datasets', '128') - feather_file = os.path.join(cache_dir_for_id, 'dataset.feather') - pickle_file = os.path.join(cache_dir_for_id, 'dataset.feather.attributes.pkl.py3') + cache_dir_for_id = os.path.join(cache_dir, "datasets", "128") + feather_file = os.path.join(cache_dir_for_id, "dataset.feather") + pickle_file = os.path.join(cache_dir_for_id, "dataset.feather.attributes.pkl.py3") data = pd.read_feather(feather_file) - self.assertTrue(os.path.isfile(feather_file), msg='Feather file is missing') - self.assertTrue(os.path.isfile(pickle_file), msg='Attributes pickle file is missing') + self.assertTrue(os.path.isfile(feather_file), msg="Feather file is missing") + self.assertTrue(os.path.isfile(pickle_file), msg="Attributes pickle file is missing") self.assertEqual(data.shape, (150, 5)) # Check if get_data is able to retrieve feather data self.assertEqual(type(dataset), OpenMLDataset) - self.assertEqual(dataset.name, 'iris') + self.assertEqual(dataset.name, "iris") self.assertGreater(len(dataset.features), 1) self.assertGreater(len(dataset.qualities), 4) diff --git a/tests/test_evaluations/test_evaluation_functions.py b/tests/test_evaluations/test_evaluation_functions.py index 25651a8cc..6fcaea2d4 100644 --- a/tests/test_evaluations/test_evaluation_functions.py +++ b/tests/test_evaluations/test_evaluation_functions.py @@ -9,30 +9,29 @@ class TestEvaluationFunctions(TestBase): _multiprocess_can_split_ = True def _check_list_evaluation_setups(self, **kwargs): - evals_setups = openml.evaluations.list_evaluations_setups("predictive_accuracy", - **kwargs, - sort_order='desc', - output_format='dataframe') - evals = openml.evaluations.list_evaluations("predictive_accuracy", - **kwargs, - sort_order='desc', - output_format='dataframe') + evals_setups = openml.evaluations.list_evaluations_setups( + "predictive_accuracy", **kwargs, sort_order="desc", output_format="dataframe" + ) + evals = openml.evaluations.list_evaluations( + "predictive_accuracy", **kwargs, sort_order="desc", output_format="dataframe" + ) # Check if list is non-empty self.assertGreater(len(evals_setups), 0) # Check if length is accurate self.assertEqual(len(evals_setups), len(evals)) # Check if output from sort is sorted in the right order - self.assertSequenceEqual(sorted(evals_setups['value'].tolist(), reverse=True), - evals_setups['value'].tolist()) + self.assertSequenceEqual( + sorted(evals_setups["value"].tolist(), reverse=True), evals_setups["value"].tolist() + ) # Check if output and order of list_evaluations is preserved - self.assertSequenceEqual(evals_setups['run_id'].tolist(), evals['run_id'].tolist()) + self.assertSequenceEqual(evals_setups["run_id"].tolist(), evals["run_id"].tolist()) # Check if the 
hyper-parameter column is as accurate and flow_id for index, row in evals_setups.iterrows(): - params = openml.runs.get_run(row['run_id']).parameter_settings - list1 = [param['oml:value'] for param in params] - list2 = list(row['parameters'].values()) + params = openml.runs.get_run(row["run_id"]).parameter_settings + list1 = [param["oml:value"] for param in params] + list2 = list(row["parameters"].values()) # check if all values are equal self.assertSequenceEqual(sorted(list1), sorted(list2)) return evals_setups @@ -42,8 +41,7 @@ def test_evaluation_list_filter_task(self): task_id = 7312 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", - task=[task_id]) + evaluations = openml.evaluations.list_evaluations("predictive_accuracy", task=[task_id]) self.assertGreater(len(evaluations), 100) for run_id in evaluations.keys(): @@ -57,10 +55,10 @@ def test_evaluation_list_filter_uploader_ID_16(self): openml.config.server = self.production_server uploader_id = 16 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", - uploader=[uploader_id], - output_format='dataframe') - self.assertEqual(evaluations['uploader'].unique(), [uploader_id]) + evaluations = openml.evaluations.list_evaluations( + "predictive_accuracy", uploader=[uploader_id], output_format="dataframe" + ) + self.assertEqual(evaluations["uploader"].unique(), [uploader_id]) self.assertGreater(len(evaluations), 50) @@ -68,8 +66,7 @@ def test_evaluation_list_filter_uploader_ID_10(self): openml.config.server = self.production_server setup_id = 10 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", - setup=[setup_id]) + evaluations = openml.evaluations.list_evaluations("predictive_accuracy", setup=[setup_id]) self.assertGreater(len(evaluations), 50) for run_id in evaluations.keys(): @@ -84,8 +81,7 @@ def test_evaluation_list_filter_flow(self): flow_id = 100 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", - flow=[flow_id]) + evaluations = openml.evaluations.list_evaluations("predictive_accuracy", flow=[flow_id]) self.assertGreater(len(evaluations), 2) for run_id in evaluations.keys(): @@ -100,8 +96,7 @@ def test_evaluation_list_filter_run(self): run_id = 12 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", - run=[run_id]) + evaluations = openml.evaluations.list_evaluations("predictive_accuracy", run=[run_id]) self.assertEqual(len(evaluations), 1) for run_id in evaluations.keys(): @@ -114,14 +109,15 @@ def test_evaluation_list_filter_run(self): def test_evaluation_list_limit(self): openml.config.server = self.production_server - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", - size=100, offset=100) + evaluations = openml.evaluations.list_evaluations( + "predictive_accuracy", size=100, offset=100 + ) self.assertEqual(len(evaluations), 100) def test_list_evaluations_empty(self): - evaluations = openml.evaluations.list_evaluations('unexisting_measure') + evaluations = openml.evaluations.list_evaluations("unexisting_measure") if len(evaluations) > 0: - raise ValueError('UnitTest Outdated, got somehow results') + raise ValueError("UnitTest Outdated, got somehow results") self.assertIsInstance(evaluations, dict) @@ -133,8 +129,14 @@ def test_evaluation_list_per_fold(self): flow_ids = [6969] evaluations = openml.evaluations.list_evaluations( - "predictive_accuracy", size=size, offset=0, task=task_ids, - flow=flow_ids, uploader=uploader_ids, per_fold=True) + "predictive_accuracy", + size=size, + offset=0, + 
task=task_ids, + flow=flow_ids, + uploader=uploader_ids, + per_fold=True, + ) self.assertEqual(len(evaluations), size) for run_id in evaluations.keys(): @@ -144,8 +146,14 @@ def test_evaluation_list_per_fold(self): # added in the future evaluations = openml.evaluations.list_evaluations( - "predictive_accuracy", size=size, offset=0, task=task_ids, - flow=flow_ids, uploader=uploader_ids, per_fold=False) + "predictive_accuracy", + size=size, + offset=0, + task=task_ids, + flow=flow_ids, + uploader=uploader_ids, + per_fold=False, + ) for run_id in evaluations.keys(): self.assertIsNotNone(evaluations[run_id].value) self.assertIsNone(evaluations[run_id].values) @@ -156,10 +164,12 @@ def test_evaluation_list_sort(self): task_id = 6 # Get all evaluations of the task unsorted_eval = openml.evaluations.list_evaluations( - "predictive_accuracy", offset=0, task=[task_id]) + "predictive_accuracy", offset=0, task=[task_id] + ) # Get top 10 evaluations of the same task sorted_eval = openml.evaluations.list_evaluations( - "predictive_accuracy", size=size, offset=0, task=[task_id], sort_order="desc") + "predictive_accuracy", size=size, offset=0, task=[task_id], sort_order="desc" + ) self.assertEqual(len(sorted_eval), size) self.assertGreater(len(unsorted_eval), 0) sorted_output = [evaluation.value for evaluation in sorted_eval.values()] @@ -183,14 +193,16 @@ def test_list_evaluations_setups_filter_flow(self): size = 100 evals = self._check_list_evaluation_setups(flow=flow_id, size=size) # check if parameters in separate columns works - evals_cols = openml.evaluations.list_evaluations_setups("predictive_accuracy", - flow=flow_id, size=size, - sort_order='desc', - output_format='dataframe', - parameters_in_separate_columns=True - ) - columns = (list(evals_cols.columns)) - keys = (list(evals['parameters'].values[0].keys())) + evals_cols = openml.evaluations.list_evaluations_setups( + "predictive_accuracy", + flow=flow_id, + size=size, + sort_order="desc", + output_format="dataframe", + parameters_in_separate_columns=True, + ) + columns = list(evals_cols.columns) + keys = list(evals["parameters"].values[0].keys()) self.assertTrue(all(elem in columns for elem in keys)) def test_list_evaluations_setups_filter_task(self): diff --git a/tests/test_evaluations/test_evaluations_example.py b/tests/test_evaluations/test_evaluations_example.py index 50e3e4079..61b6c359e 100644 --- a/tests/test_evaluations/test_evaluations_example.py +++ b/tests/test_evaluations/test_evaluations_example.py @@ -4,7 +4,6 @@ class TestEvaluationsExample(unittest.TestCase): - def test_example_python_paper(self): # Example script which will appear in the upcoming OpenML-Python paper # This test ensures that the example will keep running! @@ -14,23 +13,23 @@ def test_example_python_paper(self): import matplotlib.pyplot as plt df = openml.evaluations.list_evaluations_setups( - 'predictive_accuracy', + "predictive_accuracy", flow=[8353], task=[6], - output_format='dataframe', + output_format="dataframe", parameters_in_separate_columns=True, ) # Choose an SVM flow, for example 8353, and a task. 
- hp_names = ['sklearn.svm.classes.SVC(16)_C', 'sklearn.svm.classes.SVC(16)_gamma'] + hp_names = ["sklearn.svm.classes.SVC(16)_C", "sklearn.svm.classes.SVC(16)_gamma"] df[hp_names] = df[hp_names].astype(float).apply(np.log) - C, gamma, score = df[hp_names[0]], df[hp_names[1]], df['value'] + C, gamma, score = df[hp_names[0]], df[hp_names[1]], df["value"] - cntr = plt.tricontourf(C, gamma, score, levels=12, cmap='RdBu_r') - plt.colorbar(cntr, label='accuracy') + cntr = plt.tricontourf(C, gamma, score, levels=12, cmap="RdBu_r") + plt.colorbar(cntr, label="accuracy") plt.xlim((min(C), max(C))) plt.ylim((min(gamma), max(gamma))) - plt.xlabel('C (log10)', size=16) - plt.ylabel('gamma (log10)', size=16) - plt.title('SVM performance landscape', size=20) + plt.xlabel("C (log10)", size=16) + plt.ylabel("gamma (log10)", size=16) + plt.title("SVM performance landscape", size=20) plt.tight_layout() diff --git a/tests/test_extensions/test_functions.py b/tests/test_extensions/test_functions.py index 3da91b789..85361cc02 100644 --- a/tests/test_extensions/test_functions.py +++ b/tests/test_extensions/test_functions.py @@ -8,7 +8,7 @@ class DummyFlow: - external_version = 'DummyFlow==0.1' + external_version = "DummyFlow==0.1" class DummyModel: @@ -16,22 +16,20 @@ class DummyModel: class DummyExtension1: - @staticmethod def can_handle_flow(flow): - if not inspect.stack()[2].filename.endswith('test_functions.py'): + if not inspect.stack()[2].filename.endswith("test_functions.py"): return False return True @staticmethod def can_handle_model(model): - if not inspect.stack()[2].filename.endswith('test_functions.py'): + if not inspect.stack()[2].filename.endswith("test_functions.py"): return False return True class DummyExtension2: - @staticmethod def can_handle_flow(flow): return False @@ -61,14 +59,13 @@ def _unregister(): class TestInit(openml.testing.TestBase): - def setUp(self): super().setUp() _unregister() def test_get_extension_by_flow(self): self.assertIsNone(get_extension_by_flow(DummyFlow())) - with self.assertRaisesRegex(ValueError, 'No extension registered which can handle flow:'): + with self.assertRaisesRegex(ValueError, "No extension registered which can handle flow:"): get_extension_by_flow(DummyFlow(), raise_if_no_extension=True) register_extension(DummyExtension1) self.assertIsInstance(get_extension_by_flow(DummyFlow()), DummyExtension1) @@ -76,14 +73,13 @@ def test_get_extension_by_flow(self): self.assertIsInstance(get_extension_by_flow(DummyFlow()), DummyExtension1) register_extension(DummyExtension1) with self.assertRaisesRegex( - ValueError, - 'Multiple extensions registered which can handle flow:', + ValueError, "Multiple extensions registered which can handle flow:", ): get_extension_by_flow(DummyFlow()) def test_get_extension_by_model(self): self.assertIsNone(get_extension_by_model(DummyModel())) - with self.assertRaisesRegex(ValueError, 'No extension registered which can handle model:'): + with self.assertRaisesRegex(ValueError, "No extension registered which can handle model:"): get_extension_by_model(DummyModel(), raise_if_no_extension=True) register_extension(DummyExtension1) self.assertIsInstance(get_extension_by_model(DummyModel()), DummyExtension1) @@ -91,7 +87,6 @@ def test_get_extension_by_model(self): self.assertIsInstance(get_extension_by_model(DummyModel()), DummyExtension1) register_extension(DummyExtension1) with self.assertRaisesRegex( - ValueError, - 'Multiple extensions registered which can handle model:', + ValueError, "Multiple extensions registered which can handle 
model:", ): get_extension_by_model(DummyModel()) diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index bce58077c..48832b58f 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -72,52 +72,57 @@ def setUp(self): self.extension = SklearnExtension() def test_serialize_model(self): - with mock.patch.object(self.extension, '_check_dependencies') as check_dependencies_mock: - model = sklearn.tree.DecisionTreeClassifier(criterion='entropy', - max_features='auto', - max_leaf_nodes=2000) + with mock.patch.object(self.extension, "_check_dependencies") as check_dependencies_mock: + model = sklearn.tree.DecisionTreeClassifier( + criterion="entropy", max_features="auto", max_leaf_nodes=2000 + ) - fixture_name = 'sklearn.tree.tree.DecisionTreeClassifier' - fixture_short_name = 'sklearn.DecisionTreeClassifier' + fixture_name = "sklearn.tree.tree.DecisionTreeClassifier" + fixture_short_name = "sklearn.DecisionTreeClassifier" # str obtained from self.extension._get_sklearn_description(model) - fixture_description = 'A decision tree classifier.' - version_fixture = 'sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9' \ - % sklearn.__version__ + fixture_description = "A decision tree classifier." + version_fixture = "sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9" % sklearn.__version__ # min_impurity_decrease has been introduced in 0.20 # min_impurity_split has been deprecated in 0.20 if LooseVersion(sklearn.__version__) < "0.19": - fixture_parameters = \ - OrderedDict((('class_weight', 'null'), - ('criterion', '"entropy"'), - ('max_depth', 'null'), - ('max_features', '"auto"'), - ('max_leaf_nodes', '2000'), - ('min_impurity_split', '1e-07'), - ('min_samples_leaf', '1'), - ('min_samples_split', '2'), - ('min_weight_fraction_leaf', '0.0'), - ('presort', 'false'), - ('random_state', 'null'), - ('splitter', '"best"'))) + fixture_parameters = OrderedDict( + ( + ("class_weight", "null"), + ("criterion", '"entropy"'), + ("max_depth", "null"), + ("max_features", '"auto"'), + ("max_leaf_nodes", "2000"), + ("min_impurity_split", "1e-07"), + ("min_samples_leaf", "1"), + ("min_samples_split", "2"), + ("min_weight_fraction_leaf", "0.0"), + ("presort", "false"), + ("random_state", "null"), + ("splitter", '"best"'), + ) + ) else: - fixture_parameters = \ - OrderedDict((('class_weight', 'null'), - ('criterion', '"entropy"'), - ('max_depth', 'null'), - ('max_features', '"auto"'), - ('max_leaf_nodes', '2000'), - ('min_impurity_decrease', '0.0'), - ('min_impurity_split', 'null'), - ('min_samples_leaf', '1'), - ('min_samples_split', '2'), - ('min_weight_fraction_leaf', '0.0'), - ('presort', 'false'), - ('random_state', 'null'), - ('splitter', '"best"'))) - structure_fixture = {'sklearn.tree.tree.DecisionTreeClassifier': []} + fixture_parameters = OrderedDict( + ( + ("class_weight", "null"), + ("criterion", '"entropy"'), + ("max_depth", "null"), + ("max_features", '"auto"'), + ("max_leaf_nodes", "2000"), + ("min_impurity_decrease", "0.0"), + ("min_impurity_split", "null"), + ("min_samples_leaf", "1"), + ("min_samples_split", "2"), + ("min_weight_fraction_leaf", "0.0"), + ("presort", "false"), + ("random_state", "null"), + ("splitter", '"best"'), + ) + ) + structure_fixture = {"sklearn.tree.tree.DecisionTreeClassifier": []} serialization = self.extension.model_to_flow(model) - structure = serialization.get_structure('name') + 
structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture_name) self.assertEqual(serialization.class_name, fixture_name) @@ -153,46 +158,51 @@ def test_can_handle_flow(self): openml.config.server = self.test_server def test_serialize_model_clustering(self): - with mock.patch.object(self.extension, '_check_dependencies') as check_dependencies_mock: + with mock.patch.object(self.extension, "_check_dependencies") as check_dependencies_mock: model = sklearn.cluster.KMeans() - fixture_name = 'sklearn.cluster.k_means_.KMeans' - fixture_short_name = 'sklearn.KMeans' + fixture_name = "sklearn.cluster.k_means_.KMeans" + fixture_short_name = "sklearn.KMeans" # str obtained from self.extension._get_sklearn_description(model) - fixture_description = 'K-Means clustering' - version_fixture = 'sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9' \ - % sklearn.__version__ + fixture_description = "K-Means clustering" + version_fixture = "sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9" % sklearn.__version__ # n_jobs default has changed to None in 0.20 if LooseVersion(sklearn.__version__) < "0.20": - fixture_parameters = \ - OrderedDict((('algorithm', '"auto"'), - ('copy_x', 'true'), - ('init', '"k-means++"'), - ('max_iter', '300'), - ('n_clusters', '8'), - ('n_init', '10'), - ('n_jobs', '1'), - ('precompute_distances', '"auto"'), - ('random_state', 'null'), - ('tol', '0.0001'), - ('verbose', '0'))) + fixture_parameters = OrderedDict( + ( + ("algorithm", '"auto"'), + ("copy_x", "true"), + ("init", '"k-means++"'), + ("max_iter", "300"), + ("n_clusters", "8"), + ("n_init", "10"), + ("n_jobs", "1"), + ("precompute_distances", '"auto"'), + ("random_state", "null"), + ("tol", "0.0001"), + ("verbose", "0"), + ) + ) else: - fixture_parameters = \ - OrderedDict((('algorithm', '"auto"'), - ('copy_x', 'true'), - ('init', '"k-means++"'), - ('max_iter', '300'), - ('n_clusters', '8'), - ('n_init', '10'), - ('n_jobs', 'null'), - ('precompute_distances', '"auto"'), - ('random_state', 'null'), - ('tol', '0.0001'), - ('verbose', '0'))) - fixture_structure = {'sklearn.cluster.k_means_.KMeans': []} + fixture_parameters = OrderedDict( + ( + ("algorithm", '"auto"'), + ("copy_x", "true"), + ("init", '"k-means++"'), + ("max_iter", "300"), + ("n_clusters", "8"), + ("n_init", "10"), + ("n_jobs", "null"), + ("precompute_distances", '"auto"'), + ("random_state", "null"), + ("tol", "0.0001"), + ("verbose", "0"), + ) + ) + fixture_structure = {"sklearn.cluster.k_means_.KMeans": []} serialization = self.extension.model_to_flow(model) - structure = serialization.get_structure('name') + structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture_name) self.assertEqual(serialization.class_name, fixture_name) @@ -217,46 +227,52 @@ def test_serialize_model_clustering(self): def test_serialize_model_with_subcomponent(self): model = sklearn.ensemble.AdaBoostClassifier( - n_estimators=100, base_estimator=sklearn.tree.DecisionTreeClassifier()) + n_estimators=100, base_estimator=sklearn.tree.DecisionTreeClassifier() + ) - fixture_name = 'sklearn.ensemble.weight_boosting.AdaBoostClassifier' \ - '(base_estimator=sklearn.tree.tree.DecisionTreeClassifier)' - fixture_class_name = 'sklearn.ensemble.weight_boosting.AdaBoostClassifier' - fixture_short_name = 'sklearn.AdaBoostClassifier' + fixture_name = ( + "sklearn.ensemble.weight_boosting.AdaBoostClassifier" + "(base_estimator=sklearn.tree.tree.DecisionTreeClassifier)" + ) + fixture_class_name = "sklearn.ensemble.weight_boosting.AdaBoostClassifier" + 
fixture_short_name = "sklearn.AdaBoostClassifier" # str obtained from self.extension._get_sklearn_description(model) - fixture_description = 'An AdaBoost classifier.\n\nAn AdaBoost [1] classifier is a '\ - 'meta-estimator that begins by fitting a\nclassifier on the original'\ - ' dataset and then fits additional copies of the\nclassifier on the '\ - 'same dataset but where the weights of incorrectly\nclassified '\ - 'instances are adjusted such that subsequent classifiers focus\nmore'\ - ' on difficult cases.\n\nThis class implements the algorithm known '\ - 'as AdaBoost-SAMME [2].' - fixture_subcomponent_name = 'sklearn.tree.tree.DecisionTreeClassifier' - fixture_subcomponent_class_name = 'sklearn.tree.tree.DecisionTreeClassifier' + fixture_description = ( + "An AdaBoost classifier.\n\nAn AdaBoost [1] classifier is a " + "meta-estimator that begins by fitting a\nclassifier on the original" + " dataset and then fits additional copies of the\nclassifier on the " + "same dataset but where the weights of incorrectly\nclassified " + "instances are adjusted such that subsequent classifiers focus\nmore" + " on difficult cases.\n\nThis class implements the algorithm known " + "as AdaBoost-SAMME [2]." + ) + fixture_subcomponent_name = "sklearn.tree.tree.DecisionTreeClassifier" + fixture_subcomponent_class_name = "sklearn.tree.tree.DecisionTreeClassifier" # str obtained from self.extension._get_sklearn_description(model.base_estimator) - fixture_subcomponent_description = 'A decision tree classifier.' + fixture_subcomponent_description = "A decision tree classifier." fixture_structure = { fixture_name: [], - 'sklearn.tree.tree.DecisionTreeClassifier': ['base_estimator'] + "sklearn.tree.tree.DecisionTreeClassifier": ["base_estimator"], } serialization = self.extension.model_to_flow(model) - structure = serialization.get_structure('name') + structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture_name) self.assertEqual(serialization.class_name, fixture_class_name) self.assertEqual(serialization.custom_name, fixture_short_name) self.assertEqual(serialization.description, fixture_description) - self.assertEqual(serialization.parameters['algorithm'], '"SAMME.R"') - self.assertIsInstance(serialization.parameters['base_estimator'], str) - self.assertEqual(serialization.parameters['learning_rate'], '1.0') - self.assertEqual(serialization.parameters['n_estimators'], '100') - self.assertEqual(serialization.components['base_estimator'].name, - fixture_subcomponent_name) - self.assertEqual(serialization.components['base_estimator'].class_name, - fixture_subcomponent_class_name) - self.assertEqual(serialization.components['base_estimator'].description, - fixture_subcomponent_description) + self.assertEqual(serialization.parameters["algorithm"], '"SAMME.R"') + self.assertIsInstance(serialization.parameters["base_estimator"], str) + self.assertEqual(serialization.parameters["learning_rate"], "1.0") + self.assertEqual(serialization.parameters["n_estimators"], "100") + self.assertEqual(serialization.components["base_estimator"].name, fixture_subcomponent_name) + self.assertEqual( + serialization.components["base_estimator"].class_name, fixture_subcomponent_class_name + ) + self.assertEqual( + serialization.components["base_estimator"].description, fixture_subcomponent_description + ) self.assertDictEqual(structure, fixture_structure) new_model = self.extension.flow_to_model(serialization) @@ -268,53 +284,55 @@ def test_serialize_model_with_subcomponent(self): 
self.assertIsNot(new_model, model) self.assertIsNot(new_model.base_estimator, model.base_estimator) - self.assertEqual(new_model.base_estimator.get_params(), - model.base_estimator.get_params()) + self.assertEqual(new_model.base_estimator.get_params(), model.base_estimator.get_params()) new_model_params = new_model.get_params() - del new_model_params['base_estimator'] + del new_model_params["base_estimator"] model_params = model.get_params() - del model_params['base_estimator'] + del model_params["base_estimator"] self.assertEqual(new_model_params, model_params) new_model.fit(self.X, self.y) def test_serialize_pipeline(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) - dummy = sklearn.dummy.DummyClassifier(strategy='prior') - model = sklearn.pipeline.Pipeline(steps=[ - ('scaler', scaler), ('dummy', dummy)]) + dummy = sklearn.dummy.DummyClassifier(strategy="prior") + model = sklearn.pipeline.Pipeline(steps=[("scaler", scaler), ("dummy", dummy)]) - fixture_name = 'sklearn.pipeline.Pipeline(' \ - 'scaler=sklearn.preprocessing.data.StandardScaler,' \ - 'dummy=sklearn.dummy.DummyClassifier)' - fixture_short_name = 'sklearn.Pipeline(StandardScaler,DummyClassifier)' + fixture_name = ( + "sklearn.pipeline.Pipeline(" + "scaler=sklearn.preprocessing.data.StandardScaler," + "dummy=sklearn.dummy.DummyClassifier)" + ) + fixture_short_name = "sklearn.Pipeline(StandardScaler,DummyClassifier)" if version.parse(sklearn.__version__) >= version.parse("0.21.0"): - fixture_description = "Pipeline of transforms with a final estimator.\n\nSequentially"\ - " apply a list of transforms and a final estimator.\n"\ - "Intermediate steps of the pipeline must be 'transforms', that "\ - "is, they\nmust implement fit and transform methods.\nThe final "\ - "estimator only needs to implement fit.\nThe transformers in "\ - "the pipeline can be cached using ``memory`` argument.\n\nThe "\ - "purpose of the pipeline is to assemble several steps that can "\ - "be\ncross-validated together while setting different parameters"\ - ".\nFor this, it enables setting parameters of the various steps"\ - " using their\nnames and the parameter name separated by a '__',"\ - " as in the example below.\nA step's estimator may be replaced "\ - "entirely by setting the parameter\nwith its name to another "\ - "estimator, or a transformer removed by setting\nit to "\ - "'passthrough' or ``None``." + fixture_description = ( + "Pipeline of transforms with a final estimator.\n\nSequentially" + " apply a list of transforms and a final estimator.\n" + "Intermediate steps of the pipeline must be 'transforms', that " + "is, they\nmust implement fit and transform methods.\nThe final " + "estimator only needs to implement fit.\nThe transformers in " + "the pipeline can be cached using ``memory`` argument.\n\nThe " + "purpose of the pipeline is to assemble several steps that can " + "be\ncross-validated together while setting different parameters" + ".\nFor this, it enables setting parameters of the various steps" + " using their\nnames and the parameter name separated by a '__'," + " as in the example below.\nA step's estimator may be replaced " + "entirely by setting the parameter\nwith its name to another " + "estimator, or a transformer removed by setting\nit to " + "'passthrough' or ``None``." 
+ ) else: fixture_description = self.extension._get_sklearn_description(model) fixture_structure = { fixture_name: [], - 'sklearn.preprocessing.data.StandardScaler': ['scaler'], - 'sklearn.dummy.DummyClassifier': ['dummy'] + "sklearn.preprocessing.data.StandardScaler": ["scaler"], + "sklearn.dummy.DummyClassifier": ["dummy"], } serialization = self.extension.model_to_flow(model) - structure = serialization.get_structure('name') + structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture_name) self.assertEqual(serialization.custom_name, fixture_short_name) @@ -335,52 +353,46 @@ def test_serialize_pipeline(self): # Hard to compare two representations of a dict due to possibly # different sorting. Making a json makes it easier self.assertEqual( - json.loads(serialization.parameters['steps']), + json.loads(serialization.parameters["steps"]), [ { - 'oml-python:serialized_object': - 'component_reference', - 'value': {'key': 'scaler', 'step_name': 'scaler'} + "oml-python:serialized_object": "component_reference", + "value": {"key": "scaler", "step_name": "scaler"}, }, { - 'oml-python:serialized_object': - 'component_reference', - 'value': {'key': 'dummy', 'step_name': 'dummy'} - } - ] + "oml-python:serialized_object": "component_reference", + "value": {"key": "dummy", "step_name": "dummy"}, + }, + ], ) # Checking the sub-component self.assertEqual(len(serialization.components), 2) - self.assertIsInstance(serialization.components['scaler'], - OpenMLFlow) - self.assertIsInstance(serialization.components['dummy'], - OpenMLFlow) + self.assertIsInstance(serialization.components["scaler"], OpenMLFlow) + self.assertIsInstance(serialization.components["dummy"], OpenMLFlow) new_model = self.extension.flow_to_model(serialization) # compares string representations of the dict, as it potentially # contains complex objects that can not be compared with == op # Only in Python 3.x, as Python 2 has Unicode issues if sys.version_info[0] >= 3: - self.assertEqual(str(model.get_params()), - str(new_model.get_params())) + self.assertEqual(str(model.get_params()), str(new_model.get_params())) self.assertEqual(type(new_model), type(model)) self.assertIsNot(new_model, model) - self.assertEqual([step[0] for step in new_model.steps], - [step[0] for step in model.steps]) + self.assertEqual([step[0] for step in new_model.steps], [step[0] for step in model.steps]) self.assertIsNot(new_model.steps[0][1], model.steps[0][1]) self.assertIsNot(new_model.steps[1][1], model.steps[1][1]) new_model_params = new_model.get_params() - del new_model_params['scaler'] - del new_model_params['dummy'] - del new_model_params['steps'] + del new_model_params["scaler"] + del new_model_params["dummy"] + del new_model_params["steps"] fu_params = model.get_params() - del fu_params['scaler'] - del fu_params['dummy'] - del fu_params['steps'] + del fu_params["scaler"] + del fu_params["dummy"] + del fu_params["steps"] self.assertEqual(new_model_params, fu_params) new_model.fit(self.X, self.y) @@ -388,39 +400,42 @@ def test_serialize_pipeline(self): def test_serialize_pipeline_clustering(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) km = sklearn.cluster.KMeans() - model = sklearn.pipeline.Pipeline(steps=[ - ('scaler', scaler), ('clusterer', km)]) + model = sklearn.pipeline.Pipeline(steps=[("scaler", scaler), ("clusterer", km)]) - fixture_name = 'sklearn.pipeline.Pipeline(' \ - 'scaler=sklearn.preprocessing.data.StandardScaler,' \ - 'clusterer=sklearn.cluster.k_means_.KMeans)' - 
fixture_short_name = 'sklearn.Pipeline(StandardScaler,KMeans)' + fixture_name = ( + "sklearn.pipeline.Pipeline(" + "scaler=sklearn.preprocessing.data.StandardScaler," + "clusterer=sklearn.cluster.k_means_.KMeans)" + ) + fixture_short_name = "sklearn.Pipeline(StandardScaler,KMeans)" if version.parse(sklearn.__version__) >= version.parse("0.21.0"): - fixture_description = "Pipeline of transforms with a final estimator.\n\nSequentially"\ - " apply a list of transforms and a final estimator.\n"\ - "Intermediate steps of the pipeline must be 'transforms', that "\ - "is, they\nmust implement fit and transform methods.\nThe final "\ - "estimator only needs to implement fit.\nThe transformers in "\ - "the pipeline can be cached using ``memory`` argument.\n\nThe "\ - "purpose of the pipeline is to assemble several steps that can "\ - "be\ncross-validated together while setting different parameters"\ - ".\nFor this, it enables setting parameters of the various steps"\ - " using their\nnames and the parameter name separated by a '__',"\ - " as in the example below.\nA step's estimator may be replaced "\ - "entirely by setting the parameter\nwith its name to another "\ - "estimator, or a transformer removed by setting\nit to "\ - "'passthrough' or ``None``." + fixture_description = ( + "Pipeline of transforms with a final estimator.\n\nSequentially" + " apply a list of transforms and a final estimator.\n" + "Intermediate steps of the pipeline must be 'transforms', that " + "is, they\nmust implement fit and transform methods.\nThe final " + "estimator only needs to implement fit.\nThe transformers in " + "the pipeline can be cached using ``memory`` argument.\n\nThe " + "purpose of the pipeline is to assemble several steps that can " + "be\ncross-validated together while setting different parameters" + ".\nFor this, it enables setting parameters of the various steps" + " using their\nnames and the parameter name separated by a '__'," + " as in the example below.\nA step's estimator may be replaced " + "entirely by setting the parameter\nwith its name to another " + "estimator, or a transformer removed by setting\nit to " + "'passthrough' or ``None``." + ) else: fixture_description = self.extension._get_sklearn_description(model) fixture_structure = { fixture_name: [], - 'sklearn.preprocessing.data.StandardScaler': ['scaler'], - 'sklearn.cluster.k_means_.KMeans': ['clusterer'] + "sklearn.preprocessing.data.StandardScaler": ["scaler"], + "sklearn.cluster.k_means_.KMeans": ["clusterer"], } serialization = self.extension.model_to_flow(model) - structure = serialization.get_structure('name') + structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture_name) self.assertEqual(serialization.custom_name, fixture_short_name) @@ -440,25 +455,23 @@ def test_serialize_pipeline_clustering(self): # Hard to compare two representations of a dict due to possibly # different sorting. 
Making a json makes it easier self.assertEqual( - json.loads(serialization.parameters['steps']), + json.loads(serialization.parameters["steps"]), [ { - 'oml-python:serialized_object': 'component_reference', - 'value': {'key': 'scaler', 'step_name': 'scaler'} + "oml-python:serialized_object": "component_reference", + "value": {"key": "scaler", "step_name": "scaler"}, }, { - 'oml-python:serialized_object': 'component_reference', - 'value': {'key': 'clusterer', 'step_name': 'clusterer'} + "oml-python:serialized_object": "component_reference", + "value": {"key": "clusterer", "step_name": "clusterer"}, }, - ] + ], ) # Checking the sub-component self.assertEqual(len(serialization.components), 2) - self.assertIsInstance(serialization.components['scaler'], - OpenMLFlow) - self.assertIsInstance(serialization.components['clusterer'], - OpenMLFlow) + self.assertIsInstance(serialization.components["scaler"], OpenMLFlow) + self.assertIsInstance(serialization.components["clusterer"], OpenMLFlow) # del serialization.model new_model = self.extension.flow_to_model(serialization) @@ -466,66 +479,76 @@ def test_serialize_pipeline_clustering(self): # contains complex objects that can not be compared with == op # Only in Python 3.x, as Python 2 has Unicode issues if sys.version_info[0] >= 3: - self.assertEqual(str(model.get_params()), - str(new_model.get_params())) + self.assertEqual(str(model.get_params()), str(new_model.get_params())) self.assertEqual(type(new_model), type(model)) self.assertIsNot(new_model, model) - self.assertEqual([step[0] for step in new_model.steps], - [step[0] for step in model.steps]) + self.assertEqual([step[0] for step in new_model.steps], [step[0] for step in model.steps]) self.assertIsNot(new_model.steps[0][1], model.steps[0][1]) self.assertIsNot(new_model.steps[1][1], model.steps[1][1]) new_model_params = new_model.get_params() - del new_model_params['scaler'] - del new_model_params['clusterer'] - del new_model_params['steps'] + del new_model_params["scaler"] + del new_model_params["clusterer"] + del new_model_params["steps"] fu_params = model.get_params() - del fu_params['scaler'] - del fu_params['clusterer'] - del fu_params['steps'] + del fu_params["scaler"] + del fu_params["clusterer"] + del fu_params["steps"] self.assertEqual(new_model_params, fu_params) new_model.fit(self.X, self.y) - @unittest.skipIf(LooseVersion(sklearn.__version__) < "0.20", - reason="columntransformer introduction in 0.20.0") + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="columntransformer introduction in 0.20.0", + ) def test_serialize_column_transformer(self): # temporary local import, dependend on version 0.20 import sklearn.compose + model = sklearn.compose.ColumnTransformer( transformers=[ - ('numeric', sklearn.preprocessing.StandardScaler(), [0, 1, 2]), - ('nominal', sklearn.preprocessing.OneHotEncoder( - handle_unknown='ignore'), [3, 4, 5])], - remainder='passthrough') - fixture = 'sklearn.compose._column_transformer.ColumnTransformer(' \ - 'numeric=sklearn.preprocessing.data.StandardScaler,' \ - 'nominal=sklearn.preprocessing._encoders.OneHotEncoder)' - fixture_short_name = 'sklearn.ColumnTransformer' + ("numeric", sklearn.preprocessing.StandardScaler(), [0, 1, 2]), + ( + "nominal", + sklearn.preprocessing.OneHotEncoder(handle_unknown="ignore"), + [3, 4, 5], + ), + ], + remainder="passthrough", + ) + fixture = ( + "sklearn.compose._column_transformer.ColumnTransformer(" + "numeric=sklearn.preprocessing.data.StandardScaler," + 
"nominal=sklearn.preprocessing._encoders.OneHotEncoder)" + ) + fixture_short_name = "sklearn.ColumnTransformer" if version.parse(sklearn.__version__) >= version.parse("0.21.0"): # str obtained from self.extension._get_sklearn_description(model) - fixture_description = 'Applies transformers to columns of an array or pandas '\ - 'DataFrame.\n\nThis estimator allows different columns or '\ - 'column subsets of the input\nto be transformed separately and '\ - 'the features generated by each transformer\nwill be '\ - 'concatenated to form a single feature space.\nThis is useful '\ - 'for heterogeneous or columnar data, to combine several\nfeature'\ - ' extraction mechanisms or transformations into a single '\ - 'transformer.' + fixture_description = ( + "Applies transformers to columns of an array or pandas " + "DataFrame.\n\nThis estimator allows different columns or " + "column subsets of the input\nto be transformed separately and " + "the features generated by each transformer\nwill be " + "concatenated to form a single feature space.\nThis is useful " + "for heterogeneous or columnar data, to combine several\nfeature" + " extraction mechanisms or transformations into a single " + "transformer." + ) else: fixture_description = self.extension._get_sklearn_description(model) fixture_structure = { fixture: [], - 'sklearn.preprocessing.data.StandardScaler': ['numeric'], - 'sklearn.preprocessing._encoders.OneHotEncoder': ['nominal'] + "sklearn.preprocessing.data.StandardScaler": ["numeric"], + "sklearn.preprocessing._encoders.OneHotEncoder": ["nominal"], } serialization = self.extension.model_to_flow(model) - structure = serialization.get_structure('name') + structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture) self.assertEqual(serialization.custom_name, fixture_short_name) self.assertEqual(serialization.description, fixture_description) @@ -536,67 +559,75 @@ def test_serialize_column_transformer(self): # contains complex objects that can not be compared with == op # Only in Python 3.x, as Python 2 has Unicode issues if sys.version_info[0] >= 3: - self.assertEqual(str(model.get_params()), - str(new_model.get_params())) + self.assertEqual(str(model.get_params()), str(new_model.get_params())) self.assertEqual(type(new_model), type(model)) self.assertIsNot(new_model, model) serialization2 = self.extension.model_to_flow(new_model) assert_flows_equal(serialization, serialization2) - @unittest.skipIf(LooseVersion(sklearn.__version__) < "0.20", - reason="columntransformer introduction in 0.20.0") + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="columntransformer introduction in 0.20.0", + ) def test_serialize_column_transformer_pipeline(self): # temporary local import, dependend on version 0.20 import sklearn.compose + inner = sklearn.compose.ColumnTransformer( transformers=[ - ('numeric', sklearn.preprocessing.StandardScaler(), [0, 1, 2]), - ('nominal', sklearn.preprocessing.OneHotEncoder( - handle_unknown='ignore'), [3, 4, 5])], - remainder='passthrough') + ("numeric", sklearn.preprocessing.StandardScaler(), [0, 1, 2]), + ( + "nominal", + sklearn.preprocessing.OneHotEncoder(handle_unknown="ignore"), + [3, 4, 5], + ), + ], + remainder="passthrough", + ) model = sklearn.pipeline.Pipeline( - steps=[('transformer', inner), - ('classifier', sklearn.tree.DecisionTreeClassifier())]) - fixture_name = \ - 'sklearn.pipeline.Pipeline('\ - 'transformer=sklearn.compose._column_transformer.'\ - 'ColumnTransformer('\ - 
'numeric=sklearn.preprocessing.data.StandardScaler,'\ - 'nominal=sklearn.preprocessing._encoders.OneHotEncoder),'\ - 'classifier=sklearn.tree.tree.DecisionTreeClassifier)' + steps=[("transformer", inner), ("classifier", sklearn.tree.DecisionTreeClassifier())] + ) + fixture_name = ( + "sklearn.pipeline.Pipeline(" + "transformer=sklearn.compose._column_transformer." + "ColumnTransformer(" + "numeric=sklearn.preprocessing.data.StandardScaler," + "nominal=sklearn.preprocessing._encoders.OneHotEncoder)," + "classifier=sklearn.tree.tree.DecisionTreeClassifier)" + ) fixture_structure = { - 'sklearn.preprocessing.data.StandardScaler': - ['transformer', 'numeric'], - 'sklearn.preprocessing._encoders.OneHotEncoder': - ['transformer', 'nominal'], - 'sklearn.compose._column_transformer.ColumnTransformer(numeric=' - 'sklearn.preprocessing.data.StandardScaler,nominal=sklearn.' - 'preprocessing._encoders.OneHotEncoder)': ['transformer'], - 'sklearn.tree.tree.DecisionTreeClassifier': ['classifier'], + "sklearn.preprocessing.data.StandardScaler": ["transformer", "numeric"], + "sklearn.preprocessing._encoders.OneHotEncoder": ["transformer", "nominal"], + "sklearn.compose._column_transformer.ColumnTransformer(numeric=" + "sklearn.preprocessing.data.StandardScaler,nominal=sklearn." + "preprocessing._encoders.OneHotEncoder)": ["transformer"], + "sklearn.tree.tree.DecisionTreeClassifier": ["classifier"], fixture_name: [], } if version.parse(sklearn.__version__) >= version.parse("0.21.0"): # str obtained from self.extension._get_sklearn_description(model) - fixture_description = "Pipeline of transforms with a final estimator.\n\nSequentially"\ - " apply a list of transforms and a final estimator.\n"\ - "Intermediate steps of the pipeline must be 'transforms', that "\ - "is, they\nmust implement fit and transform methods.\nThe final"\ - " estimator only needs to implement fit.\nThe transformers in "\ - "the pipeline can be cached using ``memory`` argument.\n\nThe "\ - "purpose of the pipeline is to assemble several steps that can "\ - "be\ncross-validated together while setting different "\ - "parameters.\nFor this, it enables setting parameters of the "\ - "various steps using their\nnames and the parameter name "\ - "separated by a '__', as in the example below.\nA step's "\ - "estimator may be replaced entirely by setting the parameter\n"\ - "with its name to another estimator, or a transformer removed by"\ - " setting\nit to 'passthrough' or ``None``." + fixture_description = ( + "Pipeline of transforms with a final estimator.\n\nSequentially" + " apply a list of transforms and a final estimator.\n" + "Intermediate steps of the pipeline must be 'transforms', that " + "is, they\nmust implement fit and transform methods.\nThe final" + " estimator only needs to implement fit.\nThe transformers in " + "the pipeline can be cached using ``memory`` argument.\n\nThe " + "purpose of the pipeline is to assemble several steps that can " + "be\ncross-validated together while setting different " + "parameters.\nFor this, it enables setting parameters of the " + "various steps using their\nnames and the parameter name " + "separated by a '__', as in the example below.\nA step's " + "estimator may be replaced entirely by setting the parameter\n" + "with its name to another estimator, or a transformer removed by" + " setting\nit to 'passthrough' or ``None``." 
+ ) else: fixture_description = self.extension._get_sklearn_description(model) serialization = self.extension.model_to_flow(model) - structure = serialization.get_structure('name') + structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture_name) self.assertEqual(serialization.description, fixture_description) self.assertDictEqual(structure, fixture_structure) @@ -610,33 +641,30 @@ def test_serialize_column_transformer_pipeline(self): serialization2 = self.extension.model_to_flow(new_model) assert_flows_equal(serialization, serialization2) - @unittest.skipIf(LooseVersion(sklearn.__version__) < "0.20", - reason="Pipeline processing behaviour updated") + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", reason="Pipeline processing behaviour updated" + ) def test_serialize_feature_union(self): - ohe_params = {'sparse': False} + ohe_params = {"sparse": False} if LooseVersion(sklearn.__version__) >= "0.20": - ohe_params['categories'] = 'auto' + ohe_params["categories"] = "auto" ohe = sklearn.preprocessing.OneHotEncoder(**ohe_params) scaler = sklearn.preprocessing.StandardScaler() - fu = sklearn.pipeline.FeatureUnion( - transformer_list=[('ohe', ohe), ('scaler', scaler)] - ) + fu = sklearn.pipeline.FeatureUnion(transformer_list=[("ohe", ohe), ("scaler", scaler)]) serialization = self.extension.model_to_flow(fu) - structure = serialization.get_structure('name') + structure = serialization.get_structure("name") # OneHotEncoder was moved to _encoders module in 0.20 - module_name_encoder = ('_encoders' - if LooseVersion(sklearn.__version__) >= "0.20" - else 'data') - fixture_name = ('sklearn.pipeline.FeatureUnion(' - 'ohe=sklearn.preprocessing.{}.OneHotEncoder,' - 'scaler=sklearn.preprocessing.data.StandardScaler)' - .format(module_name_encoder)) + module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" + fixture_name = ( + "sklearn.pipeline.FeatureUnion(" + "ohe=sklearn.preprocessing.{}.OneHotEncoder," + "scaler=sklearn.preprocessing.data.StandardScaler)".format(module_name_encoder) + ) fixture_structure = { fixture_name: [], - 'sklearn.preprocessing.{}.' - 'OneHotEncoder'.format(module_name_encoder): ['ohe'], - 'sklearn.preprocessing.data.StandardScaler': ['scaler'] + "sklearn.preprocessing.{}." 
"OneHotEncoder".format(module_name_encoder): ["ohe"], + "sklearn.preprocessing.data.StandardScaler": ["scaler"], } self.assertEqual(serialization.name, fixture_name) self.assertDictEqual(structure, fixture_structure) @@ -645,119 +673,119 @@ def test_serialize_feature_union(self): # contains complex objects that can not be compared with == op # Only in Python 3.x, as Python 2 has Unicode issues if sys.version_info[0] >= 3: - self.assertEqual(str(fu.get_params()), - str(new_model.get_params())) + self.assertEqual(str(fu.get_params()), str(new_model.get_params())) self.assertEqual(type(new_model), type(fu)) self.assertIsNot(new_model, fu) - self.assertEqual(new_model.transformer_list[0][0], - fu.transformer_list[0][0]) - self.assertEqual(new_model.transformer_list[0][1].get_params(), - fu.transformer_list[0][1].get_params()) - self.assertEqual(new_model.transformer_list[1][0], - fu.transformer_list[1][0]) - self.assertEqual(new_model.transformer_list[1][1].get_params(), - fu.transformer_list[1][1].get_params()) - - self.assertEqual([step[0] for step in new_model.transformer_list], - [step[0] for step in fu.transformer_list]) - self.assertIsNot(new_model.transformer_list[0][1], - fu.transformer_list[0][1]) - self.assertIsNot(new_model.transformer_list[1][1], - fu.transformer_list[1][1]) + self.assertEqual(new_model.transformer_list[0][0], fu.transformer_list[0][0]) + self.assertEqual( + new_model.transformer_list[0][1].get_params(), fu.transformer_list[0][1].get_params() + ) + self.assertEqual(new_model.transformer_list[1][0], fu.transformer_list[1][0]) + self.assertEqual( + new_model.transformer_list[1][1].get_params(), fu.transformer_list[1][1].get_params() + ) + + self.assertEqual( + [step[0] for step in new_model.transformer_list], + [step[0] for step in fu.transformer_list], + ) + self.assertIsNot(new_model.transformer_list[0][1], fu.transformer_list[0][1]) + self.assertIsNot(new_model.transformer_list[1][1], fu.transformer_list[1][1]) new_model_params = new_model.get_params() - del new_model_params['ohe'] - del new_model_params['scaler'] - del new_model_params['transformer_list'] + del new_model_params["ohe"] + del new_model_params["scaler"] + del new_model_params["transformer_list"] fu_params = fu.get_params() - del fu_params['ohe'] - del fu_params['scaler'] - del fu_params['transformer_list'] + del fu_params["ohe"] + del fu_params["scaler"] + del fu_params["transformer_list"] self.assertEqual(new_model_params, fu_params) new_model.fit(self.X, self.y) - fu.set_params(scaler='drop') + fu.set_params(scaler="drop") serialization = self.extension.model_to_flow(fu) - self.assertEqual(serialization.name, - 'sklearn.pipeline.FeatureUnion(' - 'ohe=sklearn.preprocessing.{}.OneHotEncoder,' - 'scaler=drop)' - .format(module_name_encoder)) + self.assertEqual( + serialization.name, + "sklearn.pipeline.FeatureUnion(" + "ohe=sklearn.preprocessing.{}.OneHotEncoder," + "scaler=drop)".format(module_name_encoder), + ) new_model = self.extension.flow_to_model(serialization) self.assertEqual(type(new_model), type(fu)) self.assertIsNot(new_model, fu) - self.assertIs(new_model.transformer_list[1][1], 'drop') + self.assertIs(new_model.transformer_list[1][1], "drop") def test_serialize_feature_union_switched_names(self): - ohe_params = ({'categories': 'auto'} - if LooseVersion(sklearn.__version__) >= "0.20" else {}) + ohe_params = {"categories": "auto"} if LooseVersion(sklearn.__version__) >= "0.20" else {} ohe = sklearn.preprocessing.OneHotEncoder(**ohe_params) scaler = sklearn.preprocessing.StandardScaler() - 
fu1 = sklearn.pipeline.FeatureUnion( - transformer_list=[('ohe', ohe), ('scaler', scaler)]) - fu2 = sklearn.pipeline.FeatureUnion( - transformer_list=[('scaler', ohe), ('ohe', scaler)]) + fu1 = sklearn.pipeline.FeatureUnion(transformer_list=[("ohe", ohe), ("scaler", scaler)]) + fu2 = sklearn.pipeline.FeatureUnion(transformer_list=[("scaler", ohe), ("ohe", scaler)]) fu1_serialization = self.extension.model_to_flow(fu1) fu2_serialization = self.extension.model_to_flow(fu2) # OneHotEncoder was moved to _encoders module in 0.20 - module_name_encoder = ('_encoders' - if LooseVersion(sklearn.__version__) >= "0.20" - else 'data') + module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" self.assertEqual( fu1_serialization.name, "sklearn.pipeline.FeatureUnion(" "ohe=sklearn.preprocessing.{}.OneHotEncoder," - "scaler=sklearn.preprocessing.data.StandardScaler)" - .format(module_name_encoder)) + "scaler=sklearn.preprocessing.data.StandardScaler)".format(module_name_encoder), + ) self.assertEqual( fu2_serialization.name, "sklearn.pipeline.FeatureUnion(" "scaler=sklearn.preprocessing.{}.OneHotEncoder," - "ohe=sklearn.preprocessing.data.StandardScaler)" - .format(module_name_encoder)) + "ohe=sklearn.preprocessing.data.StandardScaler)".format(module_name_encoder), + ) def test_serialize_complex_flow(self): ohe = sklearn.preprocessing.OneHotEncoder() scaler = sklearn.preprocessing.StandardScaler(with_mean=False) boosting = sklearn.ensemble.AdaBoostClassifier( - base_estimator=sklearn.tree.DecisionTreeClassifier()) - model = sklearn.pipeline.Pipeline(steps=[ - ('ohe', ohe), ('scaler', scaler), ('boosting', boosting)]) + base_estimator=sklearn.tree.DecisionTreeClassifier() + ) + model = sklearn.pipeline.Pipeline( + steps=[("ohe", ohe), ("scaler", scaler), ("boosting", boosting)] + ) parameter_grid = { - 'base_estimator__max_depth': scipy.stats.randint(1, 10), - 'learning_rate': scipy.stats.uniform(0.01, 0.99), - 'n_estimators': [1, 5, 10, 100] + "base_estimator__max_depth": scipy.stats.randint(1, 10), + "learning_rate": scipy.stats.uniform(0.01, 0.99), + "n_estimators": [1, 5, 10, 100], } # convert to ordered dict, sorted by keys) due to param grid check parameter_grid = OrderedDict(sorted(parameter_grid.items())) cv = sklearn.model_selection.StratifiedKFold(n_splits=5, shuffle=True) rs = sklearn.model_selection.RandomizedSearchCV( - estimator=model, param_distributions=parameter_grid, cv=cv) + estimator=model, param_distributions=parameter_grid, cv=cv + ) serialized = self.extension.model_to_flow(rs) - structure = serialized.get_structure('name') + structure = serialized.get_structure("name") # OneHotEncoder was moved to _encoders module in 0.20 - module_name_encoder = ('_encoders' - if LooseVersion(sklearn.__version__) >= "0.20" - else 'data') - ohe_name = 'sklearn.preprocessing.%s.OneHotEncoder' % \ - module_name_encoder - scaler_name = 'sklearn.preprocessing.data.StandardScaler' - tree_name = 'sklearn.tree.tree.DecisionTreeClassifier' - boosting_name = 'sklearn.ensemble.weight_boosting.AdaBoostClassifier' \ - '(base_estimator=%s)' % tree_name - pipeline_name = 'sklearn.pipeline.Pipeline(ohe=%s,scaler=%s,' \ - 'boosting=%s)' % (ohe_name, scaler_name, boosting_name) - fixture_name = 'sklearn.model_selection._search.RandomizedSearchCV' \ - '(estimator=%s)' % pipeline_name + module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" + ohe_name = "sklearn.preprocessing.%s.OneHotEncoder" % module_name_encoder + scaler_name = 
"sklearn.preprocessing.data.StandardScaler" + tree_name = "sklearn.tree.tree.DecisionTreeClassifier" + boosting_name = ( + "sklearn.ensemble.weight_boosting.AdaBoostClassifier" "(base_estimator=%s)" % tree_name + ) + pipeline_name = "sklearn.pipeline.Pipeline(ohe=%s,scaler=%s," "boosting=%s)" % ( + ohe_name, + scaler_name, + boosting_name, + ) + fixture_name = ( + "sklearn.model_selection._search.RandomizedSearchCV" "(estimator=%s)" % pipeline_name + ) fixture_structure = { - ohe_name: ['estimator', 'ohe'], - scaler_name: ['estimator', 'scaler'], - tree_name: ['estimator', 'boosting', 'base_estimator'], - boosting_name: ['estimator', 'boosting'], - pipeline_name: ['estimator'], - fixture_name: [] + ohe_name: ["estimator", "ohe"], + scaler_name: ["estimator", "scaler"], + tree_name: ["estimator", "boosting", "base_estimator"], + boosting_name: ["estimator", "boosting"], + pipeline_name: ["estimator"], + fixture_name: [], } self.assertEqual(serialized.name, fixture_name) self.assertEqual(structure, fixture_structure) @@ -776,8 +804,7 @@ def test_serialize_complex_flow(self): assert_flows_equal(serialized, serialized2) def test_serialize_type(self): - supported_types = [float, np.float, np.float32, np.float64, - int, np.int, np.int32, np.int64] + supported_types = [float, np.float, np.float32, np.float64, int, np.int, np.int32, np.int64] for supported_type in supported_types: serialized = self.extension.model_to_flow(supported_type) @@ -785,9 +812,11 @@ def test_serialize_type(self): self.assertEqual(deserialized, supported_type) def test_serialize_rvs(self): - supported_rvs = [scipy.stats.norm(loc=1, scale=5), - scipy.stats.expon(loc=1, scale=5), - scipy.stats.randint(low=-3, high=15)] + supported_rvs = [ + scipy.stats.norm(loc=1, scale=5), + scipy.stats.expon(loc=1, scale=5), + scipy.stats.randint(low=-3, high=15), + ] for supported_rv in supported_rvs: serialized = self.extension.model_to_flow(supported_rv) @@ -795,8 +824,7 @@ def test_serialize_rvs(self): self.assertEqual(type(deserialized.dist), type(supported_rv.dist)) del deserialized.dist del supported_rv.dist - self.assertEqual(deserialized.__dict__, - supported_rv.__dict__) + self.assertEqual(deserialized.__dict__, supported_rv.__dict__) def test_serialize_function(self): serialized = self.extension.model_to_flow(sklearn.feature_selection.chi2) @@ -804,27 +832,45 @@ def test_serialize_function(self): self.assertEqual(deserialized, sklearn.feature_selection.chi2) def test_serialize_cvobject(self): - methods = [sklearn.model_selection.KFold(3), - sklearn.model_selection.LeaveOneOut()] + methods = [sklearn.model_selection.KFold(3), sklearn.model_selection.LeaveOneOut()] fixtures = [ - OrderedDict([ - ('oml-python:serialized_object', 'cv_object'), - ('value', OrderedDict([ - ('name', 'sklearn.model_selection._split.KFold'), - ('parameters', OrderedDict([ - ('n_splits', '3'), - ('random_state', 'null'), - ('shuffle', 'false'), - ])) - ])) - ]), - OrderedDict([ - ('oml-python:serialized_object', 'cv_object'), - ('value', OrderedDict([ - ('name', 'sklearn.model_selection._split.LeaveOneOut'), - ('parameters', OrderedDict()) - ])) - ]), + OrderedDict( + [ + ("oml-python:serialized_object", "cv_object"), + ( + "value", + OrderedDict( + [ + ("name", "sklearn.model_selection._split.KFold"), + ( + "parameters", + OrderedDict( + [ + ("n_splits", "3"), + ("random_state", "null"), + ("shuffle", "false"), + ] + ), + ), + ] + ), + ), + ] + ), + OrderedDict( + [ + ("oml-python:serialized_object", "cv_object"), + ( + "value", + OrderedDict( + [ + 
("name", "sklearn.model_selection._split.LeaveOneOut"), + ("parameters", OrderedDict()), + ] + ), + ), + ] + ), ] for method, fixture in zip(methods, fixtures): m = self.extension.model_to_flow(method) @@ -841,17 +887,24 @@ def test_serialize_simple_parameter_grid(self): # Examples from the scikit-learn documentation models = [sklearn.svm.SVC(), sklearn.ensemble.RandomForestClassifier()] - grids = \ - [[OrderedDict([('C', [1, 10, 100, 1000]), ('kernel', ['linear'])]), - OrderedDict([('C', [1, 10, 100, 1000]), ('gamma', [0.001, 0.0001]), - ('kernel', ['rbf'])])], - OrderedDict([("bootstrap", [True, False]), - ("criterion", ["gini", "entropy"]), - ("max_depth", [3, None]), - ("max_features", [1, 3, 10]), - ("min_samples_leaf", [1, 3, 10]), - ("min_samples_split", [1, 3, 10]) - ])] + grids = [ + [ + OrderedDict([("C", [1, 10, 100, 1000]), ("kernel", ["linear"])]), + OrderedDict( + [("C", [1, 10, 100, 1000]), ("gamma", [0.001, 0.0001]), ("kernel", ["rbf"])] + ), + ], + OrderedDict( + [ + ("bootstrap", [True, False]), + ("criterion", ["gini", "entropy"]), + ("max_depth", [3, None]), + ("max_features", [1, 3, 10]), + ("min_samples_leaf", [1, 3, 10]), + ("min_samples_split", [1, 3, 10]), + ] + ), + ] for grid, model in zip(grids, models): serialized = self.extension.model_to_flow(grid) @@ -861,22 +914,24 @@ def test_serialize_simple_parameter_grid(self): self.assertIsNot(deserialized, grid) # providing error_score because nan != nan hpo = sklearn.model_selection.GridSearchCV( - param_grid=grid, estimator=model, error_score=-1000) + param_grid=grid, estimator=model, error_score=-1000 + ) serialized = self.extension.model_to_flow(hpo) deserialized = self.extension.flow_to_model(serialized) self.assertEqual(hpo.param_grid, deserialized.param_grid) - self.assertEqual(hpo.estimator.get_params(), - deserialized.estimator.get_params()) + self.assertEqual(hpo.estimator.get_params(), deserialized.estimator.get_params()) hpo_params = hpo.get_params(deep=False) deserialized_params = deserialized.get_params(deep=False) - del hpo_params['estimator'] - del deserialized_params['estimator'] + del hpo_params["estimator"] + del deserialized_params["estimator"] self.assertEqual(hpo_params, deserialized_params) - @unittest.skip('This feature needs further reworking. If we allow several ' - 'components, we need to register them all in the downstream ' - 'flows. This is so far not implemented.') + @unittest.skip( + "This feature needs further reworking. If we allow several " + "components, we need to register them all in the downstream " + "flows. This is so far not implemented." 
+ ) def test_serialize_advanced_grid(self): # TODO instead a GridSearchCV object should be serialized @@ -886,38 +941,45 @@ def test_serialize_advanced_grid(self): # This will only work with sklearn==0.18 N_FEATURES_OPTIONS = [2, 4, 8] C_OPTIONS = [1, 10, 100, 1000] - grid = [{'reduce_dim': [sklearn.decomposition.PCA(iterated_power=7), - sklearn.decomposition.NMF()], - 'reduce_dim__n_components': N_FEATURES_OPTIONS, - 'classify__C': C_OPTIONS}, - {'reduce_dim': [sklearn.feature_selection.SelectKBest( - sklearn.feature_selection.chi2)], - 'reduce_dim__k': N_FEATURES_OPTIONS, - 'classify__C': C_OPTIONS}] + grid = [ + { + "reduce_dim": [ + sklearn.decomposition.PCA(iterated_power=7), + sklearn.decomposition.NMF(), + ], + "reduce_dim__n_components": N_FEATURES_OPTIONS, + "classify__C": C_OPTIONS, + }, + { + "reduce_dim": [ + sklearn.feature_selection.SelectKBest(sklearn.feature_selection.chi2) + ], + "reduce_dim__k": N_FEATURES_OPTIONS, + "classify__C": C_OPTIONS, + }, + ] serialized = self.extension.model_to_flow(grid) deserialized = self.extension.flow_to_model(serialized) - self.assertEqual(grid[0]['reduce_dim'][0].get_params(), - deserialized[0]['reduce_dim'][0].get_params()) - self.assertIsNot(grid[0]['reduce_dim'][0], - deserialized[0]['reduce_dim'][0]) - self.assertEqual(grid[0]['reduce_dim'][1].get_params(), - deserialized[0]['reduce_dim'][1].get_params()) - self.assertIsNot(grid[0]['reduce_dim'][1], - deserialized[0]['reduce_dim'][1]) - self.assertEqual(grid[0]['reduce_dim__n_components'], - deserialized[0]['reduce_dim__n_components']) - self.assertEqual(grid[0]['classify__C'], - deserialized[0]['classify__C']) - self.assertEqual(grid[1]['reduce_dim'][0].get_params(), - deserialized[1]['reduce_dim'][0].get_params()) - self.assertIsNot(grid[1]['reduce_dim'][0], - deserialized[1]['reduce_dim'][0]) - self.assertEqual(grid[1]['reduce_dim__k'], - deserialized[1]['reduce_dim__k']) - self.assertEqual(grid[1]['classify__C'], - deserialized[1]['classify__C']) + self.assertEqual( + grid[0]["reduce_dim"][0].get_params(), deserialized[0]["reduce_dim"][0].get_params() + ) + self.assertIsNot(grid[0]["reduce_dim"][0], deserialized[0]["reduce_dim"][0]) + self.assertEqual( + grid[0]["reduce_dim"][1].get_params(), deserialized[0]["reduce_dim"][1].get_params() + ) + self.assertIsNot(grid[0]["reduce_dim"][1], deserialized[0]["reduce_dim"][1]) + self.assertEqual( + grid[0]["reduce_dim__n_components"], deserialized[0]["reduce_dim__n_components"] + ) + self.assertEqual(grid[0]["classify__C"], deserialized[0]["classify__C"]) + self.assertEqual( + grid[1]["reduce_dim"][0].get_params(), deserialized[1]["reduce_dim"][0].get_params() + ) + self.assertIsNot(grid[1]["reduce_dim"][0], deserialized[1]["reduce_dim"][0]) + self.assertEqual(grid[1]["reduce_dim__k"], deserialized[1]["reduce_dim__k"]) + self.assertEqual(grid[1]["classify__C"], deserialized[1]["classify__C"]) def test_serialize_advanced_grid_fails(self): # This unit test is checking that the test we skip above would actually fail @@ -925,23 +987,20 @@ def test_serialize_advanced_grid_fails(self): param_grid = { "base_estimator": [ sklearn.tree.DecisionTreeClassifier(), - sklearn.tree.ExtraTreeClassifier()] + sklearn.tree.ExtraTreeClassifier(), + ] } clf = sklearn.model_selection.GridSearchCV( - sklearn.ensemble.BaggingClassifier(), - param_grid=param_grid, + sklearn.ensemble.BaggingClassifier(), param_grid=param_grid, ) with self.assertRaisesRegex( - TypeError, - re.compile(r".*OpenML.*Flow.*is not JSON serializable", - flags=re.DOTALL) + TypeError, 
re.compile(r".*OpenML.*Flow.*is not JSON serializable", flags=re.DOTALL) ): self.extension.model_to_flow(clf) def test_serialize_resampling(self): - kfold = sklearn.model_selection.StratifiedKFold( - n_splits=4, shuffle=True) + kfold = sklearn.model_selection.StratifiedKFold(n_splits=4, shuffle=True) serialized = self.extension.model_to_flow(kfold) deserialized = self.extension.flow_to_model(serialized) # Best approximation to get_params() @@ -953,10 +1012,10 @@ def test_hypothetical_parameter_values(self): # string (and their correct serialization and deserialization) an only # be checked inside a model - model = Model('true', '1', '0.1') + model = Model("true", "1", "0.1") serialized = self.extension.model_to_flow(model) - serialized.external_version = 'sklearn==test123' + serialized.external_version = "sklearn==test123" deserialized = self.extension.flow_to_model(serialized) self.assertEqual(deserialized.get_params(), model.get_params()) self.assertIsNot(deserialized, model) @@ -964,8 +1023,7 @@ def test_hypothetical_parameter_values(self): def test_gaussian_process(self): opt = scipy.optimize.fmin_l_bfgs_b kernel = sklearn.gaussian_process.kernels.Matern() - gp = sklearn.gaussian_process.GaussianProcessClassifier( - kernel=kernel, optimizer=opt) + gp = sklearn.gaussian_process.GaussianProcessClassifier(kernel=kernel, optimizer=opt) with self.assertRaisesRegex( TypeError, r"Matern\(length_scale=1, nu=1.5\), ", @@ -977,47 +1035,52 @@ def test_error_on_adding_component_multiple_times_to_flow(self): # - openml.flows._check_multiple_occurence_of_component_in_flow() pca = sklearn.decomposition.PCA() pca2 = sklearn.decomposition.PCA() - pipeline = sklearn.pipeline.Pipeline((('pca1', pca), ('pca2', pca2))) + pipeline = sklearn.pipeline.Pipeline((("pca1", pca), ("pca2", pca2))) fixture = "Found a second occurence of component .*.PCA when trying to serialize Pipeline" with self.assertRaisesRegex(ValueError, fixture): self.extension.model_to_flow(pipeline) - fu = sklearn.pipeline.FeatureUnion((('pca1', pca), ('pca2', pca2))) - fixture = "Found a second occurence of component .*.PCA when trying " \ - "to serialize FeatureUnion" + fu = sklearn.pipeline.FeatureUnion((("pca1", pca), ("pca2", pca2))) + fixture = ( + "Found a second occurence of component .*.PCA when trying " "to serialize FeatureUnion" + ) with self.assertRaisesRegex(ValueError, fixture): self.extension.model_to_flow(fu) fs = sklearn.feature_selection.SelectKBest() - fu2 = sklearn.pipeline.FeatureUnion((('pca1', pca), ('fs', fs))) - pipeline2 = sklearn.pipeline.Pipeline((('fu', fu2), ('pca2', pca2))) + fu2 = sklearn.pipeline.FeatureUnion((("pca1", pca), ("fs", fs))) + pipeline2 = sklearn.pipeline.Pipeline((("fu", fu2), ("pca2", pca2))) fixture = "Found a second occurence of component .*.PCA when trying to serialize Pipeline" with self.assertRaisesRegex(ValueError, fixture): self.extension.model_to_flow(pipeline2) def test_subflow_version_propagated(self): this_directory = os.path.dirname(os.path.abspath(__file__)) - tests_directory = os.path.abspath(os.path.join(this_directory, - '..', '..')) + tests_directory = os.path.abspath(os.path.join(this_directory, "..", "..")) sys.path.append(tests_directory) import tests.test_flows.dummy_learn.dummy_forest + pca = sklearn.decomposition.PCA() dummy = tests.test_flows.dummy_learn.dummy_forest.DummyRegressor() - pipeline = sklearn.pipeline.Pipeline((('pca', pca), ('dummy', dummy))) + pipeline = sklearn.pipeline.Pipeline((("pca", pca), ("dummy", dummy))) flow = 
self.extension.model_to_flow(pipeline) # In python2.7, the unit tests work differently on travis-ci; therefore, # I put the alternative travis-ci answer here as well. While it has a # different value, it is still correct as it is a propagation of the # subclasses' module name - self.assertEqual(flow.external_version, '%s,%s,%s' % ( - self.extension._format_external_version('openml', openml.__version__), - self.extension._format_external_version('sklearn', sklearn.__version__), - self.extension._format_external_version('tests', '0.1'))) + self.assertEqual( + flow.external_version, + "%s,%s,%s" + % ( + self.extension._format_external_version("openml", openml.__version__), + self.extension._format_external_version("sklearn", sklearn.__version__), + self.extension._format_external_version("tests", "0.1"), + ), + ) - @mock.patch('warnings.warn') + @mock.patch("warnings.warn") def test_check_dependencies(self, warnings_mock): - dependencies = ['sklearn==0.1', 'sklearn>=99.99.99', - 'sklearn>99.99.99'] + dependencies = ["sklearn==0.1", "sklearn>=99.99.99", "sklearn>99.99.99"] for dependency in dependencies: self.assertRaises(ValueError, self.extension._check_dependencies, dependency) @@ -1025,12 +1088,16 @@ def test_illegal_parameter_names(self): # illegal name: estimators clf1 = sklearn.ensemble.VotingClassifier( estimators=[ - ('estimators', sklearn.ensemble.RandomForestClassifier()), - ('whatevs', sklearn.ensemble.ExtraTreesClassifier())]) + ("estimators", sklearn.ensemble.RandomForestClassifier()), + ("whatevs", sklearn.ensemble.ExtraTreesClassifier()), + ] + ) clf2 = sklearn.ensemble.VotingClassifier( estimators=[ - ('whatevs', sklearn.ensemble.RandomForestClassifier()), - ('estimators', sklearn.ensemble.ExtraTreesClassifier())]) + ("whatevs", sklearn.ensemble.RandomForestClassifier()), + ("estimators", sklearn.ensemble.ExtraTreesClassifier()), + ] + ) cases = [clf1, clf2] for case in cases: @@ -1039,26 +1106,32 @@ def test_illegal_parameter_names(self): def test_illegal_parameter_names_pipeline(self): # illegal name: steps steps = [ - ('Imputer', SimpleImputer(strategy='median')), - ('OneHotEncoder', - sklearn.preprocessing.OneHotEncoder(sparse=False, - handle_unknown='ignore')), - ('steps', sklearn.ensemble.BaggingClassifier( - base_estimator=sklearn.tree.DecisionTreeClassifier)) + ("Imputer", SimpleImputer(strategy="median")), + ( + "OneHotEncoder", + sklearn.preprocessing.OneHotEncoder(sparse=False, handle_unknown="ignore"), + ), + ( + "steps", + sklearn.ensemble.BaggingClassifier( + base_estimator=sklearn.tree.DecisionTreeClassifier + ), + ), ] self.assertRaises(ValueError, sklearn.pipeline.Pipeline, steps=steps) def test_illegal_parameter_names_featureunion(self): # illegal name: transformer_list transformer_list = [ - ('transformer_list', - SimpleImputer(strategy='median')), - ('OneHotEncoder', - sklearn.preprocessing.OneHotEncoder(sparse=False, - handle_unknown='ignore')) + ("transformer_list", SimpleImputer(strategy="median")), + ( + "OneHotEncoder", + sklearn.preprocessing.OneHotEncoder(sparse=False, handle_unknown="ignore"), + ), ] - self.assertRaises(ValueError, sklearn.pipeline.FeatureUnion, - transformer_list=transformer_list) + self.assertRaises( + ValueError, sklearn.pipeline.FeatureUnion, transformer_list=transformer_list + ) def test_paralizable_check(self): # using this model should pass the test (if param distribution is @@ -1076,33 +1149,31 @@ def test_paralizable_check(self): sklearn.ensemble.RandomForestClassifier(n_jobs=5), 
sklearn.ensemble.RandomForestClassifier(n_jobs=-1), sklearn.pipeline.Pipeline( - steps=[('bag', sklearn.ensemble.BaggingClassifier(n_jobs=1))]), + steps=[("bag", sklearn.ensemble.BaggingClassifier(n_jobs=1))] + ), sklearn.pipeline.Pipeline( - steps=[('bag', sklearn.ensemble.BaggingClassifier(n_jobs=5))]), + steps=[("bag", sklearn.ensemble.BaggingClassifier(n_jobs=5))] + ), sklearn.pipeline.Pipeline( - steps=[('bag', sklearn.ensemble.BaggingClassifier(n_jobs=-1))]), - sklearn.model_selection.GridSearchCV(singlecore_bagging, - legal_param_dist), - sklearn.model_selection.GridSearchCV(multicore_bagging, - legal_param_dist), + steps=[("bag", sklearn.ensemble.BaggingClassifier(n_jobs=-1))] + ), + sklearn.model_selection.GridSearchCV(singlecore_bagging, legal_param_dist), + sklearn.model_selection.GridSearchCV(multicore_bagging, legal_param_dist), sklearn.ensemble.BaggingClassifier( - n_jobs=-1, - base_estimator=sklearn.ensemble.RandomForestClassifier(n_jobs=5) - ) + n_jobs=-1, base_estimator=sklearn.ensemble.RandomForestClassifier(n_jobs=5) + ), ] illegal_models = [ - sklearn.model_selection.GridSearchCV(singlecore_bagging, - illegal_param_dist), - sklearn.model_selection.GridSearchCV(multicore_bagging, - illegal_param_dist) + sklearn.model_selection.GridSearchCV(singlecore_bagging, illegal_param_dist), + sklearn.model_selection.GridSearchCV(multicore_bagging, illegal_param_dist), ] can_measure_cputime_answers = [True, False, False, True, False, False, True, False, False] can_measure_walltime_answers = [True, True, False, True, True, False, True, True, False] - for model, allowed_cputime, allowed_walltime in zip(legal_models, - can_measure_cputime_answers, - can_measure_walltime_answers): + for model, allowed_cputime, allowed_walltime in zip( + legal_models, can_measure_cputime_answers, can_measure_walltime_answers + ): self.assertEqual(self.extension._can_measure_cputime(model), allowed_cputime) self.assertEqual(self.extension._can_measure_wallclocktime(model), allowed_walltime) @@ -1116,49 +1187,49 @@ def test__get_fn_arguments_with_defaults(self): fns = [ (sklearn.ensemble.RandomForestRegressor.__init__, 15), (sklearn.tree.DecisionTreeClassifier.__init__, 12), - (sklearn.pipeline.Pipeline.__init__, 0) + (sklearn.pipeline.Pipeline.__init__, 0), ] elif sklearn_version < "0.21": fns = [ (sklearn.ensemble.RandomForestRegressor.__init__, 16), (sklearn.tree.DecisionTreeClassifier.__init__, 13), - (sklearn.pipeline.Pipeline.__init__, 1) + (sklearn.pipeline.Pipeline.__init__, 1), ] else: fns = [ (sklearn.ensemble.RandomForestRegressor.__init__, 16), (sklearn.tree.DecisionTreeClassifier.__init__, 13), - (sklearn.pipeline.Pipeline.__init__, 2) + (sklearn.pipeline.Pipeline.__init__, 2), ] for fn, num_params_with_defaults in fns: - defaults, defaultless = ( - self.extension._get_fn_arguments_with_defaults(fn) - ) + defaults, defaultless = self.extension._get_fn_arguments_with_defaults(fn) self.assertIsInstance(defaults, dict) self.assertIsInstance(defaultless, set) # check whether we have both defaults and defaultless params self.assertEqual(len(defaults), num_params_with_defaults) self.assertGreater(len(defaultless), 0) # check no overlap - self.assertSetEqual(set(defaults.keys()), - set(defaults.keys()) - defaultless) - self.assertSetEqual(defaultless, - defaultless - set(defaults.keys())) + self.assertSetEqual(set(defaults.keys()), set(defaults.keys()) - defaultless) + self.assertSetEqual(defaultless, defaultless - set(defaults.keys())) def test_deserialize_with_defaults(self): # used the 
'initialize_with_defaults' flag of the deserialization # method to return a flow that contains default hyperparameter # settings. - steps = [('Imputer', SimpleImputer()), - ('OneHotEncoder', sklearn.preprocessing.OneHotEncoder()), - ('Estimator', sklearn.tree.DecisionTreeClassifier())] + steps = [ + ("Imputer", SimpleImputer()), + ("OneHotEncoder", sklearn.preprocessing.OneHotEncoder()), + ("Estimator", sklearn.tree.DecisionTreeClassifier()), + ] pipe_orig = sklearn.pipeline.Pipeline(steps=steps) pipe_adjusted = sklearn.clone(pipe_orig) - params = {'Imputer__strategy': 'median', - 'OneHotEncoder__sparse': False, - 'Estimator__min_samples_leaf': 42} + params = { + "Imputer__strategy": "median", + "OneHotEncoder__sparse": False, + "Estimator__min_samples_leaf": 42, + } pipe_adjusted.set_params(**params) flow = self.extension.model_to_flow(pipe_adjusted) pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) @@ -1174,16 +1245,22 @@ def test_deserialize_adaboost_with_defaults(self): # used the 'initialize_with_defaults' flag of the deserialization # method to return a flow that contains default hyperparameter # settings. - steps = [('Imputer', SimpleImputer()), - ('OneHotEncoder', sklearn.preprocessing.OneHotEncoder()), - ('Estimator', sklearn.ensemble.AdaBoostClassifier( - sklearn.tree.DecisionTreeClassifier()))] + steps = [ + ("Imputer", SimpleImputer()), + ("OneHotEncoder", sklearn.preprocessing.OneHotEncoder()), + ( + "Estimator", + sklearn.ensemble.AdaBoostClassifier(sklearn.tree.DecisionTreeClassifier()), + ), + ] pipe_orig = sklearn.pipeline.Pipeline(steps=steps) pipe_adjusted = sklearn.clone(pipe_orig) - params = {'Imputer__strategy': 'median', - 'OneHotEncoder__sparse': False, - 'Estimator__n_estimators': 10} + params = { + "Imputer__strategy": "median", + "OneHotEncoder__sparse": False, + "Estimator__n_estimators": 10, + } pipe_adjusted.set_params(**params) flow = self.extension.model_to_flow(pipe_adjusted) pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) @@ -1200,28 +1277,30 @@ def test_deserialize_complex_with_defaults(self): # method to return a flow that contains default hyperparameter # settings. 
steps = [ - ('Imputer', SimpleImputer()), - ('OneHotEncoder', sklearn.preprocessing.OneHotEncoder()), + ("Imputer", SimpleImputer()), + ("OneHotEncoder", sklearn.preprocessing.OneHotEncoder()), ( - 'Estimator', + "Estimator", sklearn.ensemble.AdaBoostClassifier( sklearn.ensemble.BaggingClassifier( sklearn.ensemble.GradientBoostingClassifier( sklearn.neighbors.KNeighborsClassifier() ) ) - ) + ), ), ] pipe_orig = sklearn.pipeline.Pipeline(steps=steps) pipe_adjusted = sklearn.clone(pipe_orig) - params = {'Imputer__strategy': 'median', - 'OneHotEncoder__sparse': False, - 'Estimator__n_estimators': 10, - 'Estimator__base_estimator__n_estimators': 10, - 'Estimator__base_estimator__base_estimator__learning_rate': 0.1, - 'Estimator__base_estimator__base_estimator__loss__n_neighbors': 13} + params = { + "Imputer__strategy": "median", + "OneHotEncoder__sparse": False, + "Estimator__n_estimators": 10, + "Estimator__base_estimator__n_estimators": 10, + "Estimator__base_estimator__base_estimator__learning_rate": 0.1, + "Estimator__base_estimator__base_estimator__loss__n_neighbors": 13, + } pipe_adjusted.set_params(**params) flow = self.extension.model_to_flow(pipe_adjusted) pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) @@ -1236,15 +1315,15 @@ def test_deserialize_complex_with_defaults(self): def test_openml_param_name_to_sklearn(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) boosting = sklearn.ensemble.AdaBoostClassifier( - base_estimator=sklearn.tree.DecisionTreeClassifier()) - model = sklearn.pipeline.Pipeline(steps=[ - ('scaler', scaler), ('boosting', boosting)]) + base_estimator=sklearn.tree.DecisionTreeClassifier() + ) + model = sklearn.pipeline.Pipeline(steps=[("scaler", scaler), ("boosting", boosting)]) flow = self.extension.model_to_flow(model) task = openml.tasks.get_task(115) run = openml.runs.run_flow_on_task(flow, task) run = run.publish() - TestBase._mark_entity_for_removal('run', run.run_id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], run.run_id)) + TestBase._mark_entity_for_removal("run", run.run_id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], run.run_id)) run = openml.runs.get_run(run.run_id) setup = openml.setups.get_setup(run.setup_id) @@ -1264,24 +1343,19 @@ def test_openml_param_name_to_sklearn(self): subflow = flow.get_subflow(splitted[0:-1]) else: subflow = flow - openml_name = "%s(%s)_%s" % (subflow.name, - subflow.version, - splitted[-1]) + openml_name = "%s(%s)_%s" % (subflow.name, subflow.version, splitted[-1]) self.assertEqual(parameter.full_name, openml_name) def test_obtain_parameter_values_flow_not_from_server(self): - model = sklearn.linear_model.LogisticRegression(solver='lbfgs') + model = sklearn.linear_model.LogisticRegression(solver="lbfgs") flow = self.extension.model_to_flow(model) - msg = 'Flow sklearn.linear_model.logistic.LogisticRegression has no ' \ - 'flow_id!' + msg = "Flow sklearn.linear_model.logistic.LogisticRegression has no " "flow_id!" 
with self.assertRaisesRegex(ValueError, msg): self.extension.obtain_parameter_values(flow) model = sklearn.ensemble.AdaBoostClassifier( - base_estimator=sklearn.linear_model.LogisticRegression( - solver='lbfgs', - ) + base_estimator=sklearn.linear_model.LogisticRegression(solver="lbfgs",) ) flow = self.extension.model_to_flow(model) flow.flow_id = 1 @@ -1297,25 +1371,26 @@ def test_obtain_parameter_values(self): "max_features": [1, 2, 3, 4], "min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10], "min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - "bootstrap": [True, False], "criterion": ["gini", "entropy"]}, - cv=sklearn.model_selection.StratifiedKFold(n_splits=2, - random_state=1), - n_iter=5) + "bootstrap": [True, False], + "criterion": ["gini", "entropy"], + }, + cv=sklearn.model_selection.StratifiedKFold(n_splits=2, random_state=1), + n_iter=5, + ) flow = self.extension.model_to_flow(model) flow.flow_id = 1 - flow.components['estimator'].flow_id = 2 + flow.components["estimator"].flow_id = 2 parameters = self.extension.obtain_parameter_values(flow) for parameter in parameters: - self.assertIsNotNone(parameter['oml:component'], msg=parameter) - if parameter['oml:name'] == 'n_estimators': - self.assertEqual(parameter['oml:value'], '5') - self.assertEqual(parameter['oml:component'], 2) + self.assertIsNotNone(parameter["oml:component"], msg=parameter) + if parameter["oml:name"] == "n_estimators": + self.assertEqual(parameter["oml:value"], "5") + self.assertEqual(parameter["oml:component"], 2) def test_numpy_type_allowed_in_flow(self): """ Simple numpy types should be serializable. """ dt = sklearn.tree.DecisionTreeClassifier( - max_depth=np.float64(3.0), - min_samples_leaf=np.int32(5) + max_depth=np.float64(3.0), min_samples_leaf=np.int32(5) ) self.extension.model_to_flow(dt) @@ -1339,9 +1414,9 @@ def setUp(self): def test_run_model_on_task(self): class MyPipe(sklearn.pipeline.Pipeline): pass + task = openml.tasks.get_task(1) - pipe = MyPipe([('imp', SimpleImputer()), - ('dummy', sklearn.dummy.DummyClassifier())]) + pipe = MyPipe([("imp", SimpleImputer()), ("dummy", sklearn.dummy.DummyClassifier())]) openml.runs.run_model_on_task(pipe, task) def test_seed_model(self): @@ -1359,14 +1434,13 @@ def test_seed_model(self): }, cv=sklearn.model_selection.StratifiedKFold(n_splits=2, shuffle=True), ), - sklearn.dummy.DummyClassifier() + sklearn.dummy.DummyClassifier(), ] for idx, clf in enumerate(randomized_clfs): const_probe = 42 all_params = clf.get_params() - params = [key for key in all_params if - key.endswith('random_state')] + params = [key for key in all_params if key.endswith("random_state")] self.assertGreater(len(params), 0) # before param value is None @@ -1377,8 +1451,7 @@ def test_seed_model(self): clf_seeded = self.extension.seed_model(clf, const_probe) new_params = clf_seeded.get_params() - randstate_params = [key for key in new_params if - key.endswith('random_state')] + randstate_params = [key for key in new_params if key.endswith("random_state")] # afterwards, param value is set for param in randstate_params: @@ -1393,7 +1466,7 @@ def test_seed_model_raises(self): # anything else than an int randomized_clfs = [ sklearn.ensemble.BaggingClassifier(random_state=np.random.RandomState(42)), - sklearn.dummy.DummyClassifier(random_state="OpenMLIsGreat") + sklearn.dummy.DummyClassifier(random_state="OpenMLIsGreat"), ] for clf in randomized_clfs: @@ -1404,17 +1477,15 @@ def test_run_model_on_fold_classification_1(self): task = openml.tasks.get_task(1) X, y = task.get_X_and_y() - 
train_indices, test_indices = task.get_train_test_split_indices( - repeat=0, fold=0, sample=0) + train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0) X_train = X[train_indices] y_train = y[train_indices] X_test = X[test_indices] y_test = y[test_indices] - pipeline = sklearn.pipeline.Pipeline(steps=[ - ('imp', SimpleImputer()), - ('clf', sklearn.tree.DecisionTreeClassifier()), - ]) + pipeline = sklearn.pipeline.Pipeline( + steps=[("imp", SimpleImputer()), ("clf", sklearn.tree.DecisionTreeClassifier())] + ) # TODO add some mocking here to actually test the innards of this function, too! res = self.extension._run_model_on_fold( model=pipeline, @@ -1460,18 +1531,14 @@ def test_run_model_on_fold_classification_2(self): task = openml.tasks.get_task(7) X, y = task.get_X_and_y() - train_indices, test_indices = task.get_train_test_split_indices( - repeat=0, fold=0, sample=0) + train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0) X_train = X[train_indices] y_train = y[train_indices] X_test = X[test_indices] y_test = y[test_indices] pipeline = sklearn.model_selection.GridSearchCV( - sklearn.tree.DecisionTreeClassifier(), - { - "max_depth": [1, 2], - }, + sklearn.tree.DecisionTreeClassifier(), {"max_depth": [1, 2]}, ) # TODO add some mocking here to actually test the innards of this function, too! res = self.extension._run_model_on_fold( @@ -1513,7 +1580,6 @@ def test_run_model_on_fold_classification_2(self): ) def test_run_model_on_fold_classification_3(self): - class HardNaiveBayes(sklearn.naive_bayes.GaussianNB): # class for testing a naive bayes classifier that does not allow soft # predictions @@ -1521,30 +1587,31 @@ def __init__(self, priors=None): super(HardNaiveBayes, self).__init__(priors) def predict_proba(*args, **kwargs): - raise AttributeError('predict_proba is not available when ' - 'probability=False') + raise AttributeError("predict_proba is not available when " "probability=False") # task 1 (test server) is important: it is a task with an unused class tasks = [1, 3, 115] flow = unittest.mock.Mock() - flow.name = 'dummy' + flow.name = "dummy" for task_id in tasks: task = openml.tasks.get_task(task_id) X, y = task.get_X_and_y() train_indices, test_indices = task.get_train_test_split_indices( - repeat=0, fold=0, sample=0) + repeat=0, fold=0, sample=0 + ) X_train = X[train_indices] y_train = y[train_indices] X_test = X[test_indices] - clf1 = sklearn.pipeline.Pipeline(steps=[ - ('imputer', SimpleImputer()), - ('estimator', sklearn.naive_bayes.GaussianNB()) - ]) - clf2 = sklearn.pipeline.Pipeline(steps=[ - ('imputer', SimpleImputer()), - ('estimator', HardNaiveBayes()) - ]) + clf1 = sklearn.pipeline.Pipeline( + steps=[ + ("imputer", SimpleImputer()), + ("estimator", sklearn.naive_bayes.GaussianNB()), + ] + ) + clf2 = sklearn.pipeline.Pipeline( + steps=[("imputer", SimpleImputer()), ("estimator", HardNaiveBayes())] + ) pred_1, proba_1, _, _ = self.extension._run_model_on_fold( model=clf1, @@ -1587,17 +1654,15 @@ def test_run_model_on_fold_regression(self): task = openml.tasks.get_task(2999) X, y = task.get_X_and_y() - train_indices, test_indices = task.get_train_test_split_indices( - repeat=0, fold=0, sample=0) + train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0) X_train = X[train_indices] y_train = y[train_indices] X_test = X[test_indices] y_test = y[test_indices] - pipeline = sklearn.pipeline.Pipeline(steps=[ - ('imp', SimpleImputer()), - ('clf', 
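
For orientation while reading the _run_model_on_fold tests: the call pattern they use is sketched below. This assumes network access to an OpenML server for get_task (task id 1 is the test-server task used above) and treats _run_model_on_fold as the private extension helper it is, so the exact return types may differ between versions.

    import sklearn.pipeline
    import sklearn.tree
    from sklearn.impute import SimpleImputer

    import openml
    from openml.extensions.sklearn import SklearnExtension

    extension = SklearnExtension()

    task = openml.tasks.get_task(1)  # requires a reachable OpenML server
    X, y = task.get_X_and_y()
    train_idx, test_idx = task.get_train_test_split_indices(repeat=0, fold=0, sample=0)

    pipeline = sklearn.pipeline.Pipeline(
        steps=[("imp", SimpleImputer()), ("clf", sklearn.tree.DecisionTreeClassifier())]
    )

    # Private helper: fits on the training split and predicts on the test split,
    # returning predictions, class probabilities, extra measures, and a trace.
    y_hat, y_hat_proba, user_defined_measures, trace = extension._run_model_on_fold(
        model=pipeline,
        task=task,
        fold_no=0,
        rep_no=0,
        X_train=X[train_idx],
        y_train=y[train_idx],
        X_test=X[test_idx],
    )
    print(y_hat.shape, None if y_hat_proba is None else y_hat_proba.shape)
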
sklearn.tree.DecisionTreeRegressor()), - ]) + pipeline = sklearn.pipeline.Pipeline( + steps=[("imp", SimpleImputer()), ("clf", sklearn.tree.DecisionTreeRegressor())] + ) # TODO add some mocking here to actually test the innards of this function, too! res = self.extension._run_model_on_fold( model=pipeline, @@ -1637,26 +1702,21 @@ def test_run_model_on_fold_clustering(self): openml.config.server = self.production_server task = openml.tasks.get_task(126033) - X = task.get_X(dataset_format='array') + X = task.get_X(dataset_format="array") - pipeline = sklearn.pipeline.Pipeline(steps=[ - ('imp', SimpleImputer()), - ('clf', sklearn.cluster.KMeans()), - ]) + pipeline = sklearn.pipeline.Pipeline( + steps=[("imp", SimpleImputer()), ("clf", sklearn.cluster.KMeans())] + ) # TODO add some mocking here to actually test the innards of this function, too! res = self.extension._run_model_on_fold( - model=pipeline, - task=task, - fold_no=0, - rep_no=0, - X_train=X, + model=pipeline, task=task, fold_no=0, rep_no=0, X_train=X, ) y_hat, y_hat_proba, user_defined_measures, trace = res # predictions self.assertIsInstance(y_hat, np.ndarray) - self.assertEqual(y_hat.shape, (X.shape[0], )) + self.assertEqual(y_hat.shape, (X.shape[0],)) self.assertIsNone(y_hat_proba) # check user defined measures @@ -1677,26 +1737,26 @@ def test_run_model_on_fold_clustering(self): def test__extract_trace_data(self): - param_grid = {"hidden_layer_sizes": [[5, 5], [10, 10], [20, 20]], - "activation": ['identity', 'logistic', 'tanh', 'relu'], - "learning_rate_init": [0.1, 0.01, 0.001, 0.0001], - "max_iter": [10, 20, 40, 80]} + param_grid = { + "hidden_layer_sizes": [[5, 5], [10, 10], [20, 20]], + "activation": ["identity", "logistic", "tanh", "relu"], + "learning_rate_init": [0.1, 0.01, 0.001, 0.0001], + "max_iter": [10, 20, 40, 80], + } num_iters = 10 task = openml.tasks.get_task(20) clf = sklearn.model_selection.RandomizedSearchCV( - sklearn.neural_network.MLPClassifier(), - param_grid, - num_iters, + sklearn.neural_network.MLPClassifier(), param_grid, num_iters, ) # just run the task on the model (without invoking any fancy extension & openml code) train, _ = task.get_train_test_split_indices(0, 0) X, y = task.get_X_and_y() with warnings.catch_warnings(): - warnings.simplefilter('ignore') + warnings.simplefilter("ignore") clf.fit(X[train], y[train]) # check num layers of MLP - self.assertIn(clf.best_estimator_.hidden_layer_sizes, param_grid['hidden_layer_sizes']) + self.assertIn(clf.best_estimator_.hidden_layer_sizes, param_grid["hidden_layer_sizes"]) trace_list = self.extension._extract_trace_data(clf, rep_no=0, fold_no=0) trace = self.extension._obtain_arff_trace(clf, trace_list) @@ -1726,6 +1786,7 @@ def test__extract_trace_data(self): def test_trim_flow_name(self): import re + long = """sklearn.pipeline.Pipeline( columntransformer=sklearn.compose._column_transformer.ColumnTransformer( numeric=sklearn.pipeline.Pipeline( @@ -1738,10 +1799,11 @@ def test_trim_flow_name(self): svc=sklearn.svm.classes.SVC)""" short = "sklearn.Pipeline(ColumnTransformer,VarianceThreshold,SVC)" shorter = "sklearn.Pipeline(...,SVC)" - long_stripped, _ = re.subn(r'\s', '', long) + long_stripped, _ = re.subn(r"\s", "", long) self.assertEqual(short, SklearnExtension.trim_flow_name(long_stripped)) - self.assertEqual(shorter, - SklearnExtension.trim_flow_name(long_stripped, extra_trim_length=50)) + self.assertEqual( + shorter, SklearnExtension.trim_flow_name(long_stripped, extra_trim_length=50) + ) long = """sklearn.pipeline.Pipeline( 
imputation=openmlstudy14.preprocessing.ConditionalImputer, @@ -1749,7 +1811,7 @@ def test_trim_flow_name(self): variencethreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, classifier=sklearn.ensemble.forest.RandomForestClassifier)""" short = "sklearn.Pipeline(ConditionalImputer,OneHotEncoder,VarianceThreshold,RandomForestClassifier)" # noqa: E501 - long_stripped, _ = re.subn(r'\s', '', long) + long_stripped, _ = re.subn(r"\s", "", long) self.assertEqual(short, SklearnExtension.trim_flow_name(long_stripped)) long = """sklearn.pipeline.Pipeline( @@ -1758,7 +1820,7 @@ def test_trim_flow_name(self): Estimator=sklearn.model_selection._search.RandomizedSearchCV( estimator=sklearn.tree.tree.DecisionTreeClassifier))""" short = "sklearn.Pipeline(Imputer,VarianceThreshold,RandomizedSearchCV(DecisionTreeClassifier))" # noqa: E501 - long_stripped, _ = re.subn(r'\s', '', long) + long_stripped, _ = re.subn(r"\s", "", long) self.assertEqual(short, SklearnExtension.trim_flow_name(long_stripped)) long = """sklearn.model_selection._search.RandomizedSearchCV( @@ -1766,68 +1828,87 @@ def test_trim_flow_name(self): SimpleImputer=sklearn.preprocessing.imputation.Imputer, classifier=sklearn.ensemble.forest.RandomForestClassifier))""" short = "sklearn.RandomizedSearchCV(Pipeline(Imputer,RandomForestClassifier))" - long_stripped, _ = re.subn(r'\s', '', long) + long_stripped, _ = re.subn(r"\s", "", long) self.assertEqual(short, SklearnExtension.trim_flow_name(long_stripped)) long = """sklearn.pipeline.FeatureUnion( pca=sklearn.decomposition.pca.PCA, svd=sklearn.decomposition.truncated_svd.TruncatedSVD)""" short = "sklearn.FeatureUnion(PCA,TruncatedSVD)" - long_stripped, _ = re.subn(r'\s', '', long) + long_stripped, _ = re.subn(r"\s", "", long) self.assertEqual(short, SklearnExtension.trim_flow_name(long_stripped)) long = "sklearn.ensemble.forest.RandomForestClassifier" short = "sklearn.RandomForestClassifier" self.assertEqual(short, SklearnExtension.trim_flow_name(long)) - self.assertEqual("weka.IsolationForest", - SklearnExtension.trim_flow_name("weka.IsolationForest")) + self.assertEqual( + "weka.IsolationForest", SklearnExtension.trim_flow_name("weka.IsolationForest") + ) - @unittest.skipIf(LooseVersion(sklearn.__version__) < "0.21", - reason="SimpleImputer, ColumnTransformer available only after 0.19 and " - "Pipeline till 0.20 doesn't support indexing and 'passthrough'") + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.21", + reason="SimpleImputer, ColumnTransformer available only after 0.19 and " + "Pipeline till 0.20 doesn't support indexing and 'passthrough'", + ) def test_run_on_model_with_empty_steps(self): from sklearn.compose import ColumnTransformer + # testing 'drop', 'passthrough', None as non-actionable sklearn estimators dataset = openml.datasets.get_dataset(128) task = openml.tasks.get_task(59) X, y, categorical_ind, feature_names = dataset.get_data( - target=dataset.default_target_attribute, dataset_format='array') + target=dataset.default_target_attribute, dataset_format="array" + ) categorical_ind = np.array(categorical_ind) - cat_idx, = np.where(categorical_ind) - cont_idx, = np.where(~categorical_ind) + (cat_idx,) = np.where(categorical_ind) + (cont_idx,) = np.where(~categorical_ind) clf = make_pipeline( - ColumnTransformer([('cat', make_pipeline(SimpleImputer(strategy='most_frequent'), - OneHotEncoder()), cat_idx.tolist()), - ('cont', make_pipeline(SimpleImputer(strategy='median'), - StandardScaler()), cont_idx.tolist())]) + ColumnTransformer( + [ + ( + 
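
A compact, offline illustration of SklearnExtension.trim_flow_name as exercised in these assertions; it is called on the class, and flow names on the server contain no whitespace, so the test strips it first. The long name below is the one from the test itself.

    import re

    from openml.extensions.sklearn import SklearnExtension

    long_name = """sklearn.pipeline.Pipeline(
        imputation=openmlstudy14.preprocessing.ConditionalImputer,
        hotencoding=sklearn.preprocessing.data.OneHotEncoder,
        variencethreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold,
        classifier=sklearn.ensemble.forest.RandomForestClassifier)"""

    # Remove all whitespace, as flow names stored on the server have none.
    stripped, _ = re.subn(r"\s", "", long_name)

    print(SklearnExtension.trim_flow_name(stripped))
    # sklearn.Pipeline(ConditionalImputer,OneHotEncoder,VarianceThreshold,RandomForestClassifier)
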
"cat", + make_pipeline(SimpleImputer(strategy="most_frequent"), OneHotEncoder()), + cat_idx.tolist(), + ), + ( + "cont", + make_pipeline(SimpleImputer(strategy="median"), StandardScaler()), + cont_idx.tolist(), + ), + ] + ) ) - clf = sklearn.pipeline.Pipeline([ - ('dummystep', 'passthrough'), # adding 'passthrough' as an estimator - ('prep', clf), - ('classifier', sklearn.svm.SVC(gamma='auto')) - ]) + clf = sklearn.pipeline.Pipeline( + [ + ("dummystep", "passthrough"), # adding 'passthrough' as an estimator + ("prep", clf), + ("classifier", sklearn.svm.SVC(gamma="auto")), + ] + ) # adding 'drop' to a ColumnTransformer if not categorical_ind.any(): - clf[1][0].set_params(cat='drop') + clf[1][0].set_params(cat="drop") if not (~categorical_ind).any(): - clf[1][0].set_params(cont='drop') + clf[1][0].set_params(cont="drop") # serializing model with non-actionable step run, flow = openml.runs.run_model_on_task(model=clf, task=task, return_flow=True) self.assertEqual(len(flow.components), 3) - self.assertEqual(flow.components['dummystep'], 'passthrough') - self.assertTrue(isinstance(flow.components['classifier'], OpenMLFlow)) - self.assertTrue(isinstance(flow.components['prep'], OpenMLFlow)) - self.assertTrue(isinstance(flow.components['prep'].components['columntransformer'], - OpenMLFlow)) - self.assertEqual(flow.components['prep'].components['columntransformer'].components['cat'], - 'drop') + self.assertEqual(flow.components["dummystep"], "passthrough") + self.assertTrue(isinstance(flow.components["classifier"], OpenMLFlow)) + self.assertTrue(isinstance(flow.components["prep"], OpenMLFlow)) + self.assertTrue( + isinstance(flow.components["prep"].components["columntransformer"], OpenMLFlow) + ) + self.assertEqual( + flow.components["prep"].components["columntransformer"].components["cat"], "drop" + ) # de-serializing flow to a model with non-actionable step model = self.extension.flow_to_model(flow) @@ -1835,13 +1916,16 @@ def test_run_on_model_with_empty_steps(self): self.assertEqual(type(model), type(clf)) self.assertNotEqual(model, clf) self.assertEqual(len(model.named_steps), 3) - self.assertEqual(model.named_steps['dummystep'], 'passthrough') + self.assertEqual(model.named_steps["dummystep"], "passthrough") def test_sklearn_serialization_with_none_step(self): - msg = 'Cannot serialize objects of None type. Please use a valid ' \ - 'placeholder for None. Note that empty sklearn estimators can be ' \ - 'replaced with \'drop\' or \'passthrough\'.' - clf = sklearn.pipeline.Pipeline([('dummystep', None), - ('classifier', sklearn.svm.SVC(gamma='auto'))]) + msg = ( + "Cannot serialize objects of None type. Please use a valid " + "placeholder for None. Note that empty sklearn estimators can be " + "replaced with 'drop' or 'passthrough'." 
+ ) + clf = sklearn.pipeline.Pipeline( + [("dummystep", None), ("classifier", sklearn.svm.SVC(gamma="auto"))] + ) with self.assertRaisesRegex(ValueError, msg): self.extension.model_to_flow(clf) diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index 7e735d655..9f289870e 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -56,21 +56,21 @@ def test_get_flow(self): self.assertIsInstance(subflow_1, openml.OpenMLFlow) self.assertEqual(subflow_1.flow_id, 4025) self.assertEqual(len(subflow_1.parameters), 14) - self.assertEqual(subflow_1.parameters['E'], 'CC') + self.assertEqual(subflow_1.parameters["E"], "CC") self.assertEqual(len(subflow_1.components), 1) subflow_2 = list(subflow_1.components.values())[0] self.assertIsInstance(subflow_2, openml.OpenMLFlow) self.assertEqual(subflow_2.flow_id, 4026) self.assertEqual(len(subflow_2.parameters), 13) - self.assertEqual(subflow_2.parameters['I'], '10') + self.assertEqual(subflow_2.parameters["I"], "10") self.assertEqual(len(subflow_2.components), 1) subflow_3 = list(subflow_2.components.values())[0] self.assertIsInstance(subflow_3, openml.OpenMLFlow) self.assertEqual(subflow_3.flow_id, 1724) self.assertEqual(len(subflow_3.parameters), 11) - self.assertEqual(subflow_3.parameters['L'], '-1') + self.assertEqual(subflow_3.parameters["L"], "-1") self.assertEqual(len(subflow_3.components), 0) def test_get_structure(self): @@ -80,8 +80,8 @@ def test_get_structure(self): openml.config.server = self.production_server flow = openml.flows.get_flow(4024) - flow_structure_name = flow.get_structure('name') - flow_structure_id = flow.get_structure('flow_id') + flow_structure_name = flow.get_structure("name") + flow_structure_id = flow.get_structure("flow_id") # components: root (filteredclassifier), multisearch, loginboost, # reptree self.assertEqual(len(flow_structure_name), 4) @@ -117,33 +117,43 @@ def test_from_xml_to_xml(self): # TODO maybe get this via get_flow(), which would have to be refactored # to allow getting only the xml dictionary # TODO: no sklearn flows. - for flow_id in [3, 5, 7, 9, ]: - flow_xml = _perform_api_call("flow/%d" % flow_id, - request_method='get') + for flow_id in [ + 3, + 5, + 7, + 9, + ]: + flow_xml = _perform_api_call("flow/%d" % flow_id, request_method="get") flow_dict = xmltodict.parse(flow_xml) flow = openml.OpenMLFlow._from_dict(flow_dict) new_xml = flow._to_xml() flow_xml = ( - flow_xml.replace(' ', '').replace('\t', ''). - strip().replace('\n\n', '\n').replace('"', '"') + flow_xml.replace(" ", "") + .replace("\t", "") + .strip() + .replace("\n\n", "\n") + .replace(""", '"') ) - flow_xml = re.sub(r'^$', '', flow_xml) + flow_xml = re.sub(r"^$", "", flow_xml) new_xml = ( - new_xml.replace(' ', '').replace('\t', ''). 
- strip().replace('\n\n', '\n').replace('"', '"') + new_xml.replace(" ", "") + .replace("\t", "") + .strip() + .replace("\n\n", "\n") + .replace(""", '"') ) - new_xml = re.sub(r'^$', '', new_xml) + new_xml = re.sub(r"^$", "", new_xml) self.assertEqual(new_xml, flow_xml) def test_to_xml_from_xml(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) boosting = sklearn.ensemble.AdaBoostClassifier( - base_estimator=sklearn.tree.DecisionTreeClassifier()) - model = sklearn.pipeline.Pipeline(steps=( - ('scaler', scaler), ('boosting', boosting))) + base_estimator=sklearn.tree.DecisionTreeClassifier() + ) + model = sklearn.pipeline.Pipeline(steps=(("scaler", scaler), ("boosting", boosting))) flow = self.extension.model_to_flow(model) flow.flow_id = -234 # end of setup @@ -158,31 +168,29 @@ def test_to_xml_from_xml(self): def test_publish_flow(self): flow = openml.OpenMLFlow( - name='sklearn.dummy.DummyClassifier', - class_name='sklearn.dummy.DummyClassifier', + name="sklearn.dummy.DummyClassifier", + class_name="sklearn.dummy.DummyClassifier", description="test description", model=sklearn.dummy.DummyClassifier(), components=collections.OrderedDict(), parameters=collections.OrderedDict(), parameters_meta_info=collections.OrderedDict(), external_version=self.extension._format_external_version( - 'sklearn', - sklearn.__version__, + "sklearn", sklearn.__version__, ), tags=[], - language='English', + language="English", dependencies=None, ) flow, _ = self._add_sentinel_to_flow_name(flow, None) flow.publish() - TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name)) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - flow.flow_id)) + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) self.assertIsInstance(flow.flow_id, int) - @mock.patch('openml.flows.functions.flow_exists') + @mock.patch("openml.flows.functions.flow_exists") def test_publish_existing_flow(self, flow_exists_mock): clf = sklearn.tree.DecisionTreeClassifier(max_depth=2) flow = self.extension.model_to_flow(clf) @@ -190,31 +198,32 @@ def test_publish_existing_flow(self, flow_exists_mock): with self.assertRaises(openml.exceptions.PyOpenMLError) as context_manager: flow.publish(raise_error_if_exists=True) - TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name)) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - flow.flow_id)) + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id) + ) - self.assertTrue('OpenMLFlow already exists' in context_manager.exception.message) + self.assertTrue("OpenMLFlow already exists" in context_manager.exception.message) def test_publish_flow_with_similar_components(self): - clf = sklearn.ensemble.VotingClassifier([ - ('lr', sklearn.linear_model.LogisticRegression(solver='lbfgs')), - ]) + clf = sklearn.ensemble.VotingClassifier( + [("lr", sklearn.linear_model.LogisticRegression(solver="lbfgs"))] + ) flow = self.extension.model_to_flow(clf) flow, _ = self._add_sentinel_to_flow_name(flow, None) flow.publish() - TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name)) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - flow.flow_id)) + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) + TestBase.logger.info("collected from {}: 
{}".format(__file__.split("/")[-1], flow.flow_id)) # For a flow where both components are published together, the upload # date should be equal self.assertEqual( flow.upload_date, - flow.components['lr'].upload_date, + flow.components["lr"].upload_date, msg=( flow.name, flow.flow_id, - flow.components['lr'].name, flow.components['lr'].flow_id, + flow.components["lr"].name, + flow.components["lr"].flow_id, ), ) @@ -222,36 +231,32 @@ def test_publish_flow_with_similar_components(self): flow1 = self.extension.model_to_flow(clf1) flow1, sentinel = self._add_sentinel_to_flow_name(flow1, None) flow1.publish() - TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name)) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - flow1.flow_id)) + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow1.flow_id)) # In order to assign different upload times to the flows! time.sleep(1) clf2 = sklearn.ensemble.VotingClassifier( - [('dt', sklearn.tree.DecisionTreeClassifier(max_depth=2))]) + [("dt", sklearn.tree.DecisionTreeClassifier(max_depth=2))] + ) flow2 = self.extension.model_to_flow(clf2) flow2, _ = self._add_sentinel_to_flow_name(flow2, sentinel) flow2.publish() - TestBase._mark_entity_for_removal('flow', (flow2.flow_id, flow2.name)) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - flow2.flow_id)) + TestBase._mark_entity_for_removal("flow", (flow2.flow_id, flow2.name)) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow2.flow_id)) # If one component was published before the other, the components in # the flow should have different upload dates - self.assertNotEqual(flow2.upload_date, - flow2.components['dt'].upload_date) + self.assertNotEqual(flow2.upload_date, flow2.components["dt"].upload_date) - clf3 = sklearn.ensemble.AdaBoostClassifier( - sklearn.tree.DecisionTreeClassifier(max_depth=3)) + clf3 = sklearn.ensemble.AdaBoostClassifier(sklearn.tree.DecisionTreeClassifier(max_depth=3)) flow3 = self.extension.model_to_flow(clf3) flow3, _ = self._add_sentinel_to_flow_name(flow3, sentinel) # Child flow has different parameter. Check for storing the flow # correctly on the server should thus not check the child's parameters! flow3.publish() - TestBase._mark_entity_for_removal('flow', (flow3.flow_id, flow3.name)) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - flow3.flow_id)) + TestBase._mark_entity_for_removal("flow", (flow3.flow_id, flow3.name)) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow3.flow_id)) def test_semi_legal_flow(self): # TODO: Test if parameters are set correctly! 
@@ -259,24 +264,25 @@ def test_semi_legal_flow(self): # Bagging i.e., Bagging(Bagging(J48)) and Bagging(J48) semi_legal = sklearn.ensemble.BaggingClassifier( base_estimator=sklearn.ensemble.BaggingClassifier( - base_estimator=sklearn.tree.DecisionTreeClassifier())) + base_estimator=sklearn.tree.DecisionTreeClassifier() + ) + ) flow = self.extension.model_to_flow(semi_legal) flow, _ = self._add_sentinel_to_flow_name(flow, None) flow.publish() - TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name)) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - flow.flow_id)) + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) - @mock.patch('openml.flows.functions.get_flow') - @mock.patch('openml.flows.functions.flow_exists') - @mock.patch('openml._api_calls._perform_api_call') + @mock.patch("openml.flows.functions.get_flow") + @mock.patch("openml.flows.functions.flow_exists") + @mock.patch("openml._api_calls._perform_api_call") def test_publish_error(self, api_call_mock, flow_exists_mock, get_flow_mock): model = sklearn.ensemble.RandomForestClassifier() flow = self.extension.model_to_flow(model) - api_call_mock.return_value = "\n" \ - " 1\n" \ - "" + api_call_mock.return_value = ( + "\n" " 1\n" "" + ) flow_exists_mock.return_value = False get_flow_mock.return_value = flow @@ -294,9 +300,10 @@ def test_publish_error(self, api_call_mock, flow_exists_mock, get_flow_mock): with self.assertRaises(ValueError) as context_manager: flow.publish() - TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name)) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - flow.flow_id)) + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id) + ) fixture = ( "The flow on the server is inconsistent with the local flow. " @@ -315,9 +322,9 @@ def test_illegal_flow(self): # should throw error as it contains two imputers illegal = sklearn.pipeline.Pipeline( steps=[ - ('imputer1', SimpleImputer()), - ('imputer2', SimpleImputer()), - ('classif', sklearn.tree.DecisionTreeClassifier()) + ("imputer1", SimpleImputer()), + ("imputer2", SimpleImputer()), + ("classif", sklearn.tree.DecisionTreeClassifier()), ] ) self.assertRaises(ValueError, self.extension.model_to_flow, illegal) @@ -328,9 +335,9 @@ def get_sentinel(): # is identified by its name and external version online. 
Having a # unique name allows us to publish the same flow in each test run md5 = hashlib.md5() - md5.update(str(time.time()).encode('utf-8')) + md5.update(str(time.time()).encode("utf-8")) sentinel = md5.hexdigest()[:10] - sentinel = 'TEST%s' % sentinel + sentinel = "TEST%s" % sentinel return sentinel name = get_sentinel() + get_sentinel() @@ -343,17 +350,14 @@ def test_existing_flow_exists(self): # create a flow nb = sklearn.naive_bayes.GaussianNB() - ohe_params = {'sparse': False, 'handle_unknown': 'ignore'} - if LooseVersion(sklearn.__version__) >= '0.20': - ohe_params['categories'] = 'auto' + ohe_params = {"sparse": False, "handle_unknown": "ignore"} + if LooseVersion(sklearn.__version__) >= "0.20": + ohe_params["categories"] = "auto" steps = [ - ('imputation', SimpleImputer(strategy='median')), - ('hotencoding', sklearn.preprocessing.OneHotEncoder(**ohe_params)), - ( - 'variencethreshold', - sklearn.feature_selection.VarianceThreshold(), - ), - ('classifier', sklearn.tree.DecisionTreeClassifier()) + ("imputation", SimpleImputer(strategy="median")), + ("hotencoding", sklearn.preprocessing.OneHotEncoder(**ohe_params)), + ("variencethreshold", sklearn.feature_selection.VarianceThreshold(),), + ("classifier", sklearn.tree.DecisionTreeClassifier()), ] complicated = sklearn.pipeline.Pipeline(steps=steps) @@ -362,18 +366,16 @@ def test_existing_flow_exists(self): flow, _ = self._add_sentinel_to_flow_name(flow, None) # publish the flow flow = flow.publish() - TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name)) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - flow.flow_id)) + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id) + ) # redownload the flow flow = openml.flows.get_flow(flow.flow_id) # check if flow exists can find it flow = openml.flows.get_flow(flow.flow_id) - downloaded_flow_id = openml.flows.flow_exists( - flow.name, - flow.external_version, - ) + downloaded_flow_id = openml.flows.flow_exists(flow.name, flow.external_version,) self.assertEqual(downloaded_flow_id, flow.flow_id) def test_sklearn_to_upload_to_flow(self): @@ -382,34 +384,31 @@ def test_sklearn_to_upload_to_flow(self): y = iris.target # Test a more complicated flow - ohe_params = {'handle_unknown': 'ignore'} + ohe_params = {"handle_unknown": "ignore"} if LooseVersion(sklearn.__version__) >= "0.20": - ohe_params['categories'] = 'auto' + ohe_params["categories"] = "auto" ohe = sklearn.preprocessing.OneHotEncoder(**ohe_params) scaler = sklearn.preprocessing.StandardScaler(with_mean=False) pca = sklearn.decomposition.TruncatedSVD() fs = sklearn.feature_selection.SelectPercentile( - score_func=sklearn.feature_selection.f_classif, percentile=30) - fu = sklearn.pipeline.FeatureUnion(transformer_list=[ - ('pca', pca), ('fs', fs)]) + score_func=sklearn.feature_selection.f_classif, percentile=30 + ) + fu = sklearn.pipeline.FeatureUnion(transformer_list=[("pca", pca), ("fs", fs)]) boosting = sklearn.ensemble.AdaBoostClassifier( - base_estimator=sklearn.tree.DecisionTreeClassifier()) + base_estimator=sklearn.tree.DecisionTreeClassifier() + ) model = sklearn.pipeline.Pipeline( - steps=[ - ('ohe', ohe), - ('scaler', scaler), - ('fu', fu), - ('boosting', boosting), - ] + steps=[("ohe", ohe), ("scaler", scaler), ("fu", fu), ("boosting", boosting)] ) parameter_grid = { - 'boosting__n_estimators': [1, 5, 10, 100], - 'boosting__learning_rate': scipy.stats.uniform(0.01, 0.99), 
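
The flow_exists lookup used in test_existing_flow_exists condenses to the sketch below: once a flow is on the server, its name and external version resolve back to the same id. This assumes a configured server and API key; the classifier is an arbitrary stand-in for the pipeline built in the test.

    import sklearn.tree

    import openml
    from openml.extensions.sklearn import SklearnExtension

    extension = SklearnExtension()

    flow = extension.model_to_flow(sklearn.tree.DecisionTreeClassifier())
    flow = flow.publish()  # requires an API key

    # Look the flow up again by name and external version.
    downloaded_id = openml.flows.flow_exists(flow.name, flow.external_version)
    print(downloaded_id == flow.flow_id)  # True
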
- 'boosting__base_estimator__max_depth': scipy.stats.randint(1, 10), + "boosting__n_estimators": [1, 5, 10, 100], + "boosting__learning_rate": scipy.stats.uniform(0.01, 0.99), + "boosting__base_estimator__max_depth": scipy.stats.randint(1, 10), } cv = sklearn.model_selection.StratifiedKFold(n_splits=5, shuffle=True) rs = sklearn.model_selection.RandomizedSearchCV( - estimator=model, param_distributions=parameter_grid, cv=cv) + estimator=model, param_distributions=parameter_grid, cv=cv + ) rs.fit(X, y) flow = self.extension.model_to_flow(rs) # Tags may be sorted in any order (by the server). Just using one tag @@ -423,9 +422,8 @@ def test_sklearn_to_upload_to_flow(self): flow, sentinel = self._add_sentinel_to_flow_name(flow, None) flow.publish() - TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name)) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - flow.flow_id)) + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) self.assertIsInstance(flow.flow_id, int) # Check whether we can load the flow again @@ -438,18 +436,24 @@ def test_sklearn_to_upload_to_flow(self): for i in range(10): # Make sure that we replace all occurences of two newlines - local_xml = local_xml.replace(sentinel, '') + local_xml = local_xml.replace(sentinel, "") local_xml = ( - local_xml.replace(' ', '').replace('\t', ''). - strip().replace('\n\n', '\n').replace('"', '"') + local_xml.replace(" ", "") + .replace("\t", "") + .strip() + .replace("\n\n", "\n") + .replace(""", '"') ) - local_xml = re.sub(r'(^$)', '', local_xml) - server_xml = server_xml.replace(sentinel, '') + local_xml = re.sub(r"(^$)", "", local_xml) + server_xml = server_xml.replace(sentinel, "") server_xml = ( - server_xml.replace(' ', '').replace('\t', ''). 
- strip().replace('\n\n', '\n').replace('"', '"') + server_xml.replace(" ", "") + .replace("\t", "") + .strip() + .replace("\n\n", "\n") + .replace(""", '"') ) - server_xml = re.sub(r'^$', '', server_xml) + server_xml = re.sub(r"^$", "", server_xml) self.assertEqual(server_xml, local_xml) @@ -458,20 +462,18 @@ def test_sklearn_to_upload_to_flow(self): self.assertIsNot(new_flow, flow) # OneHotEncoder was moved to _encoders module in 0.20 - module_name_encoder = ('_encoders' - if LooseVersion(sklearn.__version__) >= "0.20" - else 'data') + module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" fixture_name = ( - '%ssklearn.model_selection._search.RandomizedSearchCV(' - 'estimator=sklearn.pipeline.Pipeline(' - 'ohe=sklearn.preprocessing.%s.OneHotEncoder,' - 'scaler=sklearn.preprocessing.data.StandardScaler,' - 'fu=sklearn.pipeline.FeatureUnion(' - 'pca=sklearn.decomposition.truncated_svd.TruncatedSVD,' - 'fs=' - 'sklearn.feature_selection.univariate_selection.SelectPercentile),' - 'boosting=sklearn.ensemble.weight_boosting.AdaBoostClassifier(' - 'base_estimator=sklearn.tree.tree.DecisionTreeClassifier)))' + "%ssklearn.model_selection._search.RandomizedSearchCV(" + "estimator=sklearn.pipeline.Pipeline(" + "ohe=sklearn.preprocessing.%s.OneHotEncoder," + "scaler=sklearn.preprocessing.data.StandardScaler," + "fu=sklearn.pipeline.FeatureUnion(" + "pca=sklearn.decomposition.truncated_svd.TruncatedSVD," + "fs=" + "sklearn.feature_selection.univariate_selection.SelectPercentile)," + "boosting=sklearn.ensemble.weight_boosting.AdaBoostClassifier(" + "base_estimator=sklearn.tree.tree.DecisionTreeClassifier)))" % (sentinel, module_name_encoder) ) self.assertEqual(new_flow.name, fixture_name) @@ -480,14 +482,13 @@ def test_sklearn_to_upload_to_flow(self): def test_extract_tags(self): flow_xml = "study_14" flow_dict = xmltodict.parse(flow_xml) - tags = openml.utils.extract_xml_tags('oml:tag', flow_dict) - self.assertEqual(tags, ['study_14']) + tags = openml.utils.extract_xml_tags("oml:tag", flow_dict) + self.assertEqual(tags, ["study_14"]) - flow_xml = "OpenmlWeka\n" \ - "weka" + flow_xml = "OpenmlWeka\n" "weka" flow_dict = xmltodict.parse(flow_xml) - tags = openml.utils.extract_xml_tags('oml:tag', flow_dict['oml:flow']) - self.assertEqual(tags, ['OpenmlWeka', 'weka']) + tags = openml.utils.extract_xml_tags("oml:tag", flow_dict["oml:flow"]) + self.assertEqual(tags, ["OpenmlWeka", "weka"]) def test_download_non_scikit_learn_flows(self): openml.config.server = self.production_server @@ -503,7 +504,7 @@ def test_download_non_scikit_learn_flows(self): self.assertIsInstance(subflow_1, openml.OpenMLFlow) self.assertEqual(subflow_1.flow_id, 6743) self.assertEqual(len(subflow_1.parameters), 8) - self.assertEqual(subflow_1.parameters['U'], '0') + self.assertEqual(subflow_1.parameters["U"], "0") self.assertEqual(len(subflow_1.components), 1) self.assertIsNone(subflow_1.model) @@ -511,6 +512,6 @@ def test_download_non_scikit_learn_flows(self): self.assertIsInstance(subflow_2, openml.OpenMLFlow) self.assertEqual(subflow_2.flow_id, 5888) self.assertEqual(len(subflow_2.parameters), 4) - self.assertIsNone(subflow_2.parameters['batch-size']) + self.assertIsNone(subflow_2.parameters["batch-size"]) self.assertEqual(len(subflow_2.components), 0) self.assertIsNone(subflow_2.model) diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index 5a189b996..12af05ffe 100644 --- a/tests/test_flows/test_flow_functions.py +++ 
b/tests/test_flows/test_flow_functions.py @@ -26,13 +26,14 @@ def tearDown(self): def _check_flow(self, flow): self.assertEqual(type(flow), dict) self.assertEqual(len(flow), 6) - self.assertIsInstance(flow['id'], int) - self.assertIsInstance(flow['name'], str) - self.assertIsInstance(flow['full_name'], str) - self.assertIsInstance(flow['version'], str) + self.assertIsInstance(flow["id"], int) + self.assertIsInstance(flow["name"], str) + self.assertIsInstance(flow["full_name"], str) + self.assertIsInstance(flow["version"], str) # There are some runs on openml.org that can have an empty external version - ext_version_str_or_none = (isinstance(flow['external_version'], str) - or flow['external_version'] is None) + ext_version_str_or_none = ( + isinstance(flow["external_version"], str) or flow["external_version"] is None + ) self.assertTrue(ext_version_str_or_none) def test_list_flows(self): @@ -49,23 +50,21 @@ def test_list_flows_output_format(self): openml.config.server = self.production_server # We can only perform a smoke test here because we test on dynamic # data from the internet... - flows = openml.flows.list_flows(output_format='dataframe') + flows = openml.flows.list_flows(output_format="dataframe") self.assertIsInstance(flows, pd.DataFrame) self.assertGreaterEqual(len(flows), 1500) def test_list_flows_empty(self): openml.config.server = self.production_server - flows = openml.flows.list_flows(tag='NoOneEverUsesThisTag123') + flows = openml.flows.list_flows(tag="NoOneEverUsesThisTag123") if len(flows) > 0: - raise ValueError( - 'UnitTest Outdated, got somehow results (please adapt)' - ) + raise ValueError("UnitTest Outdated, got somehow results (please adapt)") self.assertIsInstance(flows, dict) def test_list_flows_by_tag(self): openml.config.server = self.production_server - flows = openml.flows.list_flows(tag='weka') + flows = openml.flows.list_flows(tag="weka") self.assertGreaterEqual(len(flows), 5) for did in flows: self._check_flow(flows[did]) @@ -81,174 +80,196 @@ def test_list_flows_paginate(self): self._check_flow(flows[did]) def test_are_flows_equal(self): - flow = openml.flows.OpenMLFlow(name='Test', - description='Test flow', - model=None, - components=OrderedDict(), - parameters=OrderedDict(), - parameters_meta_info=OrderedDict(), - external_version='1', - tags=['abc', 'def'], - language='English', - dependencies='abc', - class_name='Test', - custom_name='Test') + flow = openml.flows.OpenMLFlow( + name="Test", + description="Test flow", + model=None, + components=OrderedDict(), + parameters=OrderedDict(), + parameters_meta_info=OrderedDict(), + external_version="1", + tags=["abc", "def"], + language="English", + dependencies="abc", + class_name="Test", + custom_name="Test", + ) # Test most important values that can be set by a user openml.flows.functions.assert_flows_equal(flow, flow) - for attribute, new_value in [('name', 'Tes'), - ('external_version', '2'), - ('language', 'english'), - ('dependencies', 'ab'), - ('class_name', 'Tes'), - ('custom_name', 'Tes')]: + for attribute, new_value in [ + ("name", "Tes"), + ("external_version", "2"), + ("language", "english"), + ("dependencies", "ab"), + ("class_name", "Tes"), + ("custom_name", "Tes"), + ]: new_flow = copy.deepcopy(flow) setattr(new_flow, attribute, new_value) self.assertNotEqual( - getattr(flow, attribute), - getattr(new_flow, attribute), + getattr(flow, attribute), getattr(new_flow, attribute), ) self.assertRaises( - ValueError, - openml.flows.functions.assert_flows_equal, - flow, - new_flow, + ValueError, 
openml.flows.functions.assert_flows_equal, flow, new_flow, ) # Test that the API ignores several keys when comparing flows openml.flows.functions.assert_flows_equal(flow, flow) - for attribute, new_value in [('flow_id', 1), - ('uploader', 1), - ('version', 1), - ('upload_date', '18.12.1988'), - ('binary_url', 'openml.org'), - ('binary_format', 'gzip'), - ('binary_md5', '12345'), - ('model', []), - ('tags', ['abc', 'de'])]: + for attribute, new_value in [ + ("flow_id", 1), + ("uploader", 1), + ("version", 1), + ("upload_date", "18.12.1988"), + ("binary_url", "openml.org"), + ("binary_format", "gzip"), + ("binary_md5", "12345"), + ("model", []), + ("tags", ["abc", "de"]), + ]: new_flow = copy.deepcopy(flow) setattr(new_flow, attribute, new_value) self.assertNotEqual( - getattr(flow, attribute), - getattr(new_flow, attribute), + getattr(flow, attribute), getattr(new_flow, attribute), ) openml.flows.functions.assert_flows_equal(flow, new_flow) # Now test for parameters - flow.parameters['abc'] = 1.0 - flow.parameters['def'] = 2.0 + flow.parameters["abc"] = 1.0 + flow.parameters["def"] = 2.0 openml.flows.functions.assert_flows_equal(flow, flow) new_flow = copy.deepcopy(flow) - new_flow.parameters['abc'] = 3.0 - self.assertRaises(ValueError, openml.flows.functions.assert_flows_equal, - flow, new_flow) + new_flow.parameters["abc"] = 3.0 + self.assertRaises(ValueError, openml.flows.functions.assert_flows_equal, flow, new_flow) # Now test for components (subflows) parent_flow = copy.deepcopy(flow) subflow = copy.deepcopy(flow) - parent_flow.components['subflow'] = subflow + parent_flow.components["subflow"] = subflow openml.flows.functions.assert_flows_equal(parent_flow, parent_flow) - self.assertRaises(ValueError, - openml.flows.functions.assert_flows_equal, - parent_flow, subflow) + self.assertRaises( + ValueError, openml.flows.functions.assert_flows_equal, parent_flow, subflow + ) new_flow = copy.deepcopy(parent_flow) - new_flow.components['subflow'].name = 'Subflow name' - self.assertRaises(ValueError, - openml.flows.functions.assert_flows_equal, - parent_flow, new_flow) + new_flow.components["subflow"].name = "Subflow name" + self.assertRaises( + ValueError, openml.flows.functions.assert_flows_equal, parent_flow, new_flow + ) def test_are_flows_equal_ignore_parameter_values(self): - paramaters = OrderedDict((('a', 5), ('b', 6))) - parameters_meta_info = OrderedDict((('a', None), ('b', None))) + paramaters = OrderedDict((("a", 5), ("b", 6))) + parameters_meta_info = OrderedDict((("a", None), ("b", None))) flow = openml.flows.OpenMLFlow( - name='Test', - description='Test flow', + name="Test", + description="Test flow", model=None, components=OrderedDict(), parameters=paramaters, parameters_meta_info=parameters_meta_info, - external_version='1', - tags=['abc', 'def'], - language='English', - dependencies='abc', - class_name='Test', - custom_name='Test', + external_version="1", + tags=["abc", "def"], + language="English", + dependencies="abc", + class_name="Test", + custom_name="Test", ) openml.flows.functions.assert_flows_equal(flow, flow) - openml.flows.functions.assert_flows_equal(flow, flow, - ignore_parameter_values=True) + openml.flows.functions.assert_flows_equal(flow, flow, ignore_parameter_values=True) new_flow = copy.deepcopy(flow) - new_flow.parameters['a'] = 7 + new_flow.parameters["a"] = 7 self.assertRaisesRegex( ValueError, r"values for attribute 'parameters' differ: " r"'OrderedDict\(\[\('a', 5\), \('b', 6\)\]\)'\nvs\n" r"'OrderedDict\(\[\('a', 7\), \('b', 6\)\]\)'", 
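
The comparison semantics tested here can be summarised with a self-contained, offline sketch: assert_flows_equal raises ValueError when user-set attributes or parameter values differ, whereas ignore_parameter_values=True compares only the parameter names. The dummy flow below is built exactly as in the test.

    import copy
    from collections import OrderedDict

    import openml

    flow = openml.flows.OpenMLFlow(
        name="Test",
        description="Test flow",
        model=None,
        components=OrderedDict(),
        parameters=OrderedDict([("a", 5), ("b", 6)]),
        parameters_meta_info=OrderedDict([("a", None), ("b", None)]),
        external_version="1",
        tags=["abc", "def"],
        language="English",
        dependencies="abc",
        class_name="Test",
        custom_name="Test",
    )

    other = copy.deepcopy(flow)
    other.parameters["a"] = 7

    try:
        openml.flows.functions.assert_flows_equal(flow, other)
    except ValueError as err:
        print(err)  # values for attribute 'parameters' differ

    # Only the parameter *names* are compared here, so this passes silently.
    openml.flows.functions.assert_flows_equal(flow, other, ignore_parameter_values=True)
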
openml.flows.functions.assert_flows_equal, - flow, new_flow, + flow, + new_flow, ) - openml.flows.functions.assert_flows_equal(flow, new_flow, - ignore_parameter_values=True) + openml.flows.functions.assert_flows_equal(flow, new_flow, ignore_parameter_values=True) - del new_flow.parameters['a'] + del new_flow.parameters["a"] self.assertRaisesRegex( ValueError, r"values for attribute 'parameters' differ: " r"'OrderedDict\(\[\('a', 5\), \('b', 6\)\]\)'\nvs\n" r"'OrderedDict\(\[\('b', 6\)\]\)'", openml.flows.functions.assert_flows_equal, - flow, new_flow, + flow, + new_flow, ) self.assertRaisesRegex( ValueError, r"Flow Test: parameter set of flow differs from the parameters " r"stored on the server.", openml.flows.functions.assert_flows_equal, - flow, new_flow, ignore_parameter_values=True, + flow, + new_flow, + ignore_parameter_values=True, ) def test_are_flows_equal_ignore_if_older(self): - paramaters = OrderedDict((('a', 5), ('b', 6))) - parameters_meta_info = OrderedDict((('a', None), ('b', None))) - flow_upload_date = '2017-01-31T12-01-01' + paramaters = OrderedDict((("a", 5), ("b", 6))) + parameters_meta_info = OrderedDict((("a", None), ("b", None))) + flow_upload_date = "2017-01-31T12-01-01" assert_flows_equal = openml.flows.functions.assert_flows_equal - flow = openml.flows.OpenMLFlow(name='Test', - description='Test flow', - model=None, - components=OrderedDict(), - parameters=paramaters, - parameters_meta_info=parameters_meta_info, - external_version='1', - tags=['abc', 'def'], - language='English', - dependencies='abc', - class_name='Test', - custom_name='Test', - upload_date=flow_upload_date) + flow = openml.flows.OpenMLFlow( + name="Test", + description="Test flow", + model=None, + components=OrderedDict(), + parameters=paramaters, + parameters_meta_info=parameters_meta_info, + external_version="1", + tags=["abc", "def"], + language="English", + dependencies="abc", + class_name="Test", + custom_name="Test", + upload_date=flow_upload_date, + ) assert_flows_equal(flow, flow, ignore_parameter_values_on_older_children=flow_upload_date) assert_flows_equal(flow, flow, ignore_parameter_values_on_older_children=None) new_flow = copy.deepcopy(flow) - new_flow.parameters['a'] = 7 - self.assertRaises(ValueError, assert_flows_equal, flow, new_flow, - ignore_parameter_values_on_older_children=flow_upload_date) - self.assertRaises(ValueError, assert_flows_equal, flow, new_flow, - ignore_parameter_values_on_older_children=None) - - new_flow.upload_date = '2016-01-31T12-01-01' - self.assertRaises(ValueError, assert_flows_equal, flow, new_flow, - ignore_parameter_values_on_older_children=flow_upload_date) + new_flow.parameters["a"] = 7 + self.assertRaises( + ValueError, + assert_flows_equal, + flow, + new_flow, + ignore_parameter_values_on_older_children=flow_upload_date, + ) + self.assertRaises( + ValueError, + assert_flows_equal, + flow, + new_flow, + ignore_parameter_values_on_older_children=None, + ) + + new_flow.upload_date = "2016-01-31T12-01-01" + self.assertRaises( + ValueError, + assert_flows_equal, + flow, + new_flow, + ignore_parameter_values_on_older_children=flow_upload_date, + ) assert_flows_equal(flow, flow, ignore_parameter_values_on_older_children=None) - @unittest.skipIf(LooseVersion(sklearn.__version__) < "0.20", - reason="OrdinalEncoder introduced in 0.20. " - "No known models with list of lists parameters in older versions.") + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="OrdinalEncoder introduced in 0.20. 
" + "No known models with list of lists parameters in older versions.", + ) def test_sklearn_to_flow_list_of_lists(self): from sklearn.preprocessing import OrdinalEncoder + ordinal_encoder = OrdinalEncoder(categories=[[0, 1], [0, 1]]) extension = openml.extensions.sklearn.SklearnExtension() @@ -258,11 +279,11 @@ def test_sklearn_to_flow_list_of_lists(self): # Test flow is accepted by server self._add_sentinel_to_flow_name(flow) flow.publish() - TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name)) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], flow.flow_id)) + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) # Test deserialization works server_flow = openml.flows.get_flow(flow.flow_id, reinstantiate=True) - self.assertEqual(server_flow.parameters['categories'], '[[0, 1], [0, 1]]') + self.assertEqual(server_flow.parameters["categories"], "[[0, 1], [0, 1]]") self.assertEqual(server_flow.model.categories, flow.model.categories) def test_get_flow1(self): @@ -277,38 +298,37 @@ def test_get_flow_reinstantiate_model(self): extension = openml.extensions.get_extension_by_model(model) flow = extension.model_to_flow(model) flow.publish(raise_error_if_exists=False) - TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name)) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], flow.flow_id)) + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) downloaded_flow = openml.flows.get_flow(flow.flow_id, reinstantiate=True) self.assertIsInstance(downloaded_flow.model, sklearn.ensemble.RandomForestClassifier) def test_get_flow_reinstantiate_model_no_extension(self): # Flow 10 is a WEKA flow - self.assertRaisesRegex(RuntimeError, - "No extension could be found for flow 10: weka.SMO", - openml.flows.get_flow, - flow_id=10, - reinstantiate=True) - - @unittest.skipIf(LooseVersion(sklearn.__version__) == "0.19.1", - reason="Target flow is from sklearn 0.19.1") + self.assertRaisesRegex( + RuntimeError, + "No extension could be found for flow 10: weka.SMO", + openml.flows.get_flow, + flow_id=10, + reinstantiate=True, + ) + + @unittest.skipIf( + LooseVersion(sklearn.__version__) == "0.19.1", reason="Target flow is from sklearn 0.19.1" + ) def test_get_flow_reinstantiate_model_wrong_version(self): # Note that CI does not test against 0.19.1. openml.config.server = self.production_server _, sklearn_major, _ = LooseVersion(sklearn.__version__).version[:3] flow = 8175 - expected = ('Trying to deserialize a model with dependency' - ' sklearn==0.19.1 not satisfied.') - self.assertRaisesRegex(ValueError, - expected, - openml.flows.get_flow, - flow_id=flow, - reinstantiate=True) + expected = "Trying to deserialize a model with dependency" " sklearn==0.19.1 not satisfied." 
+ self.assertRaisesRegex( + ValueError, expected, openml.flows.get_flow, flow_id=flow, reinstantiate=True + ) if LooseVersion(sklearn.__version__) > "0.19.1": # 0.18 actually can't deserialize this because of incompatibility - flow = openml.flows.get_flow(flow_id=flow, reinstantiate=True, - strict_version=False) + flow = openml.flows.get_flow(flow_id=flow, reinstantiate=True, strict_version=False) # ensure that a new flow was created assert flow.flow_id is None assert "0.19.1" not in flow.dependencies @@ -326,8 +346,7 @@ def test_get_flow_id(self): # whether exact_version is set to True or False. flow_ids_exact_version_True = openml.flows.get_flow_id(name=flow.name, exact_version=True) flow_ids_exact_version_False = openml.flows.get_flow_id( - name=flow.name, - exact_version=False, + name=flow.name, exact_version=False, ) self.assertEqual(flow_ids_exact_version_True, flow_ids_exact_version_False) self.assertIn(flow.flow_id, flow_ids_exact_version_True) diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py index 1748608bb..8b470a45b 100644 --- a/tests/test_openml/test_api_calls.py +++ b/tests/test_openml/test_api_calls.py @@ -3,10 +3,8 @@ class TestConfig(openml.testing.TestBase): - def test_too_long_uri(self): with self.assertRaisesRegex( - openml.exceptions.OpenMLServerError, - 'URI too long!', + openml.exceptions.OpenMLServerError, "URI too long!", ): openml.datasets.list_datasets(data_id=list(range(10000))) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index d4331a169..88136dbd9 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -7,14 +7,12 @@ class TestConfig(openml.testing.TestBase): - def test_config_loading(self): self.assertTrue(os.path.exists(openml.config.config_file)) - self.assertTrue(os.path.isdir(os.path.expanduser('~/.openml'))) + self.assertTrue(os.path.isdir(os.path.expanduser("~/.openml"))) class TestConfigurationForExamples(openml.testing.TestBase): - def test_switch_to_example_configuration(self): """ Verifies the test configuration is loaded properly. """ # Below is the default test key which would be used anyway, but just for clarity: @@ -41,8 +39,9 @@ def test_switch_from_example_configuration(self): def test_example_configuration_stop_before_start(self): """ Verifies an error is raised is `stop_...` is called before `start_...`. """ error_regex = ".*stop_use_example_configuration.*start_use_example_configuration.*first" - self.assertRaisesRegex(RuntimeError, error_regex, - openml.config.stop_using_configuration_for_example) + self.assertRaisesRegex( + RuntimeError, error_regex, openml.config.stop_using_configuration_for_example + ) def test_example_configuration_start_twice(self): """ Checks that the original config can be returned to if `start..` is called twice. 
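
The get_flow_id lookup from test_get_flow_id works as sketched below: flows are looked up by name, and for a flow without subflows the exact_version flag does not change the result. This assumes a configured server and API key; the classifier is only a stand-in for whatever flow has been published.

    import sklearn.tree

    import openml
    from openml.extensions.sklearn import SklearnExtension

    extension = SklearnExtension()

    flow = extension.model_to_flow(sklearn.tree.DecisionTreeClassifier())
    flow.publish()  # requires an API key

    # Both lookups return a list of ids containing our flow.
    ids_exact = openml.flows.get_flow_id(name=flow.name, exact_version=True)
    ids_any = openml.flows.get_flow_id(name=flow.name, exact_version=False)
    print(flow.flow_id in ids_exact, ids_exact == ids_any)
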
""" diff --git a/tests/test_openml/test_openml.py b/tests/test_openml/test_openml.py index eda4af948..80f5e67f0 100644 --- a/tests/test_openml/test_openml.py +++ b/tests/test_openml/test_openml.py @@ -10,19 +10,14 @@ class TestInit(TestBase): # Splitting not helpful, these test's don't rely on the server and take less # than 1 seconds - @mock.patch('openml.tasks.functions.get_task') - @mock.patch('openml.datasets.functions.get_dataset') - @mock.patch('openml.flows.functions.get_flow') - @mock.patch('openml.runs.functions.get_run') + @mock.patch("openml.tasks.functions.get_task") + @mock.patch("openml.datasets.functions.get_dataset") + @mock.patch("openml.flows.functions.get_flow") + @mock.patch("openml.runs.functions.get_run") def test_populate_cache( - self, - run_mock, - flow_mock, - dataset_mock, - task_mock, + self, run_mock, flow_mock, dataset_mock, task_mock, ): - openml.populate_cache(task_ids=[1, 2], dataset_ids=[3, 4], - flow_ids=[5, 6], run_ids=[7, 8]) + openml.populate_cache(task_ids=[1, 2], dataset_ids=[3, 4], flow_ids=[5, 6], run_ids=[7, 8]) self.assertEqual(run_mock.call_count, 2) for argument, fixture in zip(run_mock.call_args_list, [(7,), (8,)]): self.assertEqual(argument[0], fixture) @@ -32,10 +27,7 @@ def test_populate_cache( self.assertEqual(argument[0], fixture) self.assertEqual(dataset_mock.call_count, 2) - for argument, fixture in zip( - dataset_mock.call_args_list, - [(3,), (4,)], - ): + for argument, fixture in zip(dataset_mock.call_args_list, [(3,), (4,)],): self.assertEqual(argument[0], fixture) self.assertEqual(task_mock.call_count, 2) diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index 1d7c9bb18..864863f4a 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -38,39 +38,35 @@ def test_tagging(self): self.assertEqual(len(run_list), 0) def _test_run_obj_equals(self, run, run_prime): - for dictionary in ['evaluations', 'fold_evaluations', - 'sample_evaluations']: + for dictionary in ["evaluations", "fold_evaluations", "sample_evaluations"]: if getattr(run, dictionary) is not None: - self.assertDictEqual(getattr(run, dictionary), - getattr(run_prime, dictionary)) + self.assertDictEqual(getattr(run, dictionary), getattr(run_prime, dictionary)) else: # should be none or empty other = getattr(run_prime, dictionary) if other is not None: self.assertDictEqual(other, dict()) - self.assertEqual(run._to_xml(), - run_prime._to_xml()) + self.assertEqual(run._to_xml(), run_prime._to_xml()) - numeric_part = \ - np.array(np.array(run.data_content)[:, 0:-2], dtype=float) - numeric_part_prime = \ - np.array(np.array(run_prime.data_content)[:, 0:-2], dtype=float) + numeric_part = np.array(np.array(run.data_content)[:, 0:-2], dtype=float) + numeric_part_prime = np.array(np.array(run_prime.data_content)[:, 0:-2], dtype=float) string_part = np.array(run.data_content)[:, -2:] string_part_prime = np.array(run_prime.data_content)[:, -2:] np.testing.assert_array_almost_equal(numeric_part, numeric_part_prime) np.testing.assert_array_equal(string_part, string_part_prime) if run.trace is not None: - run_trace_content = run.trace.trace_to_arff()['data'] + run_trace_content = run.trace.trace_to_arff()["data"] else: run_trace_content = None if run_prime.trace is not None: - run_prime_trace_content = run_prime.trace.trace_to_arff()['data'] + run_prime_trace_content = run_prime.trace.trace_to_arff()["data"] else: run_prime_trace_content = None if run_trace_content is not None: + def _check_array(array, type_): for line in array: for entry in line: 
@@ -81,19 +77,13 @@ def _check_array(array, type_): int_part_prime = [line[:3] for line in run_prime_trace_content] _check_array(int_part_prime, int) - float_part = np.array( - np.array(run_trace_content)[:, 3:4], - dtype=float, - ) - float_part_prime = np.array( - np.array(run_prime_trace_content)[:, 3:4], - dtype=float, - ) + float_part = np.array(np.array(run_trace_content)[:, 3:4], dtype=float,) + float_part_prime = np.array(np.array(run_prime_trace_content)[:, 3:4], dtype=float,) bool_part = [line[4] for line in run_trace_content] bool_part_prime = [line[4] for line in run_prime_trace_content] for bp, bpp in zip(bool_part, bool_part_prime): - self.assertIn(bp, ['true', 'false']) - self.assertIn(bpp, ['true', 'false']) + self.assertIn(bp, ["true", "false"]) + self.assertIn(bpp, ["true", "false"]) string_part = np.array(run_trace_content)[:, 5:] string_part_prime = np.array(run_prime_trace_content)[:, 5:] @@ -106,24 +96,22 @@ def _check_array(array, type_): def test_to_from_filesystem_vanilla(self): - model = Pipeline([ - ('imputer', SimpleImputer(strategy='mean')), - ('classifier', DecisionTreeClassifier(max_depth=1)), - ]) + model = Pipeline( + [ + ("imputer", SimpleImputer(strategy="mean")), + ("classifier", DecisionTreeClassifier(max_depth=1)), + ] + ) task = openml.tasks.get_task(119) run = openml.runs.run_model_on_task( model=model, task=task, add_local_measures=False, avoid_duplicate_runs=False, - upload_flow=True + upload_flow=True, ) - cache_path = os.path.join( - self.workdir, - 'runs', - str(random.getrandbits(128)), - ) + cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)),) run.to_filesystem(cache_path) run_prime = openml.runs.OpenMLRun.from_filesystem(cache_path) @@ -132,70 +120,58 @@ def test_to_from_filesystem_vanilla(self): self.assertTrue(run_prime.flow is None) self._test_run_obj_equals(run, run_prime) run_prime.publish() - TestBase._mark_entity_for_removal('run', run_prime.run_id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - run_prime.run_id)) + TestBase._mark_entity_for_removal("run", run_prime.run_id) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], run_prime.run_id) + ) @pytest.mark.flaky() def test_to_from_filesystem_search(self): - model = Pipeline([ - ('imputer', SimpleImputer(strategy='mean')), - ('classifier', DecisionTreeClassifier(max_depth=1)), - ]) + model = Pipeline( + [ + ("imputer", SimpleImputer(strategy="mean")), + ("classifier", DecisionTreeClassifier(max_depth=1)), + ] + ) model = GridSearchCV( estimator=model, param_grid={ "classifier__max_depth": [1, 2, 3, 4, 5], - "imputer__strategy": ['mean', 'median'], - } + "imputer__strategy": ["mean", "median"], + }, ) task = openml.tasks.get_task(119) run = openml.runs.run_model_on_task( - model=model, - task=task, - add_local_measures=False, - avoid_duplicate_runs=False, + model=model, task=task, add_local_measures=False, avoid_duplicate_runs=False, ) - cache_path = os.path.join( - self.workdir, - 'runs', - str(random.getrandbits(128)), - ) + cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128))) run.to_filesystem(cache_path) run_prime = openml.runs.OpenMLRun.from_filesystem(cache_path) self._test_run_obj_equals(run, run_prime) run_prime.publish() - TestBase._mark_entity_for_removal('run', run_prime.run_id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - run_prime.run_id)) + TestBase._mark_entity_for_removal("run", run_prime.run_id) + TestBase.logger.info( + 
"collected from {}: {}".format(__file__.split("/")[-1], run_prime.run_id) + ) def test_to_from_filesystem_no_model(self): - model = Pipeline([ - ('imputer', SimpleImputer(strategy='mean')), - ('classifier', DummyClassifier()), - ]) - task = openml.tasks.get_task(119) - run = openml.runs.run_model_on_task( - model=model, - task=task, - add_local_measures=False, + model = Pipeline( + [("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())] ) + task = openml.tasks.get_task(119) + run = openml.runs.run_model_on_task(model=model, task=task, add_local_measures=False) - cache_path = os.path.join( - self.workdir, - 'runs', - str(random.getrandbits(128)), - ) + cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128))) run.to_filesystem(cache_path, store_model=False) # obtain run from filesystem openml.runs.OpenMLRun.from_filesystem(cache_path, expect_model=False) # assert default behaviour is throwing an error - with self.assertRaises(ValueError, msg='Could not find model.pkl'): + with self.assertRaises(ValueError, msg="Could not find model.pkl"): openml.runs.OpenMLRun.from_filesystem(cache_path) def test_publish_with_local_loaded_flow(self): @@ -205,10 +181,9 @@ def test_publish_with_local_loaded_flow(self): """ extension = openml.extensions.sklearn.SklearnExtension() - model = Pipeline([ - ('imputer', SimpleImputer(strategy='mean')), - ('classifier', DummyClassifier()), - ]) + model = Pipeline( + [("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())] + ) task = openml.tasks.get_task(119) # Make sure the flow does not exist on the server yet. @@ -221,24 +196,21 @@ def test_publish_with_local_loaded_flow(self): task=task, add_local_measures=False, avoid_duplicate_runs=False, - upload_flow=False + upload_flow=False, ) # Make sure that the flow has not been uploaded as requested. self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version)) - cache_path = os.path.join( - self.workdir, - 'runs', - str(random.getrandbits(128)), - ) + cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128))) run.to_filesystem(cache_path) # obtain run from filesystem loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) loaded_run.publish() - TestBase._mark_entity_for_removal('run', loaded_run.run_id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - loaded_run.run_id)) + TestBase._mark_entity_for_removal("run", loaded_run.run_id) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id) + ) # make sure the flow is published as part of publishing the run. 
self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version)) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 854061148..728467aa2 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1,4 +1,5 @@ # License: BSD 3-Clause +from typing import Tuple, List, Union import arff from distutils.version import LooseVersion @@ -35,12 +36,10 @@ from sklearn.dummy import DummyClassifier from sklearn.preprocessing import StandardScaler from sklearn.feature_selection import VarianceThreshold -from sklearn.linear_model import LogisticRegression, SGDClassifier, \ - LinearRegression +from sklearn.linear_model import LogisticRegression, SGDClassifier, LinearRegression from sklearn.ensemble import RandomForestClassifier, BaggingClassifier from sklearn.svm import SVC -from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, \ - StratifiedKFold +from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, StratifiedKFold from sklearn.pipeline import Pipeline @@ -48,13 +47,17 @@ class TestRun(TestBase): _multiprocess_can_split_ = True # diabetis dataset, 768 observations, 0 missing vals, 33% holdout set # (253 test obs), no nominal attributes, all numeric attributes - TEST_SERVER_TASK_SIMPLE = (119, 0, 253, list(), list(range(8))) - TEST_SERVER_TASK_REGRESSION = (738, 0, 718, list(), list(range(8))) + TEST_SERVER_TASK_SIMPLE: Tuple[Union[int, List], ...] = (119, 0, 253, [], [*range(8)]) + TEST_SERVER_TASK_REGRESSION: Tuple[Union[int, List], ...] = (738, 0, 718, [], [*range(8)]) # credit-a dataset, 690 observations, 67 missing vals, 33% holdout set # (227 test obs) - TEST_SERVER_TASK_MISSING_VALS = (96, 67, 227, - [0, 3, 4, 5, 6, 8, 9, 11, 12], - [1, 2, 7, 10, 13, 14]) + TEST_SERVER_TASK_MISSING_VALS = ( + 96, + 67, + 227, + [0, 3, 4, 5, 6, 8, 9, 11, 12], + [1, 2, 7, 10, 13, 14], + ) # Suppress warnings to facilitate testing hide_warnings = True @@ -82,42 +85,42 @@ def _wait_for_processed_run(self, run_id, max_waiting_time_seconds): return else: time.sleep(3) - raise RuntimeError('Could not find any evaluations! Please check whether run {} was ' - 'evaluated correctly on the server'.format(run_id)) + raise RuntimeError( + "Could not find any evaluations! Please check whether run {} was " + "evaluated correctly on the server".format(run_id) + ) def _compare_predictions(self, predictions, predictions_prime): - self.assertEqual(np.array(predictions_prime['data']).shape, - np.array(predictions['data']).shape) + self.assertEqual( + np.array(predictions_prime["data"]).shape, np.array(predictions["data"]).shape + ) # The original search model does not submit confidence # bounds, so we can not compare the arff line compare_slice = [0, 1, 2, -1, -2] - for idx in range(len(predictions['data'])): + for idx in range(len(predictions["data"])): # depends on the assumption "predictions are in same order" # that does not necessarily hold. # But with the current code base, it holds. 
for col_idx in compare_slice: - val_1 = predictions['data'][idx][col_idx] - val_2 = predictions_prime['data'][idx][col_idx] + val_1 = predictions["data"][idx][col_idx] + val_2 = predictions_prime["data"][idx][col_idx] if type(val_1) == float or type(val_2) == float: self.assertAlmostEqual( - float(val_1), - float(val_2), - places=6, + float(val_1), float(val_2), places=6, ) else: self.assertEqual(val_1, val_2) return True - def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, - create_task_obj): + def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, create_task_obj): run = openml.runs.get_run(run_id) # TODO: assert holdout task # downloads the predictions of the old task - file_id = run.output_files['predictions'] + file_id = run.output_files["predictions"] predictions_url = openml._api_calls._file_id_to_url(file_id) response = openml._api_calls._download_text_file(predictions_url) predictions = arff.loads(response) @@ -126,26 +129,28 @@ def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, if create_task_obj: task = openml.tasks.get_task(run.task_id) run_prime = openml.runs.run_model_on_task( - model=model_prime, - task=task, - avoid_duplicate_runs=False, - seed=seed, + model=model_prime, task=task, avoid_duplicate_runs=False, seed=seed, ) else: run_prime = openml.runs.run_model_on_task( - model=model_prime, - task=run.task_id, - avoid_duplicate_runs=False, - seed=seed, + model=model_prime, task=run.task_id, avoid_duplicate_runs=False, seed=seed, ) predictions_prime = run_prime._generate_arff_dict() self._compare_predictions(predictions, predictions_prime) - def _perform_run(self, task_id, num_instances, n_missing_vals, clf, - flow_expected_rsv=None, seed=1, check_setup=True, - sentinel=None): + def _perform_run( + self, + task_id, + num_instances, + n_missing_vals, + clf, + flow_expected_rsv=None, + seed=1, + check_setup=True, + sentinel=None, + ): """ Runs a classifier on a task, and performs some basic checks. Also uploads the run. 
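As a rough illustration of what `_perform_run` and `_rerun_model_and_compare_predictions` drive under the hood — run a model on a task, then materialise the prediction ARFF that the comparisons operate on — the public calls look approximately like this. The task id (119), the model and the seed are illustrative values taken from elsewhere in this patch; a configured OpenML (test) server and API key are assumed, and `_generate_arff_dict` is a private helper, so treat this as a sketch rather than supported API:

```python
import openml
from sklearn.tree import DecisionTreeClassifier

# Run a simple classifier on a holdout task and regenerate its predictions.
task = openml.tasks.get_task(119)  # diabetes holdout task used throughout these tests
run = openml.runs.run_model_on_task(
    model=DecisionTreeClassifier(max_depth=1),
    task=task,
    avoid_duplicate_runs=False,
    seed=1,
)
predictions = run._generate_arff_dict()
print(len(predictions["data"]))  # should be one row per test observation (253 for this task)
```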
@@ -182,15 +187,15 @@ def _perform_run(self, task_id, num_instances, n_missing_vals, clf, run: OpenMLRun The performed run (with run id) """ - classes_without_random_state = \ - ['sklearn.model_selection._search.GridSearchCV', - 'sklearn.pipeline.Pipeline', - 'sklearn.linear_model.base.LinearRegression', - ] + classes_without_random_state = [ + "sklearn.model_selection._search.GridSearchCV", + "sklearn.pipeline.Pipeline", + "sklearn.linear_model.base.LinearRegression", + ] def _remove_random_state(flow): - if 'random_state' in flow.parameters: - del flow.parameters['random_state'] + if "random_state" in flow.parameters: + del flow.parameters["random_state"] for component in flow.components.values(): _remove_random_state(component) @@ -198,7 +203,7 @@ def _remove_random_state(flow): flow, _ = self._add_sentinel_to_flow_name(flow, sentinel) if not openml.flows.flow_exists(flow.name, flow.external_version): flow.publish() - TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name)) + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) TestBase.logger.info("collected from test_run_functions: {}".format(flow.flow_id)) task = openml.tasks.get_task(task_id) @@ -212,7 +217,7 @@ def _remove_random_state(flow): avoid_duplicate_runs=openml.config.avoid_duplicate_runs, ) run_ = run.publish() - TestBase._mark_entity_for_removal('run', run.run_id) + TestBase._mark_entity_for_removal("run", run.run_id) TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id)) self.assertEqual(run_, run) self.assertIsInstance(run.dataset_id, int) @@ -232,37 +237,32 @@ def _remove_random_state(flow): # test the initialize setup function run_id = run_.run_id run_server = openml.runs.get_run(run_id) - clf_server = openml.setups.initialize_model( - setup_id=run_server.setup_id, - ) + clf_server = openml.setups.initialize_model(setup_id=run_server.setup_id,) flow_local = self.extension.model_to_flow(clf) flow_server = self.extension.model_to_flow(clf_server) if flow.class_name not in classes_without_random_state: - error_msg = 'Flow class %s (id=%d) does not have a random ' \ - 'state parameter' % (flow.class_name, flow.flow_id) - self.assertIn('random_state', flow.parameters, error_msg) + error_msg = "Flow class %s (id=%d) does not have a random " "state parameter" % ( + flow.class_name, + flow.flow_id, + ) + self.assertIn("random_state", flow.parameters, error_msg) # If the flow is initialized from a model without a random # state, the flow is on the server without any random state - self.assertEqual(flow.parameters['random_state'], 'null') + self.assertEqual(flow.parameters["random_state"], "null") # As soon as a flow is run, a random state is set in the model. 
# If a flow is re-instantiated - self.assertEqual(flow_local.parameters['random_state'], - flow_expected_rsv) - self.assertEqual(flow_server.parameters['random_state'], - flow_expected_rsv) + self.assertEqual(flow_local.parameters["random_state"], flow_expected_rsv) + self.assertEqual(flow_server.parameters["random_state"], flow_expected_rsv) _remove_random_state(flow_local) _remove_random_state(flow_server) openml.flows.assert_flows_equal(flow_local, flow_server) # and test the initialize setup from run function - clf_server2 = openml.runs.initialize_model_from_run( - run_id=run_server.run_id, - ) + clf_server2 = openml.runs.initialize_model_from_run(run_id=run_server.run_id,) flow_server2 = self.extension.model_to_flow(clf_server2) if flow.class_name not in classes_without_random_state: - self.assertEqual(flow_server2.parameters['random_state'], - flow_expected_rsv) + self.assertEqual(flow_server2.parameters["random_state"], flow_expected_rsv) _remove_random_state(flow_server2) openml.flows.assert_flows_equal(flow_local, flow_server2) @@ -271,7 +271,7 @@ def _remove_random_state(flow): # self.assertEqual(clf, clf_prime) downloaded = openml.runs.get_run(run_.run_id) - assert ('openml-python' in downloaded.tags) + assert "openml-python" in downloaded.tags # TODO make sure that these attributes are instantiated when # downloading a run? Or make sure that the trace object is created when @@ -281,9 +281,9 @@ def _remove_random_state(flow): # self.assertEqual(run_trace, downloaded_run_trace) return run - def _check_sample_evaluations(self, sample_evaluations, num_repeats, - num_folds, num_samples, - max_time_allowed=60000): + def _check_sample_evaluations( + self, sample_evaluations, num_repeats, num_folds, num_samples, max_time_allowed=60000 + ): """ Checks whether the right timing measures are attached to the run (before upload). Test is only performed for versions >= Python3.3 @@ -298,21 +298,20 @@ def _check_sample_evaluations(self, sample_evaluations, num_repeats, # maximum allowed value check_measures = { # should take at least one millisecond (?) 
- 'usercpu_time_millis_testing': (0, max_time_allowed), - 'usercpu_time_millis_training': (0, max_time_allowed), - 'usercpu_time_millis': (0, max_time_allowed), - 'wall_clock_time_millis_training': (0, max_time_allowed), - 'wall_clock_time_millis_testing': (0, max_time_allowed), - 'wall_clock_time_millis': (0, max_time_allowed), - 'predictive_accuracy': (0, 1) + "usercpu_time_millis_testing": (0, max_time_allowed), + "usercpu_time_millis_training": (0, max_time_allowed), + "usercpu_time_millis": (0, max_time_allowed), + "wall_clock_time_millis_training": (0, max_time_allowed), + "wall_clock_time_millis_testing": (0, max_time_allowed), + "wall_clock_time_millis": (0, max_time_allowed), + "predictive_accuracy": (0, 1), } self.assertIsInstance(sample_evaluations, dict) if sys.version_info[:2] >= (3, 3): # this only holds if we are allowed to record time (otherwise some # are missing) - self.assertEqual(set(sample_evaluations.keys()), - set(check_measures.keys())) + self.assertEqual(set(sample_evaluations.keys()), set(check_measures.keys())) for measure in check_measures.keys(): if measure in sample_evaluations: @@ -322,14 +321,12 @@ def _check_sample_evaluations(self, sample_evaluations, num_repeats, num_fold_entrees = len(sample_evaluations[measure][rep]) self.assertEqual(num_fold_entrees, num_folds) for fold in range(num_fold_entrees): - num_sample_entrees = len( - sample_evaluations[measure][rep][fold]) + num_sample_entrees = len(sample_evaluations[measure][rep][fold]) self.assertEqual(num_sample_entrees, num_samples) for sample in range(num_sample_entrees): - evaluation = sample_evaluations[measure][rep][ - fold][sample] + evaluation = sample_evaluations[measure][rep][fold][sample] self.assertIsInstance(evaluation, float) - if not os.environ.get('CI_WINDOWS'): + if not os.environ.get("CI_WINDOWS"): # Either Appveyor is much faster than Travis # and/or measurements are not as accurate. # Either way, windows seems to get an eval-time @@ -344,9 +341,7 @@ def test_run_regression_on_classif_task(self): task = openml.tasks.get_task(task_id) with self.assertRaises(AttributeError): openml.runs.run_model_on_task( - model=clf, - task=task, - avoid_duplicate_runs=False, + model=clf, task=task, avoid_duplicate_runs=False, ) def test_check_erronous_sklearn_flow_fails(self): @@ -354,14 +349,13 @@ def test_check_erronous_sklearn_flow_fails(self): task = openml.tasks.get_task(task_id) # Invalid parameter values - clf = LogisticRegression(C='abc', solver='lbfgs') + clf = LogisticRegression(C="abc", solver="lbfgs") with self.assertRaisesRegex( ValueError, r"Penalty term must be positive; got \(C=u?'abc'\)", # u? 
for 2.7/3.4-6 compability ): openml.runs.run_model_on_task( - task=task, - model=clf, + task=task, model=clf, ) ########################################################################### @@ -376,12 +370,21 @@ def test_check_erronous_sklearn_flow_fails(self): # execution of the unit tests without the need to add an additional module # like unittest2 - def _run_and_upload(self, clf, task_id, n_missing_vals, n_test_obs, - flow_expected_rsv, num_folds=1, num_iterations=5, - seed=1, metric=sklearn.metrics.accuracy_score, - metric_name='predictive_accuracy', - task_type=TaskTypeEnum.SUPERVISED_CLASSIFICATION, - sentinel=None): + def _run_and_upload( + self, + clf, + task_id, + n_missing_vals, + n_test_obs, + flow_expected_rsv, + num_folds=1, + num_iterations=5, + seed=1, + metric=sklearn.metrics.accuracy_score, + metric_name="predictive_accuracy", + task_type=TaskTypeEnum.SUPERVISED_CLASSIFICATION, + sentinel=None, + ): def determine_grid_size(param_grid): if isinstance(param_grid, dict): grid_iterations = 1 @@ -394,12 +397,17 @@ def determine_grid_size(param_grid): grid_iterations += determine_grid_size(sub_grid) return grid_iterations else: - raise TypeError('Param Grid should be of type list ' - '(GridSearch only) or dict') + raise TypeError("Param Grid should be of type list " "(GridSearch only) or dict") - run = self._perform_run(task_id, n_test_obs, n_missing_vals, clf, - flow_expected_rsv=flow_expected_rsv, seed=seed, - sentinel=sentinel) + run = self._perform_run( + task_id, + n_test_obs, + n_missing_vals, + clf, + flow_expected_rsv=flow_expected_rsv, + seed=seed, + sentinel=sentinel, + ) # obtain scores using get_metric_score: scores = run.get_metric_fn(metric) @@ -407,19 +415,16 @@ def determine_grid_size(param_grid): scores_provided = [] for rep in run.fold_evaluations[metric_name].keys(): for fold in run.fold_evaluations[metric_name][rep].keys(): - scores_provided.append( - run.fold_evaluations[metric_name][rep][fold]) + scores_provided.append(run.fold_evaluations[metric_name][rep][fold]) self.assertEqual(sum(scores_provided), sum(scores)) if isinstance(clf, BaseSearchCV): - trace_content = run.trace.trace_to_arff()['data'] + trace_content = run.trace.trace_to_arff()["data"] if isinstance(clf, GridSearchCV): grid_iterations = determine_grid_size(clf.param_grid) - self.assertEqual(len(trace_content), - grid_iterations * num_folds) + self.assertEqual(len(trace_content), grid_iterations * num_folds) else: - self.assertEqual(len(trace_content), - num_iterations * num_folds) + self.assertEqual(len(trace_content), num_iterations * num_folds) # downloads the best model based on the optimization trace # suboptimal (slow), and not guaranteed to work if evaluation @@ -428,39 +433,40 @@ def determine_grid_size(param_grid): self._wait_for_processed_run(run.run_id, 400) try: model_prime = openml.runs.initialize_model_from_trace( - run_id=run.run_id, - repeat=0, - fold=0, + run_id=run.run_id, repeat=0, fold=0, ) except openml.exceptions.OpenMLServerException as e: e.message = "%s; run_id %d" % (e.message, run.run_id) raise e - self._rerun_model_and_compare_predictions(run.run_id, model_prime, - seed, create_task_obj=True) - self._rerun_model_and_compare_predictions(run.run_id, model_prime, - seed, create_task_obj=False) + self._rerun_model_and_compare_predictions( + run.run_id, model_prime, seed, create_task_obj=True + ) + self._rerun_model_and_compare_predictions( + run.run_id, model_prime, seed, create_task_obj=False + ) else: run_downloaded = openml.runs.get_run(run.run_id) sid = 
run_downloaded.setup_id model_prime = openml.setups.initialize_model(sid) - self._rerun_model_and_compare_predictions(run.run_id, model_prime, - seed, create_task_obj=True) - self._rerun_model_and_compare_predictions(run.run_id, model_prime, - seed, create_task_obj=False) + self._rerun_model_and_compare_predictions( + run.run_id, model_prime, seed, create_task_obj=True + ) + self._rerun_model_and_compare_predictions( + run.run_id, model_prime, seed, create_task_obj=False + ) # todo: check if runtime is present - self._check_fold_timing_evaluations(run.fold_evaluations, 1, num_folds, - task_type=task_type) + self._check_fold_timing_evaluations(run.fold_evaluations, 1, num_folds, task_type=task_type) return run - def _run_and_upload_classification(self, clf, task_id, n_missing_vals, - n_test_obs, flow_expected_rsv, - sentinel=None): + def _run_and_upload_classification( + self, clf, task_id, n_missing_vals, n_test_obs, flow_expected_rsv, sentinel=None + ): num_folds = 1 # because of holdout num_iterations = 5 # for base search algorithms metric = sklearn.metrics.accuracy_score # metric class - metric_name = 'predictive_accuracy' # openml metric name + metric_name = "predictive_accuracy" # openml metric name task_type = TaskTypeEnum.SUPERVISED_CLASSIFICATION # task type return self._run_and_upload( @@ -477,13 +483,13 @@ def _run_and_upload_classification(self, clf, task_id, n_missing_vals, sentinel=sentinel, ) - def _run_and_upload_regression(self, clf, task_id, n_missing_vals, - n_test_obs, flow_expected_rsv, - sentinel=None): + def _run_and_upload_regression( + self, clf, task_id, n_missing_vals, n_test_obs, flow_expected_rsv, sentinel=None + ): num_folds = 1 # because of holdout num_iterations = 5 # for base search algorithms metric = sklearn.metrics.mean_absolute_error # metric class - metric_name = 'mean_absolute_error' # openml metric name + metric_name = "mean_absolute_error" # openml metric name task_type = TaskTypeEnum.SUPERVISED_REGRESSION # task type return self._run_and_upload( @@ -501,35 +507,36 @@ def _run_and_upload_regression(self, clf, task_id, n_missing_vals, ) def test_run_and_upload_logistic_regression(self): - lr = LogisticRegression(solver='lbfgs') + lr = LogisticRegression(solver="lbfgs") task_id = self.TEST_SERVER_TASK_SIMPLE[0] n_missing_vals = self.TEST_SERVER_TASK_SIMPLE[1] n_test_obs = self.TEST_SERVER_TASK_SIMPLE[2] - self._run_and_upload_classification(lr, task_id, n_missing_vals, - n_test_obs, '62501') + self._run_and_upload_classification(lr, task_id, n_missing_vals, n_test_obs, "62501") def test_run_and_upload_linear_regression(self): lr = LinearRegression() task_id = self.TEST_SERVER_TASK_REGRESSION[0] n_missing_vals = self.TEST_SERVER_TASK_REGRESSION[1] n_test_obs = self.TEST_SERVER_TASK_REGRESSION[2] - self._run_and_upload_regression(lr, task_id, n_missing_vals, - n_test_obs, '62501') + self._run_and_upload_regression(lr, task_id, n_missing_vals, n_test_obs, "62501") def test_run_and_upload_pipeline_dummy_pipeline(self): - pipeline1 = Pipeline(steps=[('scaler', - StandardScaler(with_mean=False)), - ('dummy', - DummyClassifier(strategy='prior'))]) + pipeline1 = Pipeline( + steps=[ + ("scaler", StandardScaler(with_mean=False)), + ("dummy", DummyClassifier(strategy="prior")), + ] + ) task_id = self.TEST_SERVER_TASK_SIMPLE[0] n_missing_vals = self.TEST_SERVER_TASK_SIMPLE[1] n_test_obs = self.TEST_SERVER_TASK_SIMPLE[2] - self._run_and_upload_classification(pipeline1, task_id, n_missing_vals, - n_test_obs, '62501') + self._run_and_upload_classification(pipeline1, 
task_id, n_missing_vals, n_test_obs, "62501") - @unittest.skipIf(LooseVersion(sklearn.__version__) < "0.20", - reason="columntransformer introduction in 0.20.0") + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="columntransformer introduction in 0.20.0", + ) def test_run_and_upload_column_transformer_pipeline(self): import sklearn.compose import sklearn.impute @@ -537,56 +544,72 @@ def test_run_and_upload_column_transformer_pipeline(self): def get_ct_cf(nominal_indices, numeric_indices): inner = sklearn.compose.ColumnTransformer( transformers=[ - ('numeric', sklearn.preprocessing.StandardScaler(), - nominal_indices), - ('nominal', sklearn.preprocessing.OneHotEncoder( - handle_unknown='ignore'), numeric_indices)], - remainder='passthrough') + ("numeric", sklearn.preprocessing.StandardScaler(), nominal_indices), + ( + "nominal", + sklearn.preprocessing.OneHotEncoder(handle_unknown="ignore"), + numeric_indices, + ), + ], + remainder="passthrough", + ) return sklearn.pipeline.Pipeline( steps=[ - ('imputer', sklearn.impute.SimpleImputer( - strategy='constant', fill_value=-1)), - ('transformer', inner), - ('classifier', sklearn.tree.DecisionTreeClassifier()) + ("imputer", sklearn.impute.SimpleImputer(strategy="constant", fill_value=-1)), + ("transformer", inner), + ("classifier", sklearn.tree.DecisionTreeClassifier()), ] ) sentinel = self._get_sentinel() self._run_and_upload_classification( - get_ct_cf(self.TEST_SERVER_TASK_SIMPLE[3], - self.TEST_SERVER_TASK_SIMPLE[4]), - self.TEST_SERVER_TASK_SIMPLE[0], self.TEST_SERVER_TASK_SIMPLE[1], - self.TEST_SERVER_TASK_SIMPLE[2], '62501', sentinel=sentinel) + get_ct_cf(self.TEST_SERVER_TASK_SIMPLE[3], self.TEST_SERVER_TASK_SIMPLE[4]), + self.TEST_SERVER_TASK_SIMPLE[0], + self.TEST_SERVER_TASK_SIMPLE[1], + self.TEST_SERVER_TASK_SIMPLE[2], + "62501", + sentinel=sentinel, + ) # Due to #602, it is important to test this model on two tasks # with different column specifications self._run_and_upload_classification( - get_ct_cf(self.TEST_SERVER_TASK_MISSING_VALS[3], - self.TEST_SERVER_TASK_MISSING_VALS[4]), + get_ct_cf(self.TEST_SERVER_TASK_MISSING_VALS[3], self.TEST_SERVER_TASK_MISSING_VALS[4]), self.TEST_SERVER_TASK_MISSING_VALS[0], self.TEST_SERVER_TASK_MISSING_VALS[1], self.TEST_SERVER_TASK_MISSING_VALS[2], - '62501', sentinel=sentinel) + "62501", + sentinel=sentinel, + ) def test_run_and_upload_decision_tree_pipeline(self): - pipeline2 = Pipeline(steps=[('Imputer', SimpleImputer(strategy='median')), - ('VarianceThreshold', VarianceThreshold()), - ('Estimator', RandomizedSearchCV( - DecisionTreeClassifier(), - {'min_samples_split': - [2 ** x for x in range(1, 8)], - 'min_samples_leaf': - [2 ** x for x in range(0, 7)]}, - cv=3, n_iter=10))]) + pipeline2 = Pipeline( + steps=[ + ("Imputer", SimpleImputer(strategy="median")), + ("VarianceThreshold", VarianceThreshold()), + ( + "Estimator", + RandomizedSearchCV( + DecisionTreeClassifier(), + { + "min_samples_split": [2 ** x for x in range(1, 8)], + "min_samples_leaf": [2 ** x for x in range(0, 7)], + }, + cv=3, + n_iter=10, + ), + ), + ] + ) task_id = self.TEST_SERVER_TASK_MISSING_VALS[0] n_missing_vals = self.TEST_SERVER_TASK_MISSING_VALS[1] n_test_obs = self.TEST_SERVER_TASK_MISSING_VALS[2] - self._run_and_upload_classification(pipeline2, task_id, n_missing_vals, - n_test_obs, '62501') + self._run_and_upload_classification(pipeline2, task_id, n_missing_vals, n_test_obs, "62501") def test_run_and_upload_gridsearch(self): - gridsearch = 
GridSearchCV(BaggingClassifier(base_estimator=SVC()), - {"base_estimator__C": [0.01, 0.1, 10], - "base_estimator__gamma": [0.01, 0.1, 10]}) + gridsearch = GridSearchCV( + BaggingClassifier(base_estimator=SVC()), + {"base_estimator__C": [0.01, 0.1, 10], "base_estimator__gamma": [0.01, 0.1, 10]}, + ) task_id = self.TEST_SERVER_TASK_SIMPLE[0] n_missing_vals = self.TEST_SERVER_TASK_SIMPLE[1] n_test_obs = self.TEST_SERVER_TASK_SIMPLE[2] @@ -595,21 +618,24 @@ def test_run_and_upload_gridsearch(self): task_id=task_id, n_missing_vals=n_missing_vals, n_test_obs=n_test_obs, - flow_expected_rsv='62501', + flow_expected_rsv="62501", ) self.assertEqual(len(run.trace.trace_iterations), 9) def test_run_and_upload_randomsearch(self): randomsearch = RandomizedSearchCV( RandomForestClassifier(n_estimators=5), - {"max_depth": [3, None], - "max_features": [1, 2, 3, 4], - "min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10], - "min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - "bootstrap": [True, False], - "criterion": ["gini", "entropy"]}, + { + "max_depth": [3, None], + "max_features": [1, 2, 3, 4], + "min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10], + "min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "bootstrap": [True, False], + "criterion": ["gini", "entropy"], + }, cv=StratifiedKFold(n_splits=2, shuffle=True), - n_iter=5) + n_iter=5, + ) # The random states for the RandomizedSearchCV is set after the # random state of the RandomForestClassifier is set, therefore, # it has a different value than the other examples before @@ -621,7 +647,7 @@ def test_run_and_upload_randomsearch(self): task_id=task_id, n_missing_vals=n_missing_vals, n_test_obs=n_test_obs, - flow_expected_rsv='12172', + flow_expected_rsv="12172", ) self.assertEqual(len(run.trace.trace_iterations), 5) @@ -632,11 +658,8 @@ def test_run_and_upload_maskedarrays(self): # 2) it verifies the correct handling of a 2-layered grid search gridsearch = GridSearchCV( RandomForestClassifier(n_estimators=5), - [ - {'max_features': [2, 4]}, - {'min_samples_leaf': [1, 10]} - ], - cv=StratifiedKFold(n_splits=2, shuffle=True) + [{"max_features": [2, 4]}, {"min_samples_leaf": [1, 10]}], + cv=StratifiedKFold(n_splits=2, shuffle=True), ) # The random states for the GridSearchCV is set after the # random state of the RandomForestClassifier is set, therefore, @@ -644,9 +667,9 @@ def test_run_and_upload_maskedarrays(self): task_id = self.TEST_SERVER_TASK_SIMPLE[0] n_missing_vals = self.TEST_SERVER_TASK_SIMPLE[1] n_test_obs = self.TEST_SERVER_TASK_SIMPLE[2] - self._run_and_upload_classification(gridsearch, task_id, - n_missing_vals, n_test_obs, - '12172') + self._run_and_upload_classification( + gridsearch, task_id, n_missing_vals, n_test_obs, "12172" + ) ########################################################################## @@ -658,14 +681,16 @@ def test_learning_curve_task_1(self): num_folds = 10 num_samples = 8 - pipeline1 = Pipeline(steps=[('scaler', - StandardScaler(with_mean=False)), - ('dummy', - DummyClassifier(strategy='prior'))]) - run = self._perform_run(task_id, num_test_instances, num_missing_vals, - pipeline1, flow_expected_rsv='62501') - self._check_sample_evaluations(run.sample_evaluations, num_repeats, - num_folds, num_samples) + pipeline1 = Pipeline( + steps=[ + ("scaler", StandardScaler(with_mean=False)), + ("dummy", DummyClassifier(strategy="prior")), + ] + ) + run = self._perform_run( + task_id, num_test_instances, num_missing_vals, pipeline1, flow_expected_rsv="62501" + ) + self._check_sample_evaluations(run.sample_evaluations, 
num_repeats, num_folds, num_samples) def test_learning_curve_task_2(self): task_id = 801 # diabates dataset @@ -675,41 +700,50 @@ def test_learning_curve_task_2(self): num_folds = 10 num_samples = 8 - pipeline2 = Pipeline(steps=[('Imputer', SimpleImputer(strategy='median')), - ('VarianceThreshold', VarianceThreshold()), - ('Estimator', RandomizedSearchCV( - DecisionTreeClassifier(), - {'min_samples_split': - [2 ** x for x in range(1, 8)], - 'min_samples_leaf': - [2 ** x for x in range(0, 7)]}, - cv=3, n_iter=10))]) - run = self._perform_run(task_id, num_test_instances, num_missing_vals, - pipeline2, flow_expected_rsv='62501') - self._check_sample_evaluations(run.sample_evaluations, num_repeats, - num_folds, num_samples) + pipeline2 = Pipeline( + steps=[ + ("Imputer", SimpleImputer(strategy="median")), + ("VarianceThreshold", VarianceThreshold()), + ( + "Estimator", + RandomizedSearchCV( + DecisionTreeClassifier(), + { + "min_samples_split": [2 ** x for x in range(1, 8)], + "min_samples_leaf": [2 ** x for x in range(0, 7)], + }, + cv=3, + n_iter=10, + ), + ), + ] + ) + run = self._perform_run( + task_id, num_test_instances, num_missing_vals, pipeline2, flow_expected_rsv="62501" + ) + self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples) def test_initialize_cv_from_run(self): randomsearch = RandomizedSearchCV( RandomForestClassifier(n_estimators=5), - {"max_depth": [3, None], - "max_features": [1, 2, 3, 4], - "min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10], - "min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - "bootstrap": [True, False], - "criterion": ["gini", "entropy"]}, + { + "max_depth": [3, None], + "max_features": [1, 2, 3, 4], + "min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10], + "min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "bootstrap": [True, False], + "criterion": ["gini", "entropy"], + }, cv=StratifiedKFold(n_splits=2, shuffle=True), - n_iter=2) + n_iter=2, + ) task = openml.tasks.get_task(11) run = openml.runs.run_model_on_task( - model=randomsearch, - task=task, - avoid_duplicate_runs=False, - seed=1, + model=randomsearch, task=task, avoid_duplicate_runs=False, seed=1, ) run_ = run.publish() - TestBase._mark_entity_for_removal('run', run.run_id) + TestBase._mark_entity_for_removal("run", run.run_id) TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id)) run = openml.runs.get_run(run_.run_id) @@ -723,27 +757,25 @@ def _test_local_evaluations(self, run): # compare with the scores in user defined measures accuracy_scores_provided = [] - for rep in run.fold_evaluations['predictive_accuracy'].keys(): - for fold in run.fold_evaluations['predictive_accuracy'][rep].\ - keys(): + for rep in run.fold_evaluations["predictive_accuracy"].keys(): + for fold in run.fold_evaluations["predictive_accuracy"][rep].keys(): accuracy_scores_provided.append( - run.fold_evaluations['predictive_accuracy'][rep][fold]) + run.fold_evaluations["predictive_accuracy"][rep][fold] + ) accuracy_scores = run.get_metric_fn(sklearn.metrics.accuracy_score) - np.testing.assert_array_almost_equal(accuracy_scores_provided, - accuracy_scores) + np.testing.assert_array_almost_equal(accuracy_scores_provided, accuracy_scores) # also check if we can obtain some other scores: - tests = [(sklearn.metrics.cohen_kappa_score, {'weights': None}), - (sklearn.metrics.roc_auc_score, {}), - (sklearn.metrics.average_precision_score, {}), - (sklearn.metrics.jaccard_similarity_score, {}), - (sklearn.metrics.precision_score, {'average': 'macro'}), - 
(sklearn.metrics.brier_score_loss, {})] + tests = [ + (sklearn.metrics.cohen_kappa_score, {"weights": None}), + (sklearn.metrics.roc_auc_score, {}), + (sklearn.metrics.average_precision_score, {}), + (sklearn.metrics.jaccard_similarity_score, {}), + (sklearn.metrics.precision_score, {"average": "macro"}), + (sklearn.metrics.brier_score_loss, {}), + ] for test_idx, test in enumerate(tests): - alt_scores = run.get_metric_fn( - sklearn_fn=test[0], - kwargs=test[1], - ) + alt_scores = run.get_metric_fn(sklearn_fn=test[0], kwargs=test[1],) self.assertEqual(len(alt_scores), 10) for idx in range(len(alt_scores)): self.assertGreaterEqual(alt_scores[idx], 0) @@ -752,17 +784,19 @@ def _test_local_evaluations(self, run): def test_local_run_swapped_parameter_order_model(self): # construct sci-kit learn classifier - clf = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')), - ('estimator', RandomForestClassifier())]) + clf = Pipeline( + steps=[ + ("imputer", SimpleImputer(strategy="median")), + ("estimator", RandomForestClassifier()), + ] + ) # download task task = openml.tasks.get_task(7) # invoke OpenML run run = openml.runs.run_model_on_task( - task, clf, - avoid_duplicate_runs=False, - upload_flow=False, + task, clf, avoid_duplicate_runs=False, upload_flow=False, ) self._test_local_evaluations(run) @@ -770,8 +804,12 @@ def test_local_run_swapped_parameter_order_model(self): def test_local_run_swapped_parameter_order_flow(self): # construct sci-kit learn classifier - clf = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')), - ('estimator', RandomForestClassifier())]) + clf = Pipeline( + steps=[ + ("imputer", SimpleImputer(strategy="median")), + ("estimator", RandomForestClassifier()), + ] + ) flow = self.extension.model_to_flow(clf) # download task @@ -779,9 +817,7 @@ def test_local_run_swapped_parameter_order_flow(self): # invoke OpenML run run = openml.runs.run_flow_on_task( - task, flow, - avoid_duplicate_runs=False, - upload_flow=False, + task, flow, avoid_duplicate_runs=False, upload_flow=False, ) self._test_local_evaluations(run) @@ -789,18 +825,19 @@ def test_local_run_swapped_parameter_order_flow(self): def test_local_run_metric_score(self): # construct sci-kit learn classifier - clf = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')), - ('estimator', RandomForestClassifier())]) + clf = Pipeline( + steps=[ + ("imputer", SimpleImputer(strategy="median")), + ("estimator", RandomForestClassifier()), + ] + ) # download task task = openml.tasks.get_task(7) # invoke OpenML run run = openml.runs.run_model_on_task( - model=clf, - task=task, - avoid_duplicate_runs=False, - upload_flow=False, + model=clf, task=task, avoid_duplicate_runs=False, upload_flow=False, ) self._test_local_evaluations(run) @@ -815,18 +852,17 @@ def test_online_run_metric_score(self): self._test_local_evaluations(run) def test_initialize_model_from_run(self): - clf = sklearn.pipeline.Pipeline(steps=[ - ('Imputer', SimpleImputer(strategy='median')), - ('VarianceThreshold', VarianceThreshold(threshold=0.05)), - ('Estimator', GaussianNB())]) - task = openml.tasks.get_task(11) - run = openml.runs.run_model_on_task( - model=clf, - task=task, - avoid_duplicate_runs=False, + clf = sklearn.pipeline.Pipeline( + steps=[ + ("Imputer", SimpleImputer(strategy="median")), + ("VarianceThreshold", VarianceThreshold(threshold=0.05)), + ("Estimator", GaussianNB()), + ] ) + task = openml.tasks.get_task(11) + run = openml.runs.run_model_on_task(model=clf, task=task, avoid_duplicate_runs=False,) run_ = run.publish() 
- TestBase._mark_entity_for_removal('run', run_.run_id) + TestBase._mark_entity_for_removal("run", run_.run_id) TestBase.logger.info("collected from test_run_functions: {}".format(run_.run_id)) run = openml.runs.get_run(run_.run_id) @@ -839,10 +875,8 @@ def test_initialize_model_from_run(self): openml.flows.assert_flows_equal(flowR, flowL) openml.flows.assert_flows_equal(flowS, flowL) - self.assertEqual(flowS.components['Imputer']. - parameters['strategy'], '"median"') - self.assertEqual(flowS.components['VarianceThreshold']. - parameters['threshold'], '0.05') + self.assertEqual(flowS.components["Imputer"].parameters["strategy"], '"median"') + self.assertEqual(flowS.components["VarianceThreshold"].parameters["threshold"], "0.05") @pytest.mark.flaky() def test_get_run_trace(self): @@ -856,31 +890,30 @@ def test_get_run_trace(self): # IMPORTANT! Do not sentinel this flow. is faster if we don't wait # on openml server - clf = RandomizedSearchCV(RandomForestClassifier(random_state=42, - n_estimators=5), - - {"max_depth": [3, None], - "max_features": [1, 2, 3, 4], - "bootstrap": [True, False], - "criterion": ["gini", "entropy"]}, - num_iterations, random_state=42, cv=3) + clf = RandomizedSearchCV( + RandomForestClassifier(random_state=42, n_estimators=5), + { + "max_depth": [3, None], + "max_features": [1, 2, 3, 4], + "bootstrap": [True, False], + "criterion": ["gini", "entropy"], + }, + num_iterations, + random_state=42, + cv=3, + ) # [SPEED] make unit test faster by exploiting run information # from the past try: # in case the run did not exists yet - run = openml.runs.run_model_on_task( - model=clf, - task=task, - avoid_duplicate_runs=True, - ) + run = openml.runs.run_model_on_task(model=clf, task=task, avoid_duplicate_runs=True,) self.assertEqual( - len(run.trace.trace_iterations), - num_iterations * num_folds, + len(run.trace.trace_iterations), num_iterations * num_folds, ) run = run.publish() - TestBase._mark_entity_for_removal('run', run.run_id) + TestBase._mark_entity_for_removal("run", run.run_id) TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id)) self._wait_for_processed_run(run.run_id, 200) run_id = run.run_id @@ -900,16 +933,20 @@ def test__run_exists(self): # and can just check their status on line rs = 1 clfs = [ - sklearn.pipeline.Pipeline(steps=[ - ('Imputer', SimpleImputer(strategy='mean')), - ('VarianceThreshold', VarianceThreshold(threshold=0.05)), - ('Estimator', DecisionTreeClassifier(max_depth=4)) - ]), - sklearn.pipeline.Pipeline(steps=[ - ('Imputer', SimpleImputer(strategy='most_frequent')), - ('VarianceThreshold', VarianceThreshold(threshold=0.1)), - ('Estimator', DecisionTreeClassifier(max_depth=4))] - ) + sklearn.pipeline.Pipeline( + steps=[ + ("Imputer", SimpleImputer(strategy="mean")), + ("VarianceThreshold", VarianceThreshold(threshold=0.05)), + ("Estimator", DecisionTreeClassifier(max_depth=4)), + ] + ), + sklearn.pipeline.Pipeline( + steps=[ + ("Imputer", SimpleImputer(strategy="most_frequent")), + ("VarianceThreshold", VarianceThreshold(threshold=0.1)), + ("Estimator", DecisionTreeClassifier(max_depth=4)), + ] + ), ] task = openml.tasks.get_task(115) @@ -919,14 +956,10 @@ def test__run_exists(self): # first populate the server with this run. # skip run if it was already performed. 
run = openml.runs.run_model_on_task( - model=clf, - task=task, - seed=rs, - avoid_duplicate_runs=True, - upload_flow=True + model=clf, task=task, seed=rs, avoid_duplicate_runs=True, upload_flow=True ) run.publish() - TestBase._mark_entity_for_removal('run', run.run_id) + TestBase._mark_entity_for_removal("run", run.run_id) TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id)) except openml.exceptions.PyOpenMLError: # run already existed. Great. @@ -952,13 +985,12 @@ def test_run_with_illegal_flow_id(self): flow = self.extension.model_to_flow(clf) flow, _ = self._add_sentinel_to_flow_name(flow, None) flow.flow_id = -1 - expected_message_regex = ("Flow does not exist on the server, " - "but 'flow.flow_id' is not None.") + expected_message_regex = ( + "Flow does not exist on the server, " "but 'flow.flow_id' is not None." + ) with self.assertRaisesRegex(openml.exceptions.PyOpenMLError, expected_message_regex): openml.runs.run_flow_on_task( - task=task, - flow=flow, - avoid_duplicate_runs=True, + task=task, flow=flow, avoid_duplicate_runs=True, ) def test_run_with_illegal_flow_id_after_load(self): @@ -970,25 +1002,19 @@ def test_run_with_illegal_flow_id_after_load(self): flow, _ = self._add_sentinel_to_flow_name(flow, None) flow.flow_id = -1 run = openml.runs.run_flow_on_task( - task=task, - flow=flow, - avoid_duplicate_runs=False, - upload_flow=False + task=task, flow=flow, avoid_duplicate_runs=False, upload_flow=False ) - cache_path = os.path.join( - self.workdir, - 'runs', - str(random.getrandbits(128)), - ) + cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)),) run.to_filesystem(cache_path) loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) - expected_message_regex = ("Flow does not exist on the server, " - "but 'flow.flow_id' is not None.") + expected_message_regex = ( + "Flow does not exist on the server, " "but 'flow.flow_id' is not None." 
+ ) with self.assertRaisesRegex(openml.exceptions.PyOpenMLError, expected_message_regex): loaded_run.publish() - TestBase._mark_entity_for_removal('run', loaded_run.run_id) + TestBase._mark_entity_for_removal("run", loaded_run.run_id) TestBase.logger.info("collected from test_run_functions: {}".format(loaded_run.run_id)) def test_run_with_illegal_flow_id_1(self): @@ -999,7 +1025,7 @@ def test_run_with_illegal_flow_id_1(self): flow_orig = self.extension.model_to_flow(clf) try: flow_orig.publish() # ensures flow exist on server - TestBase._mark_entity_for_removal('flow', (flow_orig.flow_id, flow_orig.name)) + TestBase._mark_entity_for_removal("flow", (flow_orig.flow_id, flow_orig.name)) TestBase.logger.info("collected from test_run_functions: {}".format(flow_orig.flow_id)) except openml.exceptions.OpenMLServerException: # flow already exists @@ -1007,15 +1033,10 @@ def test_run_with_illegal_flow_id_1(self): flow_new = self.extension.model_to_flow(clf) flow_new.flow_id = -1 - expected_message_regex = ( - "Local flow_id does not match server flow_id: " - "'-1' vs '[0-9]+'" - ) + expected_message_regex = "Local flow_id does not match server flow_id: " "'-1' vs '[0-9]+'" with self.assertRaisesRegex(openml.exceptions.PyOpenMLError, expected_message_regex): openml.runs.run_flow_on_task( - task=task, - flow=flow_new, - avoid_duplicate_runs=True, + task=task, flow=flow_new, avoid_duplicate_runs=True, ) def test_run_with_illegal_flow_id_1_after_load(self): @@ -1026,7 +1047,7 @@ def test_run_with_illegal_flow_id_1_after_load(self): flow_orig = self.extension.model_to_flow(clf) try: flow_orig.publish() # ensures flow exist on server - TestBase._mark_entity_for_removal('flow', (flow_orig.flow_id, flow_orig.name)) + TestBase._mark_entity_for_removal("flow", (flow_orig.flow_id, flow_orig.name)) TestBase.logger.info("collected from test_run_functions: {}".format(flow_orig.flow_id)) except openml.exceptions.OpenMLServerException: # flow already exists @@ -1035,28 +1056,16 @@ def test_run_with_illegal_flow_id_1_after_load(self): flow_new.flow_id = -1 run = openml.runs.run_flow_on_task( - task=task, - flow=flow_new, - avoid_duplicate_runs=False, - upload_flow=False + task=task, flow=flow_new, avoid_duplicate_runs=False, upload_flow=False ) - cache_path = os.path.join( - self.workdir, - 'runs', - str(random.getrandbits(128)), - ) + cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)),) run.to_filesystem(cache_path) loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) - expected_message_regex = ( - "Local flow_id does not match server flow_id: " - "'-1' vs '[0-9]+'" - ) + expected_message_regex = "Local flow_id does not match server flow_id: " "'-1' vs '[0-9]+'" self.assertRaisesRegex( - openml.exceptions.PyOpenMLError, - expected_message_regex, - loaded_run.publish + openml.exceptions.PyOpenMLError, expected_message_regex, loaded_run.publish ) def test__run_task_get_arffcontent(self): @@ -1066,14 +1075,10 @@ def test__run_task_get_arffcontent(self): num_repeats = 1 flow = unittest.mock.Mock() - flow.name = 'dummy' - clf = SGDClassifier(loss='log', random_state=1) + flow.name = "dummy" + clf = SGDClassifier(loss="log", random_state=1) res = openml.runs.functions._run_task_get_arffcontent( - flow=flow, - extension=self.extension, - model=clf, - task=task, - add_local_measures=True, + flow=flow, extension=self.extension, model=clf, task=task, add_local_measures=True, ) arff_datacontent, trace, fold_evaluations, _ = res # predictions @@ -1082,8 +1087,9 @@ def 
test__run_task_get_arffcontent(self): self.assertIsInstance(trace, type(None)) task_type = TaskTypeEnum.SUPERVISED_CLASSIFICATION - self._check_fold_timing_evaluations(fold_evaluations, num_repeats, num_folds, - task_type=task_type) + self._check_fold_timing_evaluations( + fold_evaluations, num_repeats, num_folds, task_type=task_type + ) # 10 times 10 fold CV of 150 samples self.assertEqual(len(arff_datacontent), num_instances * num_repeats) @@ -1101,12 +1107,11 @@ def test__run_task_get_arffcontent(self): self.assertLessEqual(arff_line[2], num_instances - 1) # check confidences self.assertAlmostEqual(sum(arff_line[4:6]), 1.0) - self.assertIn(arff_line[6], ['won', 'nowin']) - self.assertIn(arff_line[7], ['won', 'nowin']) + self.assertIn(arff_line[6], ["won", "nowin"]) + self.assertIn(arff_line[7], ["won", "nowin"]) def test__create_trace_from_arff(self): - with open(self.static_cache_dir + '/misc/trace.arff', - 'r') as arff_file: + with open(self.static_cache_dir + "/misc/trace.arff", "r") as arff_file: trace_arff = arff.load(arff_file) OpenMLRunTrace.trace_from_arff(trace_arff) @@ -1115,25 +1120,25 @@ def test_get_run(self): openml.config.server = self.production_server run = openml.runs.get_run(473351) self.assertEqual(run.dataset_id, 357) - self.assertEqual(run.evaluations['f_measure'], 0.841225) - for i, value in [(0, 0.840918), - (1, 0.839458), - (2, 0.839613), - (3, 0.842571), - (4, 0.839567), - (5, 0.840922), - (6, 0.840985), - (7, 0.847129), - (8, 0.84218), - (9, 0.844014)]: - self.assertEqual(run.fold_evaluations['f_measure'][0][i], value) - assert ('weka' in run.tags) - assert ('weka_3.7.12' in run.tags) - assert ( - run.predictions_url == ( - "https://www.openml.org/data/download/1667125/" - "weka_generated_predictions4575715871712251329.arff" - ) + self.assertEqual(run.evaluations["f_measure"], 0.841225) + for i, value in [ + (0, 0.840918), + (1, 0.839458), + (2, 0.839613), + (3, 0.842571), + (4, 0.839567), + (5, 0.840922), + (6, 0.840985), + (7, 0.847129), + (8, 0.84218), + (9, 0.844014), + ]: + self.assertEqual(run.fold_evaluations["f_measure"][0][i], value) + assert "weka" in run.tags + assert "weka_3.7.12" in run.tags + assert run.predictions_url == ( + "https://www.openml.org/data/download/1667125/" + "weka_generated_predictions4575715871712251329.arff" ) def _check_run(self, run): @@ -1156,12 +1161,12 @@ def test_get_runs_list(self): def test_list_runs_empty(self): runs = openml.runs.list_runs(task=[0]) if len(runs) > 0: - raise ValueError('UnitTest Outdated, got somehow results') + raise ValueError("UnitTest Outdated, got somehow results") self.assertIsInstance(runs, dict) def test_list_runs_output_format(self): - runs = openml.runs.list_runs(size=1000, output_format='dataframe') + runs = openml.runs.list_runs(size=1000, output_format="dataframe") self.assertIsInstance(runs, pd.DataFrame) def test_get_runs_list_by_task(self): @@ -1171,7 +1176,7 @@ def test_get_runs_list_by_task(self): runs = openml.runs.list_runs(task=task_ids) self.assertGreaterEqual(len(runs), 590) for rid in runs: - self.assertIn(runs[rid]['task_id'], task_ids) + self.assertIn(runs[rid]["task_id"], task_ids) self._check_run(runs[rid]) num_runs = len(runs) @@ -1179,7 +1184,7 @@ def test_get_runs_list_by_task(self): runs = openml.runs.list_runs(task=task_ids) self.assertGreaterEqual(len(runs), num_runs + 1) for rid in runs: - self.assertIn(runs[rid]['task_id'], task_ids) + self.assertIn(runs[rid]["task_id"], task_ids) self._check_run(runs[rid]) def test_get_runs_list_by_uploader(self): @@ -1191,7 
+1196,7 @@ def test_get_runs_list_by_uploader(self): runs = openml.runs.list_runs(uploader=uploader_ids) self.assertGreaterEqual(len(runs), 2) for rid in runs: - self.assertIn(runs[rid]['uploader'], uploader_ids) + self.assertIn(runs[rid]["uploader"], uploader_ids) self._check_run(runs[rid]) num_runs = len(runs) @@ -1200,7 +1205,7 @@ def test_get_runs_list_by_uploader(self): runs = openml.runs.list_runs(uploader=uploader_ids) self.assertGreaterEqual(len(runs), num_runs + 1) for rid in runs: - self.assertIn(runs[rid]['uploader'], uploader_ids) + self.assertIn(runs[rid]["uploader"], uploader_ids) self._check_run(runs[rid]) def test_get_runs_list_by_flow(self): @@ -1210,7 +1215,7 @@ def test_get_runs_list_by_flow(self): runs = openml.runs.list_runs(flow=flow_ids) self.assertGreaterEqual(len(runs), 1) for rid in runs: - self.assertIn(runs[rid]['flow_id'], flow_ids) + self.assertIn(runs[rid]["flow_id"], flow_ids) self._check_run(runs[rid]) num_runs = len(runs) @@ -1218,7 +1223,7 @@ def test_get_runs_list_by_flow(self): runs = openml.runs.list_runs(flow=flow_ids) self.assertGreaterEqual(len(runs), num_runs + 1) for rid in runs: - self.assertIn(runs[rid]['flow_id'], flow_ids) + self.assertIn(runs[rid]["flow_id"], flow_ids) self._check_run(runs[rid]) def test_get_runs_pagination(self): @@ -1228,8 +1233,7 @@ def test_get_runs_pagination(self): size = 10 max = 100 for i in range(0, max, size): - runs = openml.runs.list_runs(offset=i, size=size, - uploader=uploader_ids) + runs = openml.runs.list_runs(offset=i, size=size, uploader=uploader_ids) self.assertGreaterEqual(size, len(runs)) for rid in runs: self.assertIn(runs[rid]["uploader"], uploader_ids) @@ -1243,11 +1247,11 @@ def test_get_runs_list_by_filters(self): uploaders_2 = [29, 274] flows = [74, 1718] - ''' + """ Since the results are taken by batch size, the function does not throw an OpenMLServerError anymore. Instead it throws a TimeOutException. For the moment commented out. 
- ''' + """ # self.assertRaises(openml.exceptions.OpenMLServerError, # openml.runs.list_runs) @@ -1269,7 +1273,7 @@ def test_get_runs_list_by_tag(self): # TODO: comes from live, no such lists on test # Unit test works on production server only openml.config.server = self.production_server - runs = openml.runs.list_runs(tag='curves') + runs = openml.runs.list_runs(tag="curves") self.assertGreaterEqual(len(runs), 1) def test_run_on_dataset_with_missing_labels(self): @@ -1278,18 +1282,18 @@ def test_run_on_dataset_with_missing_labels(self): # actual data flow = unittest.mock.Mock() - flow.name = 'dummy' + flow.name = "dummy" task = openml.tasks.get_task(2) - model = Pipeline(steps=[('Imputer', SimpleImputer(strategy='median')), - ('Estimator', DecisionTreeClassifier())]) + model = Pipeline( + steps=[ + ("Imputer", SimpleImputer(strategy="median")), + ("Estimator", DecisionTreeClassifier()), + ] + ) data_content, _, _, _ = _run_task_get_arffcontent( - flow=flow, - model=model, - task=task, - extension=self.extension, - add_local_measures=True, + flow=flow, model=model, task=task, extension=self.extension, add_local_measures=True, ) # 2 folds, 5 repeats; keep in mind that this task comes from the test # server, the task on the live server is different @@ -1311,18 +1315,15 @@ def test_run_flow_on_task_downloaded_flow(self): model = sklearn.ensemble.RandomForestClassifier(n_estimators=33) flow = self.extension.model_to_flow(model) flow.publish(raise_error_if_exists=False) - TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name)) + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) TestBase.logger.info("collected from test_run_functions: {}".format(flow.flow_id)) downloaded_flow = openml.flows.get_flow(flow.flow_id) task = openml.tasks.get_task(119) # diabetes run = openml.runs.run_flow_on_task( - flow=downloaded_flow, - task=task, - avoid_duplicate_runs=False, - upload_flow=False, + flow=downloaded_flow, task=task, avoid_duplicate_runs=False, upload_flow=False, ) run.publish() - TestBase._mark_entity_for_removal('run', run.run_id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], run.run_id)) + TestBase._mark_entity_for_removal("run", run.run_id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], run.run_id)) diff --git a/tests/test_runs/test_trace.py b/tests/test_runs/test_trace.py index be339617d..96724d139 100644 --- a/tests/test_runs/test_trace.py +++ b/tests/test_runs/test_trace.py @@ -14,7 +14,7 @@ def test_get_selected_iteration(self): repeat=i, fold=j, iteration=5, - setup_string='parameter_%d%d%d' % (i, j, k), + setup_string="parameter_%d%d%d" % (i, j, k), evaluation=1.0 * i + 0.1 * j + 0.01 * k, selected=(i == j and i == k and i == 2), parameters=None, @@ -25,8 +25,7 @@ def test_get_selected_iteration(self): # This next one should simply not fail self.assertEqual(trace.get_selected_iteration(2, 2), 2) with self.assertRaisesRegex( - ValueError, - 'Could not find the selected iteration for rep/fold 3/3', + ValueError, "Could not find the selected iteration for rep/fold 3/3", ): trace.get_selected_iteration(3, 3) @@ -34,56 +33,48 @@ def test_get_selected_iteration(self): def test_initialization(self): """Check all different ways to fail the initialization """ with self.assertRaisesRegex( - ValueError, - 'Trace content not available.', - ): - OpenMLRunTrace.generate(attributes='foo', content=None) - with self.assertRaisesRegex( - ValueError, - 'Trace attributes not available.', + ValueError, "Trace 
content not available.", ): - OpenMLRunTrace.generate(attributes=None, content='foo') + OpenMLRunTrace.generate(attributes="foo", content=None) with self.assertRaisesRegex( - ValueError, - 'Trace content is empty.' + ValueError, "Trace attributes not available.", ): - OpenMLRunTrace.generate(attributes='foo', content=[]) + OpenMLRunTrace.generate(attributes=None, content="foo") + with self.assertRaisesRegex(ValueError, "Trace content is empty."): + OpenMLRunTrace.generate(attributes="foo", content=[]) with self.assertRaisesRegex( - ValueError, - 'Trace_attributes and trace_content not compatible:' + ValueError, "Trace_attributes and trace_content not compatible:" ): - OpenMLRunTrace.generate(attributes=['abc'], content=[[1, 2]]) + OpenMLRunTrace.generate(attributes=["abc"], content=[[1, 2]]) def test_duplicate_name(self): # Test that the user does not pass a parameter which has the same name # as one of the required trace attributes trace_attributes = [ - ('repeat', 'NUMERICAL'), - ('fold', 'NUMERICAL'), - ('iteration', 'NUMERICAL'), - ('evaluation', 'NUMERICAL'), - ('selected', ['true', 'false']), - ('repeat', 'NUMERICAL'), + ("repeat", "NUMERICAL"), + ("fold", "NUMERICAL"), + ("iteration", "NUMERICAL"), + ("evaluation", "NUMERICAL"), + ("selected", ["true", "false"]), + ("repeat", "NUMERICAL"), ] - trace_content = [[0, 0, 0, 0.5, 'true', 1], [0, 0, 0, 0.9, 'false', 2]] + trace_content = [[0, 0, 0, 0.5, "true", 1], [0, 0, 0, 0.9, "false", 2]] with self.assertRaisesRegex( - ValueError, - 'Either setup_string or parameters needs to be passed as argument.' + ValueError, "Either setup_string or parameters needs to be passed as argument." ): OpenMLRunTrace.generate(trace_attributes, trace_content) trace_attributes = [ - ('repeat', 'NUMERICAL'), - ('fold', 'NUMERICAL'), - ('iteration', 'NUMERICAL'), - ('evaluation', 'NUMERICAL'), - ('selected', ['true', 'false']), - ('sunshine', 'NUMERICAL'), + ("repeat", "NUMERICAL"), + ("fold", "NUMERICAL"), + ("iteration", "NUMERICAL"), + ("evaluation", "NUMERICAL"), + ("selected", ["true", "false"]), + ("sunshine", "NUMERICAL"), ] - trace_content = [[0, 0, 0, 0.5, 'true', 1], [0, 0, 0, 0.9, 'false', 2]] + trace_content = [[0, 0, 0, 0.5, "true", 1], [0, 0, 0, 0.9, "false", 2]] with self.assertRaisesRegex( ValueError, - 'Encountered unknown attribute sunshine that does not start with ' - 'prefix parameter_' + "Encountered unknown attribute sunshine that does not start with " "prefix parameter_", ): OpenMLRunTrace.generate(trace_attributes, trace_content) diff --git a/tests/test_setups/__init__.py b/tests/test_setups/__init__.py index b71163cb2..245c252db 100644 --- a/tests/test_setups/__init__.py +++ b/tests/test_setups/__init__.py @@ -2,4 +2,4 @@ # Dummy to allow mock classes in the test files to have a version number for # their parent module -__version__ = '0.1' +__version__ = "0.1" diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index 4dc27c95f..e89318728 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -21,9 +21,9 @@ def get_sentinel(): # identified by its name and external version online. 
Having a unique # name allows us to publish the same flow in each test run md5 = hashlib.md5() - md5.update(str(time.time()).encode('utf-8')) + md5.update(str(time.time()).encode("utf-8")) sentinel = md5.hexdigest()[:10] - sentinel = 'TEST%s' % sentinel + sentinel = "TEST%s" % sentinel return sentinel @@ -40,10 +40,10 @@ def test_nonexisting_setup_exists(self): # because of the sentinel, we can not use flows that contain subflows dectree = sklearn.tree.DecisionTreeClassifier() flow = self.extension.model_to_flow(dectree) - flow.name = 'TEST%s%s' % (sentinel, flow.name) + flow.name = "TEST%s%s" % (sentinel, flow.name) flow.publish() - TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name)) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], flow.flow_id)) + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) # although the flow exists (created as of previous statement), # we can be sure there are no setups (yet) as it was just created @@ -54,10 +54,10 @@ def test_nonexisting_setup_exists(self): def _existing_setup_exists(self, classif): flow = self.extension.model_to_flow(classif) - flow.name = 'TEST%s%s' % (get_sentinel(), flow.name) + flow.name = "TEST%s%s" % (get_sentinel(), flow.name) flow.publish() - TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name)) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], flow.flow_id)) + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) # although the flow exists, we can be sure there are no # setups (yet) as it hasn't been ran @@ -72,8 +72,8 @@ def _existing_setup_exists(self, classif): # spoof flow id, otherwise the sentinel is ignored run.flow_id = flow.flow_id run.publish() - TestBase._mark_entity_for_removal('run', run.run_id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], run.run_id)) + TestBase._mark_entity_for_removal("run", run.run_id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], run.run_id)) # download the run, as it contains the right setup id run = openml.runs.get_run(run.run_id) @@ -85,10 +85,9 @@ def test_existing_setup_exists_1(self): def side_effect(self): self.var_smoothing = 1e-9 self.priors = None + with unittest.mock.patch.object( - sklearn.naive_bayes.GaussianNB, - '__init__', - side_effect, + sklearn.naive_bayes.GaussianNB, "__init__", side_effect, ): # Check a flow with zero hyperparameters nb = sklearn.naive_bayes.GaussianNB() @@ -112,7 +111,7 @@ def test_existing_setup_exists_3(self): def test_get_setup(self): # no setups in default test server - openml.config.server = 'https://www.openml.org/api/v1/xml/' + openml.config.server = "https://www.openml.org/api/v1/xml/" # contains all special cases, 0 params, 1 param, n params. # Non scikitlearn flows. 
@@ -141,24 +140,23 @@ def test_setup_list_filter_flow(self): def test_list_setups_empty(self): setups = openml.setups.list_setups(setup=[0]) if len(setups) > 0: - raise ValueError('UnitTest Outdated, got somehow results') + raise ValueError("UnitTest Outdated, got somehow results") self.assertIsInstance(setups, dict) def test_list_setups_output_format(self): openml.config.server = self.production_server flow_id = 6794 - setups = openml.setups.list_setups(flow=flow_id, output_format='object', size=10) + setups = openml.setups.list_setups(flow=flow_id, output_format="object", size=10) self.assertIsInstance(setups, Dict) - self.assertIsInstance(setups[list(setups.keys())[0]], - openml.setups.setup.OpenMLSetup) + self.assertIsInstance(setups[list(setups.keys())[0]], openml.setups.setup.OpenMLSetup) self.assertEqual(len(setups), 10) - setups = openml.setups.list_setups(flow=flow_id, output_format='dataframe', size=10) + setups = openml.setups.list_setups(flow=flow_id, output_format="dataframe", size=10) self.assertIsInstance(setups, pd.DataFrame) self.assertEqual(len(setups), 10) - setups = openml.setups.list_setups(flow=flow_id, output_format='dict', size=10) + setups = openml.setups.list_setups(flow=flow_id, output_format="dict", size=10) self.assertIsInstance(setups, Dict) self.assertIsInstance(setups[list(setups.keys())[0]], Dict) self.assertEqual(len(setups), 10) diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py index b93565511..2c403aa84 100644 --- a/tests/test_study/test_study_examples.py +++ b/tests/test_study/test_study_examples.py @@ -33,13 +33,11 @@ def test_Figure1a(self): import sklearn.preprocessing import sklearn.tree - benchmark_suite = openml.study.get_study( - 'OpenML100', 'tasks' - ) # obtain the benchmark suite + benchmark_suite = openml.study.get_study("OpenML100", "tasks") # obtain the benchmark suite clf = sklearn.pipeline.Pipeline( steps=[ - ('imputer', SimpleImputer()), - ('estimator', sklearn.tree.DecisionTreeClassifier()) + ("imputer", SimpleImputer()), + ("estimator", sklearn.tree.DecisionTreeClassifier()), ] ) # build a sklearn classifier for task_id in benchmark_suite.tasks[:1]: # iterate over all tasks @@ -49,12 +47,11 @@ def test_Figure1a(self): run = openml.runs.run_model_on_task( clf, task, avoid_duplicate_runs=False ) # run classifier on splits (requires API key) - score = run.get_metric_fn( - sklearn.metrics.accuracy_score - ) # print accuracy score - print('Data set: %s; Accuracy: %0.2f' % (task.get_dataset().name, score.mean())) + score = run.get_metric_fn(sklearn.metrics.accuracy_score) # print accuracy score + print("Data set: %s; Accuracy: %0.2f" % (task.get_dataset().name, score.mean())) run.publish() # publish the experiment on OpenML (optional) - TestBase._mark_entity_for_removal('run', run.run_id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - run.run_id)) - print('URL for run: %s/run/%d' % (openml.config.server, run.run_id)) + TestBase._mark_entity_for_removal("run", run.run_id) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], run.run_id) + ) + print("URL for run: %s/run/%d" % (openml.config.server, run.run_id)) diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py index 490fc7226..b3adfc9d6 100644 --- a/tests/test_study/test_study_functions.py +++ b/tests/test_study/test_study_functions.py @@ -32,9 +32,9 @@ def test_get_study_new(self): def test_get_openml100(self): openml.config.server = 
self.production_server - study = openml.study.get_study('OpenML100', 'tasks') + study = openml.study.get_study("OpenML100", "tasks") self.assertIsInstance(study, openml.study.OpenMLBenchmarkSuite) - study_2 = openml.study.get_suite('OpenML100') + study_2 = openml.study.get_suite("OpenML100") self.assertIsInstance(study_2, openml.study.OpenMLBenchmarkSuite) self.assertEqual(study.study_id, study_2.study_id) @@ -42,8 +42,7 @@ def test_get_study_error(self): openml.config.server = self.production_server with self.assertRaisesRegex( - ValueError, - "Unexpected entity type 'task' reported by the server, expected 'run'", + ValueError, "Unexpected entity type 'task' reported by the server, expected 'run'", ): openml.study.get_study(99) @@ -61,26 +60,25 @@ def test_get_suite_error(self): openml.config.server = self.production_server with self.assertRaisesRegex( - ValueError, - "Unexpected entity type 'run' reported by the server, expected 'task'", + ValueError, "Unexpected entity type 'run' reported by the server, expected 'task'", ): openml.study.get_suite(123) def test_publish_benchmark_suite(self): fixture_alias = None - fixture_name = 'unit tested benchmark suite' - fixture_descr = 'bla' + fixture_name = "unit tested benchmark suite" + fixture_descr = "bla" fixture_task_ids = [1, 2, 3] study = openml.study.create_benchmark_suite( alias=fixture_alias, name=fixture_name, description=fixture_descr, - task_ids=fixture_task_ids + task_ids=fixture_task_ids, ) study.publish() - TestBase._mark_entity_for_removal('study', study.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], study.id)) + TestBase._mark_entity_for_removal("study", study.id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id)) self.assertGreater(study.id, 0) @@ -89,7 +87,7 @@ def test_publish_benchmark_suite(self): self.assertEqual(study_downloaded.alias, fixture_alias) self.assertEqual(study_downloaded.name, fixture_name) self.assertEqual(study_downloaded.description, fixture_descr) - self.assertEqual(study_downloaded.main_entity_type, 'task') + self.assertEqual(study_downloaded.main_entity_type, "task") # verify resources self.assertIsNone(study_downloaded.flows) self.assertIsNone(study_downloaded.setups) @@ -103,28 +101,26 @@ def test_publish_benchmark_suite(self): openml.study.attach_to_study(study.id, tasks_additional) study_downloaded = openml.study.get_suite(study.id) # verify again - self.assertSetEqual(set(study_downloaded.tasks), - set(fixture_task_ids + tasks_additional)) + self.assertSetEqual(set(study_downloaded.tasks), set(fixture_task_ids + tasks_additional)) # test detach function openml.study.detach_from_study(study.id, fixture_task_ids) study_downloaded = openml.study.get_suite(study.id) - self.assertSetEqual(set(study_downloaded.tasks), - set(tasks_additional)) + self.assertSetEqual(set(study_downloaded.tasks), set(tasks_additional)) # test status update function - openml.study.update_suite_status(study.id, 'deactivated') + openml.study.update_suite_status(study.id, "deactivated") study_downloaded = openml.study.get_suite(study.id) - self.assertEqual(study_downloaded.status, 'deactivated') + self.assertEqual(study_downloaded.status, "deactivated") # can't delete study, now it's not longer in preparation def test_publish_study(self): # get some random runs to attach - run_list = openml.evaluations.list_evaluations('predictive_accuracy', size=10) + run_list = openml.evaluations.list_evaluations("predictive_accuracy", size=10) 
self.assertEqual(len(run_list), 10) fixt_alias = None - fixt_name = 'unit tested study' - fixt_descr = 'bla' + fixt_name = "unit tested study" + fixt_descr = "bla" fixt_flow_ids = set([evaluation.flow_id for evaluation in run_list.values()]) fixt_task_ids = set([evaluation.task_id for evaluation in run_list.values()]) fixt_setup_ids = set([evaluation.setup_id for evaluation in run_list.values()]) @@ -134,7 +130,7 @@ def test_publish_study(self): benchmark_suite=None, name=fixt_name, description=fixt_descr, - run_ids=list(run_list.keys()) + run_ids=list(run_list.keys()), ) study.publish() # not tracking upload for delete since _delete_entity called end of function @@ -144,7 +140,7 @@ def test_publish_study(self): self.assertEqual(study_downloaded.alias, fixt_alias) self.assertEqual(study_downloaded.name, fixt_name) self.assertEqual(study_downloaded.description, fixt_descr) - self.assertEqual(study_downloaded.main_entity_type, 'run') + self.assertEqual(study_downloaded.main_entity_type, "run") self.assertSetEqual(set(study_downloaded.runs), set(run_list.keys())) self.assertSetEqual(set(study_downloaded.setups), set(fixt_setup_ids)) @@ -156,13 +152,12 @@ def test_publish_study(self): self.assertSetEqual(set(run_ids), set(study_downloaded.runs)) # test whether the list evaluation function also handles study data fine - run_ids = openml.evaluations.list_evaluations('predictive_accuracy', study=study.id) + run_ids = openml.evaluations.list_evaluations("predictive_accuracy", study=study.id) self.assertSetEqual(set(run_ids), set(study_downloaded.runs)) # attach more runs run_list_additional = openml.runs.list_runs(size=10, offset=10) - openml.study.attach_to_study(study.id, - list(run_list_additional.keys())) + openml.study.attach_to_study(study.id, list(run_list_additional.keys())) study_downloaded = openml.study.get_study(study.id) # verify again all_run_ids = set(run_list_additional.keys()) | set(run_list.keys()) @@ -171,13 +166,12 @@ def test_publish_study(self): # test detach function openml.study.detach_from_study(study.id, list(run_list.keys())) study_downloaded = openml.study.get_study(study.id) - self.assertSetEqual(set(study_downloaded.runs), - set(run_list_additional.keys())) + self.assertSetEqual(set(study_downloaded.runs), set(run_list_additional.keys())) # test status update function - openml.study.update_study_status(study.id, 'deactivated') + openml.study.update_study_status(study.id, "deactivated") study_downloaded = openml.study.get_study(study.id) - self.assertEqual(study_downloaded.status, 'deactivated') + self.assertEqual(study_downloaded.status, "deactivated") res = openml.study.delete_study(study.id) self.assertTrue(res) @@ -191,34 +185,35 @@ def test_study_attach_illegal(self): study = openml.study.create_study( alias=None, benchmark_suite=None, - name='study with illegal runs', - description='none', - run_ids=list(run_list.keys()) + name="study with illegal runs", + description="none", + run_ids=list(run_list.keys()), ) study.publish() - TestBase._mark_entity_for_removal('study', study.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], study.id)) + TestBase._mark_entity_for_removal("study", study.id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id)) study_original = openml.study.get_study(study.id) - with self.assertRaisesRegex(openml.exceptions.OpenMLServerException, - 'Problem attaching entities.'): + with self.assertRaisesRegex( + openml.exceptions.OpenMLServerException, "Problem attaching 
entities." + ): # run id does not exists openml.study.attach_to_study(study.id, [0]) - with self.assertRaisesRegex(openml.exceptions.OpenMLServerException, - 'Problem attaching entities.'): + with self.assertRaisesRegex( + openml.exceptions.OpenMLServerException, "Problem attaching entities." + ): # some runs already attached openml.study.attach_to_study(study.id, list(run_list_more.keys())) study_downloaded = openml.study.get_study(study.id) self.assertListEqual(study_original.runs, study_downloaded.runs) def test_study_list(self): - study_list = openml.study.list_studies(status='in_preparation') + study_list = openml.study.list_studies(status="in_preparation") # might fail if server is recently resetted self.assertGreater(len(study_list), 2) def test_study_list_output_format(self): - study_list = openml.study.list_studies(status='in_preparation', - output_format='dataframe') + study_list = openml.study.list_studies(status="in_preparation", output_format="dataframe") self.assertIsInstance(study_list, pd.DataFrame) self.assertGreater(len(study_list), 2) diff --git a/tests/test_tasks/__init__.py b/tests/test_tasks/__init__.py index 2969dc9dd..e987ab735 100644 --- a/tests/test_tasks/__init__.py +++ b/tests/test_tasks/__init__.py @@ -4,6 +4,6 @@ from .test_supervised_task import OpenMLSupervisedTaskTest __all__ = [ - 'OpenMLTaskTest', - 'OpenMLSupervisedTaskTest', + "OpenMLTaskTest", + "OpenMLSupervisedTaskTest", ] diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py index 13068e8cb..b19be7017 100644 --- a/tests/test_tasks/test_classification_task.py +++ b/tests/test_tasks/test_classification_task.py @@ -22,7 +22,7 @@ def test_get_X_and_Y(self): X, Y = super(OpenMLClassificationTaskTest, self).test_get_X_and_Y() self.assertEqual((768, 8), X.shape) self.assertIsInstance(X, np.ndarray) - self.assertEqual((768, ), Y.shape) + self.assertEqual((768,), Y.shape) self.assertIsInstance(Y, np.ndarray) self.assertEqual(Y.dtype, int) @@ -36,7 +36,4 @@ def test_download_task(self): def test_class_labels(self): task = get_task(self.task_id) - self.assertEqual( - task.class_labels, - ['tested_negative', 'tested_positive'] - ) + self.assertEqual(task.class_labels, ["tested_negative", "tested_positive"]) diff --git a/tests/test_tasks/test_clustering_task.py b/tests/test_tasks/test_clustering_task.py index 8f916717a..e46369802 100644 --- a/tests/test_tasks/test_clustering_task.py +++ b/tests/test_tasks/test_clustering_task.py @@ -40,12 +40,13 @@ def test_upload_task(self): task = openml.tasks.create_task( task_type_id=self.task_type_id, dataset_id=dataset_id, - estimation_procedure_id=self.estimation_procedure + estimation_procedure_id=self.estimation_procedure, ) task = task.publish() - TestBase._mark_entity_for_removal('task', task.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - task.id)) + TestBase._mark_entity_for_removal("task", task.id) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], task.id) + ) # success break except OpenMLServerException as e: @@ -58,5 +59,5 @@ def test_upload_task(self): raise e else: raise ValueError( - 'Could not create a valid task for task type ID {}'.format(self.task_type_id) + "Could not create a valid task for task type ID {}".format(self.task_type_id) ) diff --git a/tests/test_tasks/test_learning_curve_task.py b/tests/test_tasks/test_learning_curve_task.py index bfcfebcd2..b8e156ee6 100644 --- a/tests/test_tasks/test_learning_curve_task.py +++ 
b/tests/test_tasks/test_learning_curve_task.py @@ -22,7 +22,7 @@ def test_get_X_and_Y(self): X, Y = super(OpenMLLearningCurveTaskTest, self).test_get_X_and_Y() self.assertEqual((768, 8), X.shape) self.assertIsInstance(X, np.ndarray) - self.assertEqual((768, ), Y.shape) + self.assertEqual((768,), Y.shape) self.assertIsInstance(Y, np.ndarray) self.assertEqual(Y.dtype, int) @@ -36,7 +36,4 @@ def test_download_task(self): def test_class_labels(self): task = get_task(self.task_id) - self.assertEqual( - task.class_labels, - ['tested_negative', 'tested_positive'] - ) + self.assertEqual(task.class_labels, ["tested_negative", "tested_positive"]) diff --git a/tests/test_tasks/test_split.py b/tests/test_tasks/test_split.py index fb31a56b3..7c3dcf9aa 100644 --- a/tests/test_tasks/test_split.py +++ b/tests/test_tasks/test_split.py @@ -18,8 +18,15 @@ def setUp(self): self.directory = os.path.dirname(__file__) # This is for dataset self.arff_filename = os.path.join( - self.directory, "..", "files", "org", "openml", "test", - "tasks", "1882", "datasplits.arff" + self.directory, + "..", + "files", + "org", + "openml", + "test", + "tasks", + "1882", + "datasplits.arff", ) self.pd_filename = self.arff_filename.replace(".arff", ".pkl.py3") @@ -65,9 +72,9 @@ def test_from_arff_file(self): for j in range(10): self.assertGreaterEqual(split.split[i][j][0].train.shape[0], 808) self.assertGreaterEqual(split.split[i][j][0].test.shape[0], 89) - self.assertEqual(split.split[i][j][0].train.shape[0] - + split.split[i][j][0].test.shape[0], - 898) + self.assertEqual( + split.split[i][j][0].train.shape[0] + split.split[i][j][0].test.shape[0], 898 + ) def test_get_split(self): split = OpenMLSplit._from_arff_file(self.arff_filename) @@ -75,14 +82,8 @@ def test_get_split(self): self.assertEqual(train_split.shape[0], 808) self.assertEqual(test_split.shape[0], 90) self.assertRaisesRegex( - ValueError, - "Repeat 10 not known", - split.get, - 10, 2, + ValueError, "Repeat 10 not known", split.get, 10, 2, ) self.assertRaisesRegex( - ValueError, - "Fold 10 not known", - split.get, - 2, 10, + ValueError, "Fold 10 not known", split.get, 2, 10, ) diff --git a/tests/test_tasks/test_supervised_task.py b/tests/test_tasks/test_supervised_task.py index 59fe61bc5..4e1a89f6e 100644 --- a/tests/test_tasks/test_supervised_task.py +++ b/tests/test_tasks/test_supervised_task.py @@ -20,10 +20,7 @@ class OpenMLSupervisedTaskTest(OpenMLTaskTest): @classmethod def setUpClass(cls): if cls is OpenMLSupervisedTaskTest: - raise unittest.SkipTest( - "Skip OpenMLSupervisedTaskTest tests," - " it's a base class" - ) + raise unittest.SkipTest("Skip OpenMLSupervisedTaskTest tests," " it's a base class") super(OpenMLSupervisedTaskTest, cls).setUpClass() def setUp(self, n_levels: int = 1): diff --git a/tests/test_tasks/test_task.py b/tests/test_tasks/test_task.py index 9d80a1dec..ae92f12ad 100644 --- a/tests/test_tasks/test_task.py +++ b/tests/test_tasks/test_task.py @@ -10,10 +10,7 @@ get_dataset, list_datasets, ) -from openml.tasks import ( - create_task, - get_task -) +from openml.tasks import create_task, get_task class OpenMLTaskTest(TestBase): @@ -27,10 +24,7 @@ class OpenMLTaskTest(TestBase): @classmethod def setUpClass(cls): if cls is OpenMLTaskTest: - raise unittest.SkipTest( - "Skip OpenMLTaskTest tests," - " it's a base class" - ) + raise unittest.SkipTest("Skip OpenMLTaskTest tests," " it's a base class") super(OpenMLTaskTest, cls).setUpClass() def setUp(self, n_levels: int = 1): @@ -56,13 +50,14 @@ def test_upload_task(self): 
task_type_id=self.task_type_id, dataset_id=dataset_id, target_name=self._get_random_feature(dataset_id), - estimation_procedure_id=self.estimation_procedure + estimation_procedure_id=self.estimation_procedure, ) task.publish() - TestBase._mark_entity_for_removal('task', task.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], - task.id)) + TestBase._mark_entity_for_removal("task", task.id) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], task.id) + ) # success break except OpenMLServerException as e: @@ -75,13 +70,13 @@ def test_upload_task(self): raise e else: raise ValueError( - 'Could not create a valid task for task type ID {}'.format(self.task_type_id) + "Could not create a valid task for task type ID {}".format(self.task_type_id) ) def _get_compatible_rand_dataset(self) -> List: compatible_datasets = [] - active_datasets = list_datasets(status='active') + active_datasets = list_datasets(status="active") # depending on the task type, find either datasets # with only symbolic features or datasets with only @@ -89,8 +84,8 @@ def _get_compatible_rand_dataset(self) -> List: if self.task_type_id == 2: # regression task for dataset_id, dataset_info in active_datasets.items(): - if 'NumberOfSymbolicFeatures' in dataset_info: - if dataset_info['NumberOfSymbolicFeatures'] == 0: + if "NumberOfSymbolicFeatures" in dataset_info: + if dataset_info["NumberOfSymbolicFeatures"] == 0: compatible_datasets.append(dataset_id) elif self.task_type_id == 5: # clustering task @@ -99,8 +94,8 @@ def _get_compatible_rand_dataset(self) -> List: for dataset_id, dataset_info in active_datasets.items(): # extra checks because of: # https://github.com/openml/OpenML/issues/959 - if 'NumberOfNumericFeatures' in dataset_info: - if dataset_info['NumberOfNumericFeatures'] == 0: + if "NumberOfNumericFeatures" in dataset_info: + if dataset_info["NumberOfNumericFeatures"] == 0: compatible_datasets.append(dataset_id) # in-place shuffling @@ -120,9 +115,9 @@ def _get_random_feature(self, dataset_id: int) -> str: random_feature_index = randint(0, len(random_dataset.features) - 1) random_feature = random_dataset.features[random_feature_index] if self.task_type_id == 2: - if random_feature.data_type == 'numeric': + if random_feature.data_type == "numeric": break else: - if random_feature.data_type == 'nominal': + if random_feature.data_type == "nominal": break return random_feature.name diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index 4a71a83a7..ec62c953a 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -36,17 +36,16 @@ def test__get_cached_task_not_cached(self): openml.config.cache_directory = self.static_cache_dir self.assertRaisesRegex( OpenMLCacheException, - 'Task file for tid 2 not cached', + "Task file for tid 2 not cached", openml.tasks.functions._get_cached_task, 2, ) def test__get_estimation_procedure_list(self): - estimation_procedures = openml.tasks.functions.\ - _get_estimation_procedure_list() + estimation_procedures = openml.tasks.functions._get_estimation_procedure_list() self.assertIsInstance(estimation_procedures, list) self.assertIsInstance(estimation_procedures[0], dict) - self.assertEqual(estimation_procedures[0]['task_type_id'], 1) + self.assertEqual(estimation_procedures[0]["task_type_id"], 1) def test_list_clustering_task(self): # as shown by #383, clustering tasks can give list/dict casting problems @@ -57,12 +56,11 @@ def 
test_list_clustering_task(self): def _check_task(self, task): self.assertEqual(type(task), dict) self.assertGreaterEqual(len(task), 2) - self.assertIn('did', task) - self.assertIsInstance(task['did'], int) - self.assertIn('status', task) - self.assertIsInstance(task['status'], str) - self.assertIn(task['status'], - ['in_preparation', 'active', 'deactivated']) + self.assertIn("did", task) + self.assertIsInstance(task["did"], int) + self.assertIn("status", task) + self.assertIsInstance(task["status"], str) + self.assertIn(task["status"], ["in_preparation", "active", "deactivated"]) def test_list_tasks_by_type(self): num_curves_tasks = 200 # number is flexible, check server if fails @@ -75,20 +73,20 @@ def test_list_tasks_by_type(self): def test_list_tasks_output_format(self): ttid = 3 - tasks = openml.tasks.list_tasks(task_type_id=ttid, output_format='dataframe') + tasks = openml.tasks.list_tasks(task_type_id=ttid, output_format="dataframe") self.assertIsInstance(tasks, pd.DataFrame) self.assertGreater(len(tasks), 100) def test_list_tasks_empty(self): - tasks = openml.tasks.list_tasks(tag='NoOneWillEverUseThisTag') + tasks = openml.tasks.list_tasks(tag="NoOneWillEverUseThisTag") if len(tasks) > 0: - raise ValueError('UnitTest Outdated, got somehow results (tag is used, please adapt)') + raise ValueError("UnitTest Outdated, got somehow results (tag is used, please adapt)") self.assertIsInstance(tasks, dict) def test_list_tasks_by_tag(self): num_basic_tasks = 100 # number is flexible, check server if fails - tasks = openml.tasks.list_tasks(tag='OpenML100') + tasks = openml.tasks.list_tasks(tag="OpenML100") self.assertGreaterEqual(len(tasks), num_basic_tasks) for tid in tasks: self._check_task(tasks[tid]) @@ -124,7 +122,9 @@ def test__get_task(self): openml.config.cache_directory = self.static_cache_dir openml.tasks.get_task(1882) - @unittest.skip("Please await outcome of discussion: https://github.com/openml/OpenML/issues/776") # noqa: E501 + @unittest.skip( + "Please await outcome of discussion: https://github.com/openml/OpenML/issues/776" + ) # noqa: E501 def test__get_task_live(self): # Test the following task as it used to throw an Unicode Error. 
# https://github.com/openml/openml-python/issues/378 @@ -134,38 +134,52 @@ def test__get_task_live(self): def test_get_task(self): task = openml.tasks.get_task(1) self.assertIsInstance(task, OpenMLTask) - self.assertTrue(os.path.exists(os.path.join( - self.workdir, 'org', 'openml', 'test', "tasks", "1", "task.xml", - ))) - self.assertTrue(os.path.exists(os.path.join( - self.workdir, 'org', 'openml', 'test', "tasks", "1", "datasplits.arff" - ))) - self.assertTrue(os.path.exists(os.path.join( - self.workdir, 'org', 'openml', 'test', "datasets", "1", "dataset.arff" - ))) + self.assertTrue( + os.path.exists( + os.path.join(self.workdir, "org", "openml", "test", "tasks", "1", "task.xml",) + ) + ) + self.assertTrue( + os.path.exists( + os.path.join(self.workdir, "org", "openml", "test", "tasks", "1", "datasplits.arff") + ) + ) + self.assertTrue( + os.path.exists( + os.path.join(self.workdir, "org", "openml", "test", "datasets", "1", "dataset.arff") + ) + ) def test_get_task_lazy(self): task = openml.tasks.get_task(2, download_data=False) self.assertIsInstance(task, OpenMLTask) - self.assertTrue(os.path.exists(os.path.join( - self.workdir, 'org', 'openml', 'test', "tasks", "2", "task.xml", - ))) - self.assertEqual(task.class_labels, ['1', '2', '3', '4', '5', 'U']) - - self.assertFalse(os.path.exists(os.path.join( - self.workdir, 'org', 'openml', 'test', "tasks", "2", "datasplits.arff" - ))) + self.assertTrue( + os.path.exists( + os.path.join(self.workdir, "org", "openml", "test", "tasks", "2", "task.xml",) + ) + ) + self.assertEqual(task.class_labels, ["1", "2", "3", "4", "5", "U"]) + + self.assertFalse( + os.path.exists( + os.path.join(self.workdir, "org", "openml", "test", "tasks", "2", "datasplits.arff") + ) + ) # Since the download_data=False is propagated to get_dataset - self.assertFalse(os.path.exists(os.path.join( - self.workdir, 'org', 'openml', 'test', "datasets", "2", "dataset.arff" - ))) + self.assertFalse( + os.path.exists( + os.path.join(self.workdir, "org", "openml", "test", "datasets", "2", "dataset.arff") + ) + ) task.download_split() - self.assertTrue(os.path.exists(os.path.join( - self.workdir, 'org', 'openml', 'test', "tasks", "2", "datasplits.arff" - ))) + self.assertTrue( + os.path.exists( + os.path.join(self.workdir, "org", "openml", "test", "tasks", "2", "datasplits.arff") + ) + ) - @mock.patch('openml.tasks.functions.get_dataset') + @mock.patch("openml.tasks.functions.get_dataset") def test_removal_upon_download_failure(self, get_dataset): class WeirdException(Exception): pass @@ -181,9 +195,7 @@ def assert_and_raise(*args, **kwargs): except WeirdException: pass # Now the file should no longer exist - self.assertFalse(os.path.exists( - os.path.join(os.getcwd(), "tasks", "1", "tasks.xml") - )) + self.assertFalse(os.path.exists(os.path.join(os.getcwd(), "tasks", "1", "tasks.xml"))) def test_get_task_with_cache(self): openml.config.cache_directory = self.static_cache_dir @@ -203,15 +215,15 @@ def test_download_split(self): task = openml.tasks.get_task(1) split = task.download_split() self.assertEqual(type(split), OpenMLSplit) - self.assertTrue(os.path.exists(os.path.join( - self.workdir, 'org', 'openml', 'test', "tasks", "1", "datasplits.arff" - ))) + self.assertTrue( + os.path.exists( + os.path.join(self.workdir, "org", "openml", "test", "tasks", "1", "datasplits.arff") + ) + ) def test_deletion_of_cache_dir(self): # Simple removal - tid_cache_dir = openml.utils._create_cache_directory_for_id( - 'tasks', 1, - ) + tid_cache_dir = 
openml.utils._create_cache_directory_for_id("tasks", 1,) self.assertTrue(os.path.exists(tid_cache_dir)) - openml.utils._remove_cache_dir_for_id('tasks', tid_cache_dir) + openml.utils._remove_cache_dir_for_id("tasks", tid_cache_dir) self.assertFalse(os.path.exists(tid_cache_dir)) diff --git a/tests/test_tasks/test_task_methods.py b/tests/test_tasks/test_task_methods.py index 5cddd7fc4..137e29fe4 100644 --- a/tests/test_tasks/test_task_methods.py +++ b/tests/test_tasks/test_task_methods.py @@ -8,7 +8,6 @@ # Common methods between tasks class OpenMLTaskMethodsTest(TestBase): - def setUp(self): super(OpenMLTaskMethodsTest, self).setUp() @@ -41,7 +40,9 @@ def test_get_train_and_test_split_indices(self): self.assertEqual(681, train_indices[-1]) self.assertEqual(583, test_indices[0]) self.assertEqual(24, test_indices[-1]) - self.assertRaisesRegexp(ValueError, "Fold 10 not known", - task.get_train_test_split_indices, 10, 0) - self.assertRaisesRegexp(ValueError, "Repeat 10 not known", - task.get_train_test_split_indices, 0, 10) + self.assertRaisesRegexp( + ValueError, "Fold 10 not known", task.get_train_test_split_indices, 10, 0 + ) + self.assertRaisesRegexp( + ValueError, "Repeat 10 not known", task.get_train_test_split_indices, 0, 10 + ) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 152dd4dba..9729100bb 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -15,22 +15,19 @@ class OpenMLTaskTest(TestBase): def mocked_perform_api_call(call, request_method): # TODO: JvR: Why is this not a staticmethod? - url = openml.config.server + '/' + call + url = openml.config.server + "/" + call return openml._api_calls._download_text_file(url) def test_list_all(self): openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks) - @mock.patch('openml._api_calls._perform_api_call', - side_effect=mocked_perform_api_call) + @mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call) def test_list_all_few_results_available(self, _perform_api_call): # we want to make sure that the number of api calls is only 1. 
# Although we have multiple versions of the iris dataset, there is only # one with this name/version combination - datasets = openml.datasets.list_datasets(size=1000, - data_name='iris', - data_version=1) + datasets = openml.datasets.list_datasets(size=1000, data_name="iris", data_version=1) self.assertEqual(len(datasets), 1) self.assertEqual(_perform_api_call.call_count, 1) @@ -84,8 +81,9 @@ def test_list_all_for_runs(self): def test_list_all_for_evaluations(self): required_size = 22 # TODO apparently list_evaluations function does not support kwargs - evaluations = openml.evaluations.list_evaluations(function='predictive_accuracy', - size=required_size) + evaluations = openml.evaluations.list_evaluations( + function="predictive_accuracy", size=required_size + ) # might not be on test server after reset, please rerun test at least once if fails self.assertEqual(len(evaluations), required_size) From 368700e37c958b4042f12d52b2dd8ab3e1ee5acc Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Mon, 6 Jul 2020 16:31:20 +0200 Subject: [PATCH 23/48] Improve error handling and error message when loading datasets (#925) * MAINT 918: improve error handling and error message * incorporate feedback from Pieter --- openml/datasets/dataset.py | 50 +++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 3b159f12a..05ed55fe3 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -456,6 +456,17 @@ def _create_pickle_in_cache(self, data_file: str) -> Tuple[str, str, str]: # The file is likely corrupt, see #780. # We deal with this when loading the data in `_load_data`. return data_pickle_file, data_feather_file, feather_attribute_file + except ModuleNotFoundError: + # There was some issue loading the file, see #918 + # We deal with this when loading the data in `_load_data`. + return data_pickle_file, data_feather_file, feather_attribute_file + except ValueError as e: + if "unsupported pickle protocol" in e.args[0]: + # There was some issue loading the file, see #898 + # We deal with this when loading the data in `_load_data`. + return data_pickle_file, data_feather_file, feather_attribute_file + else: + raise # Between v0.8 and v0.9 the format of pickled data changed from # np.ndarray to pd.DataFrame. This breaks some backwards compatibility, @@ -473,6 +484,17 @@ def _create_pickle_in_cache(self, data_file: str) -> Tuple[str, str, str]: # The file is likely corrupt, see #780. # We deal with this when loading the data in `_load_data`. return data_pickle_file, data_feather_file, feather_attribute_file + except ModuleNotFoundError: + # There was some issue loading the file, see #918 + # We deal with this when loading the data in `_load_data`. + return data_pickle_file, data_feather_file, feather_attribute_file + except ValueError as e: + if "unsupported pickle protocol" in e.args[0]: + # There was some issue loading the file, see #898 + # We deal with this when loading the data in `_load_data`. + return data_pickle_file, data_feather_file, feather_attribute_file + else: + raise logger.debug("Data feather file already exists and is up to date.") return data_pickle_file, data_feather_file, feather_attribute_file @@ -529,7 +551,7 @@ def _load_data(self): "Detected a corrupt cache file loading dataset %d: '%s'. " "We will continue loading data from the arff-file, " "but this will be much slower for big datasets. 
" - "Please manually delete the cache file if you want openml-python " + "Please manually delete the cache file if you want OpenML-Python " "to attempt to reconstruct it." "" % (self.dataset_id, self.data_pickle_file) ) @@ -539,6 +561,32 @@ def _load_data(self): "Cannot find a pickle file for dataset {} at " "location {} ".format(self.name, self.data_pickle_file) ) + except ModuleNotFoundError as e: + logger.warning( + "Encountered error message when loading cached dataset %d: '%s'. " + "Error message was: %s. " + "This is most likely due to https://github.com/openml/openml-python/issues/918. " + "We will continue loading data from the arff-file, " + "but this will be much slower for big datasets. " + "Please manually delete the cache file if you want OpenML-Python " + "to attempt to reconstruct it." + "" % (self.dataset_id, self.data_pickle_file, e.args[0]), + ) + data, categorical, attribute_names = self._parse_data_from_arff(self.data_file) + except ValueError as e: + if "unsupported pickle protocol" in e.args[0]: + logger.warning( + "Encountered unsupported pickle protocol when loading cached dataset %d: '%s'. " + "Error message was: %s. " + "We will continue loading data from the arff-file, " + "but this will be much slower for big datasets. " + "Please manually delete the cache file if you want OpenML-Python " + "to attempt to reconstruct it." + "" % (self.dataset_id, self.data_pickle_file, e.args[0]), + ) + data, categorical, attribute_names = self._parse_data_from_arff(self.data_file) + else: + raise return data, categorical, attribute_names From 6b245bd4db7c64c9670559d9085e3afcaf604920 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Mon, 6 Jul 2020 16:50:17 +0200 Subject: [PATCH 24/48] Increase unit test stability (#926) * Increase unit test stability by waiting longer for the server to process run traces, and by querying the server less frequently for new run traces. * Make test stricter actually, we only wait for evaluations to ensure that the trace is processed by the server. Therefore, we can also simply wait for the trace being available instead of relying on the proxy indicator of evaluations being available. * fix stricter test --- tests/test_runs/test_run_functions.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 728467aa2..74f011b7c 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -81,10 +81,19 @@ def _wait_for_processed_run(self, run_id, max_waiting_time_seconds): start_time = time.time() while time.time() - start_time < max_waiting_time_seconds: run = openml.runs.get_run(run_id, ignore_cache=True) - if len(run.evaluations) > 0: - return - else: - time.sleep(3) + + try: + openml.runs.get_run_trace(run_id) + except openml.exceptions.OpenMLServerException: + time.sleep(10) + continue + + if len(run.evaluations) == 0: + time.sleep(10) + continue + + return + raise RuntimeError( "Could not find any evaluations! 
Please check whether run {} was " "evaluated correctly on the server".format(run_id) @@ -915,7 +924,7 @@ def test_get_run_trace(self): run = run.publish() TestBase._mark_entity_for_removal("run", run.run_id) TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id)) - self._wait_for_processed_run(run.run_id, 200) + self._wait_for_processed_run(run.run_id, 400) run_id = run.run_id except openml.exceptions.OpenMLRunsExistError as e: # The only error we expect, should fail otherwise. From 2bfd581e212e7ef91ec65d1d7976f0984f72724a Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 7 Jul 2020 12:26:26 +0200 Subject: [PATCH 25/48] Restructure Contributing documentation (#928) * Mention the initialization of pre-commit * Restructure the two contribution guidelines The rst file will now have general contribution information, for contributions that are related to openml-python, but not actually to the openml-python repository. Information for making a contribution to the openml-python repository is in the contributing markdown file. --- CONTRIBUTING.md | 140 +++++++++++++++++++++++++++----------- doc/contributing.rst | 158 +++++-------------------------------------- 2 files changed, 117 insertions(+), 181 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 42ce4f9f8..8122b0b8e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,7 +1,76 @@ -How to contribute ------------------ +This document describes the workflow on how to contribute to the openml-python package. +If you are interested in connecting a machine learning package with OpenML (i.e. +write an openml-python extension) or want to find other ways to contribute, see [this page](https://openml.github.io/openml-python/master/contributing.html#contributing). -The preferred workflow for contributing to the OpenML python connector is to +Scope of the package +-------------------- + +The scope of the OpenML Python package is to provide a Python interface to +the OpenML platform which integrates well with Python's scientific stack, most +notably [numpy](http://www.numpy.org/), [scipy](https://www.scipy.org/) and +[pandas](https://pandas.pydata.org/). +To reduce opportunity costs and demonstrate the usage of the package, it also +implements an interface to the most popular machine learning package written +in Python, [scikit-learn](http://scikit-learn.org/stable/index.html). +Thereby it will automatically be compatible with many machine learning +libraries written in Python. + +We aim to keep the package as light-weight as possible and we will try to +keep the number of potential installation dependencies as low as possible. +Therefore, the connection to other machine learning libraries such as +*pytorch*, *keras* or *tensorflow* should not be done directly inside this +package, but in a separate package using the OpenML Python connector. +More information on OpenML Python connectors can be found [here](https://openml.github.io/openml-python/master/contributing.html#contributing). + +Reporting bugs +-------------- +We use GitHub issues to track all bugs and feature requests; feel free to +open an issue if you have found a bug or wish to see a feature implemented. + +It is recommended to check that your issue complies with the +following rules before submitting: + +- Verify that your issue is not being currently addressed by other + [issues](https://github.com/openml/openml-python/issues) + or [pull requests](https://github.com/openml/openml-python/pulls). 
+ +- Please ensure all code snippets and error messages are formatted in + appropriate code blocks. + See [Creating and highlighting code blocks](https://help.github.com/articles/creating-and-highlighting-code-blocks). + +- Please include your operating system type and version number, as well + as your Python, openml, scikit-learn, numpy, and scipy versions. This information + can be found by running the following code snippet: +```python +import platform; print(platform.platform()) +import sys; print("Python", sys.version) +import numpy; print("NumPy", numpy.__version__) +import scipy; print("SciPy", scipy.__version__) +import sklearn; print("Scikit-Learn", sklearn.__version__) +import openml; print("OpenML", openml.__version__) +``` + +Determine what contribution to make +----------------------------------- +Great! You've decided you want to help out. Now what? +All contributions should be linked to issues on the [Github issue tracker](https://github.com/openml/openml-python/issues). +In particular for new contributors, the *good first issue* label should help you find +issues which are suitable for beginners. Resolving these issues allow you to start +contributing to the project without much prior knowledge. Your assistance in this area +will be greatly appreciated by the more experienced developers as it helps free up +their time to concentrate on other issues. + +If you encountered a particular part of the documentation or code that you want to improve, +but there is no related open issue yet, open one first. +This is important since you can first get feedback or pointers from experienced contributors. + +To let everyone know you are working on an issue, please leave a comment that states you will work on the issue +(or, if you have the permission, *assign* yourself to the issue). This avoids double work! + +General git workflow +-------------------- + +The preferred workflow for contributing to openml-python is to fork the [main repository](https://github.com/openml/openml-python) on GitHub, clone, check out the branch `develop`, and develop on a new branch branch. Steps: @@ -114,6 +183,10 @@ First install openml with its test dependencies by running $ pip install -e .[test] ``` from the repository folder. +Then configure pre-commit through + ```bash + $ pre-commit install + ``` This will install dependencies to run unit tests, as well as [pre-commit](https://pre-commit.com/). To run the unit tests, and check their code coverage, run: ```bash @@ -141,51 +214,38 @@ If you want to run the pre-commit tests without doing a commit, run: ``` Make sure to do this at least once before your first commit to check your setup works. -Filing bugs ------------ -We use GitHub issues to track all bugs and feature requests; feel free to -open an issue if you have found a bug or wish to see a feature implemented. - -It is recommended to check that your issue complies with the -following rules before submitting: - -- Verify that your issue is not being currently addressed by other - [issues](https://github.com/openml/openml-python/issues) - or [pull requests](https://github.com/openml/openml-python/pulls). - -- Please ensure all code snippets and error messages are formatted in - appropriate code blocks. - See [Creating and highlighting code blocks](https://help.github.com/articles/creating-and-highlighting-code-blocks). - -- Please include your operating system type and version number, as well - as your Python, openml, scikit-learn, numpy, and scipy versions. 
This information - can be found by running the following code snippet: +Executing a specific unit test can be done by specifying the module, test case, and test. +To obtain a hierarchical list of all tests, run - ```python - import platform; print(platform.platform()) - import sys; print("Python", sys.version) - import numpy; print("NumPy", numpy.__version__) - import scipy; print("SciPy", scipy.__version__) - import sklearn; print("Scikit-Learn", sklearn.__version__) - import openml; print("OpenML", openml.__version__) - ``` + ```bash + $ pytest --collect-only + + + + + + + + + + + + ``` -New contributor tips --------------------- +You may then run a specific module, test case, or unit test respectively: +```bash + $ pytest tests/test_datasets/test_dataset.py + $ pytest tests/test_datasets/test_dataset.py::OpenMLDatasetTest + $ pytest tests/test_datasets/test_dataset.py::OpenMLDatasetTest::test_get_data +``` -A great way to start contributing to openml-python is to pick an item -from the list of [Good First Issues](https://github.com/openml/openml-python/labels/Good%20first%20issue) -in the issue tracker. Resolving these issues allow you to start -contributing to the project without much prior knowledge. Your -assistance in this area will be greatly appreciated by the more -experienced developers as it helps free up their time to concentrate on -other issues. +Happy testing! Documentation ------------- We are glad to accept any sort of documentation: function docstrings, -reStructuredText documents (like this one), tutorials, etc. +reStructuredText documents, tutorials, etc. reStructuredText documents live in the source code repository under the doc/ directory. diff --git a/doc/contributing.rst b/doc/contributing.rst index d23ac0ad2..92a113633 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -2,158 +2,34 @@ .. _contributing: - ============ Contributing ============ -Contribution to the OpenML package is highly appreciated. Currently, -there is a lot of work left on implementing API calls, -testing them and providing examples to allow new users to easily use the -OpenML package. See the :ref:`issues` section for open tasks. - -Please mark yourself as contributor in a github issue if you start working on -something to avoid duplicate work. If you're part of the OpenML organization -you can use github's assign feature, otherwise you can just leave a comment. - -.. _scope: - -Scope of the package -==================== - -The scope of the OpenML Python package is to provide a Python interface to -the OpenML platform which integrates well with Python's scientific stack, most -notably `numpy `_ and `scipy `_. -To reduce opportunity costs and demonstrate the usage of the package, it also -implements an interface to the most popular machine learning package written -in Python, `scikit-learn `_. -Thereby it will automatically be compatible with many machine learning -libraries written in Python. - -We aim to keep the package as light-weight as possible and we will try to -keep the number of potential installation dependencies as low as possible. -Therefore, the connection to other machine learning libraries such as -*pytorch*, *keras* or *tensorflow* should not be done directly inside this -package, but in a separate package using the OpenML Python connector. - -.. _issues: - -Open issues and potential todos -=============================== - -We collect open issues and feature requests in an `issue tracker on github `_. 
-The issue tracker contains issues marked as *Good first issue*, which shows -issues which are good for beginners. We also maintain a somewhat up-to-date -`roadmap `_ which -contains longer-term goals. - -.. _how_to_contribute: - -How to contribute -================= - -There are many ways to contribute to the development of the OpenML Python -connector and OpenML in general. We welcome all kinds of contributions, -especially: - -* Source code which fixes an issue, improves usability or implements a new - feature. -* Improvements to the documentation, which can be found in the ``doc`` - directory. -* New examples - current examples can be found in the ``examples`` directory. -* Bug reports - if something doesn't work for you or is cumbersome, please - open a new issue to let us know about the problem. -* Use the package and spread the word. -* `Cite OpenML `_ if you use it in a scientific - publication. -* Visit one of our `hackathons `_. -* Check out how to `contribute to the main OpenML project `_. - -Contributing code -~~~~~~~~~~~~~~~~~ - -Our guidelines on code contribution can be found in `this file `_. - -.. _installation: - -Installation -============ - -Installation from github -~~~~~~~~~~~~~~~~~~~~~~~~ - -The package source code is available from -`github `_ and can be obtained with: - -.. code:: bash - - git clone https://github.com/openml/openml-python.git - - -Once you cloned the package, change into the new directory. -If you are a regular user, install with - -.. code:: bash - - pip install -e . - -If you are a contributor, you will also need to install test dependencies - -.. code:: bash +Contribution to the OpenML package is highly appreciated in all forms. +In particular, a few ways to contribute to openml-python are: - pip install -e ".[test]" + * A direct contribution to the package, by means of improving the + code, documentation or examples. To get started, see `this file `_ + with details on how to set up your environment to develop for openml-python. + * A contribution to an openml-python extension. An extension package allows OpenML to interface + with a machine learning package (such as scikit-learn or keras). These extensions + are hosted in separate repositories and may have their own guidelines. + For more information, see the :ref:`extensions` below. -Testing -======= - -From within the directory of the cloned package, execute: - -.. code:: bash - - pytest tests/ - -Executing a specific test can be done by specifying the module, test case, and test. -To obtain a hierarchical list of all tests, run - -.. code:: bash - - pytest --collect-only - -.. code:: bash - - - - - - - - - - - - - -To run a specific module, add the module name, for instance: - -.. code:: bash - - pytest tests/test_datasets/test_dataset.py - -To run a specific unit test case, add the test case name, for instance: - -.. code:: bash - - pytest tests/test_datasets/test_dataset.py::OpenMLDatasetTest - -To run a specific unit test, add the test name, for instance: + * Bug reports. If something doesn't work for you or is cumbersome, please + open a new issue to let us know about the problem. + See `this section `_. -.. code:: bash + * `Cite OpenML `_ if you use it in a scientific + publication. - pytest tests/test_datasets/test_dataset.py::OpenMLDatasetTest::test_get_data + * Visit one of our `hackathons `_. -Happy testing! + * Contribute to another OpenML project, such as `the main OpenML project `_. +.. 
_extensions: Connecting new machine learning libraries ========================================= From 525e8a63e0cc2aad229c19c957ff13b5934461cd Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Tue, 7 Jul 2020 21:33:10 +0200 Subject: [PATCH 26/48] improve error message for dataset upload (#927) * improve error message for dataset upload * fix unit test --- openml/datasets/dataset.py | 36 ++++++++++++++++++++++++----- tests/test_datasets/test_dataset.py | 6 ++--- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 05ed55fe3..a6ea76592 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -133,16 +133,40 @@ def __init__( qualities=None, dataset=None, ): + def find_invalid_characters(string, pattern): + invalid_chars = set() + regex = re.compile(pattern) + for char in string: + if not regex.match(char): + invalid_chars.add(char) + invalid_chars = ",".join( + [ + "'{}'".format(char) if char != "'" else '"{}"'.format(char) + for char in invalid_chars + ] + ) + return invalid_chars + if dataset_id is None: - if description and not re.match("^[\x00-\x7F]*$", description): + pattern = "^[\x00-\x7F]*$" + if description and not re.match(pattern, description): # not basiclatin (XSD complains) - raise ValueError("Invalid symbols in description: {}".format(description)) - if citation and not re.match("^[\x00-\x7F]*$", citation): + invalid_characters = find_invalid_characters(description, pattern) + raise ValueError( + "Invalid symbols {} in description: {}".format(invalid_characters, description) + ) + pattern = "^[\x00-\x7F]*$" + if citation and not re.match(pattern, citation): # not basiclatin (XSD complains) - raise ValueError("Invalid symbols in citation: {}".format(citation)) - if not re.match("^[a-zA-Z0-9_\\-\\.\\(\\),]+$", name): + invalid_characters = find_invalid_characters(citation, pattern) + raise ValueError( + "Invalid symbols {} in citation: {}".format(invalid_characters, citation) + ) + pattern = "^[a-zA-Z0-9_\\-\\.\\(\\),]+$" + if not re.match(pattern, name): # regex given by server in error message - raise ValueError("Invalid symbols in name: {}".format(name)) + invalid_characters = find_invalid_characters(name, pattern) + raise ValueError("Invalid symbols {} in name: {}".format(invalid_characters, name)) # TODO add function to check if the name is casual_string128 # Attributes received by querying the RESTful API self.dataset_id = int(dataset_id) if dataset_id is not None else None diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index fcc6eddc7..73dbfa133 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -38,13 +38,13 @@ def test_repr(self): str(data) def test_init_string_validation(self): - with pytest.raises(ValueError, match="Invalid symbols in name"): + with pytest.raises(ValueError, match="Invalid symbols ' ' in name"): openml.datasets.OpenMLDataset(name="some name", description="a description") - with pytest.raises(ValueError, match="Invalid symbols in description"): + with pytest.raises(ValueError, match="Invalid symbols 'ï' in description"): openml.datasets.OpenMLDataset(name="somename", description="a descriptïon") - with pytest.raises(ValueError, match="Invalid symbols in citation"): + with pytest.raises(ValueError, match="Invalid symbols 'ü' in citation"): openml.datasets.OpenMLDataset( name="somename", description="a description", citation="Something by Müller" ) From 
4256834c3f9a361f748c26bed925a3e6d6d08739 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 8 Jul 2020 09:15:49 +0200 Subject: [PATCH 27/48] FIX #912: add create_task to API doc (#924) --- doc/api.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/api.rst b/doc/api.rst index 0f1329d45..0bc092bd0 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -162,6 +162,7 @@ Modules :toctree: generated/ :template: function.rst + create_task get_task get_tasks list_tasks From e5dcaf01b100f7712a00a6f254e2ee7737930002 Mon Sep 17 00:00:00 2001 From: Bilgecelik <38037323+Bilgecelik@users.noreply.github.com> Date: Tue, 14 Jul 2020 12:20:07 +0200 Subject: [PATCH 28/48] Rename arguments of list_evaluations (#933) * list evals name change * list evals - update --- .../30_extended/fetch_evaluations_tutorial.py | 6 +- .../plot_svm_hyperparameters_tutorial.py | 4 +- examples/40_paper/2018_ida_strang_example.py | 2 +- examples/40_paper/2018_kdd_rijn_example.py | 4 +- .../40_paper/2018_neurips_perrone_example.py | 6 +- openml/evaluations/functions.py | 138 +++++++++--------- .../test_evaluation_functions.py | 32 ++-- .../test_evaluations_example.py | 4 +- 8 files changed, 100 insertions(+), 96 deletions(-) diff --git a/examples/30_extended/fetch_evaluations_tutorial.py b/examples/30_extended/fetch_evaluations_tutorial.py index de636e074..2823eabf3 100644 --- a/examples/30_extended/fetch_evaluations_tutorial.py +++ b/examples/30_extended/fetch_evaluations_tutorial.py @@ -63,7 +63,7 @@ metric = "predictive_accuracy" evals = openml.evaluations.list_evaluations( - function=metric, task=[task_id], output_format="dataframe" + function=metric, tasks=[task_id], output_format="dataframe" ) # Displaying the first 10 rows print(evals.head(n=10)) @@ -162,7 +162,7 @@ def plot_flow_compare(evaluations, top_n=10, metric="predictive_accuracy"): # List evaluations in descending order based on predictive_accuracy with # hyperparameters evals_setups = openml.evaluations.list_evaluations_setups( - function="predictive_accuracy", task=[31], size=100, sort_order="desc" + function="predictive_accuracy", tasks=[31], size=100, sort_order="desc" ) "" @@ -173,7 +173,7 @@ def plot_flow_compare(evaluations, top_n=10, metric="predictive_accuracy"): # with hyperparameters. parameters_in_separate_columns returns parameters in # separate columns evals_setups = openml.evaluations.list_evaluations_setups( - function="predictive_accuracy", flow=[6767], size=100, parameters_in_separate_columns=True + function="predictive_accuracy", flows=[6767], size=100, parameters_in_separate_columns=True ) "" diff --git a/examples/30_extended/plot_svm_hyperparameters_tutorial.py b/examples/30_extended/plot_svm_hyperparameters_tutorial.py index aac84bcd4..e366c56df 100644 --- a/examples/30_extended/plot_svm_hyperparameters_tutorial.py +++ b/examples/30_extended/plot_svm_hyperparameters_tutorial.py @@ -20,8 +20,8 @@ # uploaded runs (called *setup*). df = openml.evaluations.list_evaluations_setups( function="predictive_accuracy", - flow=[8353], - task=[6], + flows=[8353], + tasks=[6], output_format="dataframe", # Using this flag incorporates the hyperparameters into the returned dataframe. Otherwise, # the dataframe would contain a field ``paramaters`` containing an unparsed dictionary. 
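
A minimal usage sketch of the renamed keyword arguments in this patch (illustrative only; the
task and flow IDs are placeholders reused from the tutorial above, and the call assumes the
post-rename signatures shown in this diff):

    import openml

    # After this patch the filters are passed with plural keyword names
    # (formerly `task=`, `flow=`, `uploader=`, etc.).
    evals = openml.evaluations.list_evaluations(
        function="predictive_accuracy",
        tasks=[31],        # placeholder task ID
        flows=[6767],      # placeholder flow ID
        size=10,
        sort_order="desc",
        output_format="dataframe",
    )
    print(evals.head())
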
diff --git a/examples/40_paper/2018_ida_strang_example.py b/examples/40_paper/2018_ida_strang_example.py index 74c6fde5f..687d973c2 100644 --- a/examples/40_paper/2018_ida_strang_example.py +++ b/examples/40_paper/2018_ida_strang_example.py @@ -47,7 +47,7 @@ # Downloads all evaluation records related to this study evaluations = openml.evaluations.list_evaluations( - measure, flow=flow_ids, study=study_id, output_format="dataframe" + measure, flows=flow_ids, study=study_id, output_format="dataframe" ) # gives us a table with columns data_id, flow1_value, flow2_value evaluations = evaluations.pivot(index="data_id", columns="flow_id", values="value").dropna() diff --git a/examples/40_paper/2018_kdd_rijn_example.py b/examples/40_paper/2018_kdd_rijn_example.py index e5d998e35..752419ea3 100644 --- a/examples/40_paper/2018_kdd_rijn_example.py +++ b/examples/40_paper/2018_kdd_rijn_example.py @@ -88,8 +88,8 @@ # note that we explicitly only include tasks from the benchmark suite that was specified (as per the for-loop) evals = openml.evaluations.list_evaluations_setups( evaluation_measure, - flow=[flow_id], - task=[task_id], + flows=[flow_id], + tasks=[task_id], size=limit_per_task, output_format="dataframe", ) diff --git a/examples/40_paper/2018_neurips_perrone_example.py b/examples/40_paper/2018_neurips_perrone_example.py index 8639e0a3a..60d212116 100644 --- a/examples/40_paper/2018_neurips_perrone_example.py +++ b/examples/40_paper/2018_neurips_perrone_example.py @@ -91,9 +91,9 @@ def fetch_evaluations(run_full=False, flow_type="svm", metric="area_under_roc_cu # Fetching evaluations eval_df = openml.evaluations.list_evaluations_setups( function=metric, - task=task_ids, - flow=[flow_id], - uploader=[2702], + tasks=task_ids, + flows=[flow_id], + uploaders=[2702], output_format="dataframe", parameters_in_separate_columns=True, ) diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py index adaf419ef..4c17f8ce7 100644 --- a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -17,11 +17,11 @@ def list_evaluations( function: str, offset: Optional[int] = None, size: Optional[int] = None, - task: Optional[List] = None, - setup: Optional[List] = None, - flow: Optional[List] = None, - run: Optional[List] = None, - uploader: Optional[List] = None, + tasks: Optional[List[Union[str, int]]] = None, + setups: Optional[List[Union[str, int]]] = None, + flows: Optional[List[Union[str, int]]] = None, + runs: Optional[List[Union[str, int]]] = None, + uploaders: Optional[List[Union[str, int]]] = None, tag: Optional[str] = None, study: Optional[int] = None, per_fold: Optional[bool] = None, @@ -41,17 +41,18 @@ def list_evaluations( size : int, optional the maximum number of runs to show - task : list, optional - - setup: list, optional - - flow : list, optional - - run : list, optional - - uploader : list, optional - + tasks : list[int,str], optional + the list of task IDs + setups: list[int,str], optional + the list of setup IDs + flows : list[int,str], optional + the list of flow IDs + runs :list[int,str], optional + the list of run IDs + uploaders : list[int,str], optional + the list of uploader IDs tag : str, optional + filter evaluation based on given tag study : int, optional @@ -85,11 +86,11 @@ def list_evaluations( function=function, offset=offset, size=size, - task=task, - setup=setup, - flow=flow, - run=run, - uploader=uploader, + tasks=tasks, + setups=setups, + flows=flows, + runs=runs, + uploaders=uploaders, tag=tag, study=study, sort_order=sort_order, @@ 
-99,11 +100,11 @@ def list_evaluations( def _list_evaluations( function: str, - task: Optional[List] = None, - setup: Optional[List] = None, - flow: Optional[List] = None, - run: Optional[List] = None, - uploader: Optional[List] = None, + tasks: Optional[List] = None, + setups: Optional[List] = None, + flows: Optional[List] = None, + runs: Optional[List] = None, + uploaders: Optional[List] = None, study: Optional[int] = None, sort_order: Optional[str] = None, output_format: str = "object", @@ -120,15 +121,16 @@ def _list_evaluations( function : str the evaluation function. e.g., predictive_accuracy - task : list, optional - - setup: list, optional - - flow : list, optional - - run : list, optional - - uploader : list, optional + tasks : list[int,str], optional + the list of task IDs + setups: list[int,str], optional + the list of setup IDs + flows : list[int,str], optional + the list of flow IDs + runs :list[int,str], optional + the list of run IDs + uploaders : list[int,str], optional + the list of uploader IDs study : int, optional @@ -155,16 +157,16 @@ def _list_evaluations( if kwargs is not None: for operator, value in kwargs.items(): api_call += "/%s/%s" % (operator, value) - if task is not None: - api_call += "/task/%s" % ",".join([str(int(i)) for i in task]) - if setup is not None: - api_call += "/setup/%s" % ",".join([str(int(i)) for i in setup]) - if flow is not None: - api_call += "/flow/%s" % ",".join([str(int(i)) for i in flow]) - if run is not None: - api_call += "/run/%s" % ",".join([str(int(i)) for i in run]) - if uploader is not None: - api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploader]) + if tasks is not None: + api_call += "/task/%s" % ",".join([str(int(i)) for i in tasks]) + if setups is not None: + api_call += "/setup/%s" % ",".join([str(int(i)) for i in setups]) + if flows is not None: + api_call += "/flow/%s" % ",".join([str(int(i)) for i in flows]) + if runs is not None: + api_call += "/run/%s" % ",".join([str(int(i)) for i in runs]) + if uploaders is not None: + api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploaders]) if study is not None: api_call += "/study/%d" % study if sort_order is not None: @@ -276,11 +278,11 @@ def list_evaluations_setups( function: str, offset: Optional[int] = None, size: Optional[int] = None, - task: Optional[List] = None, - setup: Optional[List] = None, - flow: Optional[List] = None, - run: Optional[List] = None, - uploader: Optional[List] = None, + tasks: Optional[List] = None, + setups: Optional[List] = None, + flows: Optional[List] = None, + runs: Optional[List] = None, + uploaders: Optional[List] = None, tag: Optional[str] = None, per_fold: Optional[bool] = None, sort_order: Optional[str] = None, @@ -299,15 +301,15 @@ def list_evaluations_setups( the number of runs to skip, starting from the first size : int, optional the maximum number of runs to show - task : list[int], optional + tasks : list[int], optional the list of task IDs - setup: list[int], optional + setups: list[int], optional the list of setup IDs - flow : list[int], optional + flows : list[int], optional the list of flow IDs - run : list[int], optional + runs : list[int], optional the list of run IDs - uploader : list[int], optional + uploaders : list[int], optional the list of uploader IDs tag : str, optional filter evaluation based on given tag @@ -327,7 +329,7 @@ def list_evaluations_setups( ------- dict or dataframe with hyperparameter settings as a list of tuples. 
""" - if parameters_in_separate_columns and (flow is None or len(flow) != 1): + if parameters_in_separate_columns and (flows is None or len(flows) != 1): raise ValueError( "Can set parameters_in_separate_columns to true " "only for single flow_id" ) @@ -337,11 +339,11 @@ def list_evaluations_setups( function=function, offset=offset, size=size, - run=run, - task=task, - setup=setup, - flow=flow, - uploader=uploader, + runs=runs, + tasks=tasks, + setups=setups, + flows=flows, + uploaders=uploaders, tag=tag, per_fold=per_fold, sort_order=sort_order, @@ -359,24 +361,26 @@ def list_evaluations_setups( setup_chunks = np.array_split( ary=evals["setup_id"].unique(), indices_or_sections=((length - 1) // N) + 1 ) - setups = pd.DataFrame() - for setup in setup_chunks: - result = pd.DataFrame(openml.setups.list_setups(setup=setup, output_format="dataframe")) + setup_data = pd.DataFrame() + for setups in setup_chunks: + result = pd.DataFrame( + openml.setups.list_setups(setup=setups, output_format="dataframe") + ) result.drop("flow_id", axis=1, inplace=True) # concat resulting setup chunks into single datframe - setups = pd.concat([setups, result], ignore_index=True) + setup_data = pd.concat([setup_data, result], ignore_index=True) parameters = [] # Convert parameters of setup into list of tuples of (hyperparameter, value) - for parameter_dict in setups["parameters"]: + for parameter_dict in setup_data["parameters"]: if parameter_dict is not None: parameters.append( {param["full_name"]: param["value"] for param in parameter_dict.values()} ) else: parameters.append({}) - setups["parameters"] = parameters + setup_data["parameters"] = parameters # Merge setups with evaluations - df = pd.merge(evals, setups, on="setup_id", how="left") + df = pd.merge(evals, setup_data, on="setup_id", how="left") if parameters_in_separate_columns: df = pd.concat([df.drop("parameters", axis=1), df["parameters"].apply(pd.Series)], axis=1) diff --git a/tests/test_evaluations/test_evaluation_functions.py b/tests/test_evaluations/test_evaluation_functions.py index 6fcaea2d4..0127309a7 100644 --- a/tests/test_evaluations/test_evaluation_functions.py +++ b/tests/test_evaluations/test_evaluation_functions.py @@ -41,7 +41,7 @@ def test_evaluation_list_filter_task(self): task_id = 7312 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", task=[task_id]) + evaluations = openml.evaluations.list_evaluations("predictive_accuracy", tasks=[task_id]) self.assertGreater(len(evaluations), 100) for run_id in evaluations.keys(): @@ -56,7 +56,7 @@ def test_evaluation_list_filter_uploader_ID_16(self): uploader_id = 16 evaluations = openml.evaluations.list_evaluations( - "predictive_accuracy", uploader=[uploader_id], output_format="dataframe" + "predictive_accuracy", uploaders=[uploader_id], output_format="dataframe" ) self.assertEqual(evaluations["uploader"].unique(), [uploader_id]) @@ -66,7 +66,7 @@ def test_evaluation_list_filter_uploader_ID_10(self): openml.config.server = self.production_server setup_id = 10 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", setup=[setup_id]) + evaluations = openml.evaluations.list_evaluations("predictive_accuracy", setups=[setup_id]) self.assertGreater(len(evaluations), 50) for run_id in evaluations.keys(): @@ -81,7 +81,7 @@ def test_evaluation_list_filter_flow(self): flow_id = 100 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", flow=[flow_id]) + evaluations = openml.evaluations.list_evaluations("predictive_accuracy", flows=[flow_id]) 
self.assertGreater(len(evaluations), 2) for run_id in evaluations.keys(): @@ -96,7 +96,7 @@ def test_evaluation_list_filter_run(self): run_id = 12 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", run=[run_id]) + evaluations = openml.evaluations.list_evaluations("predictive_accuracy", runs=[run_id]) self.assertEqual(len(evaluations), 1) for run_id in evaluations.keys(): @@ -132,9 +132,9 @@ def test_evaluation_list_per_fold(self): "predictive_accuracy", size=size, offset=0, - task=task_ids, - flow=flow_ids, - uploader=uploader_ids, + tasks=task_ids, + flows=flow_ids, + uploaders=uploader_ids, per_fold=True, ) @@ -149,9 +149,9 @@ def test_evaluation_list_per_fold(self): "predictive_accuracy", size=size, offset=0, - task=task_ids, - flow=flow_ids, - uploader=uploader_ids, + tasks=task_ids, + flows=flow_ids, + uploaders=uploader_ids, per_fold=False, ) for run_id in evaluations.keys(): @@ -164,11 +164,11 @@ def test_evaluation_list_sort(self): task_id = 6 # Get all evaluations of the task unsorted_eval = openml.evaluations.list_evaluations( - "predictive_accuracy", offset=0, task=[task_id] + "predictive_accuracy", offset=0, tasks=[task_id] ) # Get top 10 evaluations of the same task sorted_eval = openml.evaluations.list_evaluations( - "predictive_accuracy", size=size, offset=0, task=[task_id], sort_order="desc" + "predictive_accuracy", size=size, offset=0, tasks=[task_id], sort_order="desc" ) self.assertEqual(len(sorted_eval), size) self.assertGreater(len(unsorted_eval), 0) @@ -191,11 +191,11 @@ def test_list_evaluations_setups_filter_flow(self): openml.config.server = self.production_server flow_id = [405] size = 100 - evals = self._check_list_evaluation_setups(flow=flow_id, size=size) + evals = self._check_list_evaluation_setups(flows=flow_id, size=size) # check if parameters in separate columns works evals_cols = openml.evaluations.list_evaluations_setups( "predictive_accuracy", - flow=flow_id, + flows=flow_id, size=size, sort_order="desc", output_format="dataframe", @@ -209,4 +209,4 @@ def test_list_evaluations_setups_filter_task(self): openml.config.server = self.production_server task_id = [6] size = 121 - self._check_list_evaluation_setups(task=task_id, size=size) + self._check_list_evaluation_setups(tasks=task_id, size=size) diff --git a/tests/test_evaluations/test_evaluations_example.py b/tests/test_evaluations/test_evaluations_example.py index 61b6c359e..5715b570a 100644 --- a/tests/test_evaluations/test_evaluations_example.py +++ b/tests/test_evaluations/test_evaluations_example.py @@ -14,8 +14,8 @@ def test_example_python_paper(self): df = openml.evaluations.list_evaluations_setups( "predictive_accuracy", - flow=[8353], - task=[6], + flows=[8353], + tasks=[6], output_format="dataframe", parameters_in_separate_columns=True, ) # Choose an SVM flow, for example 8353, and a task. From 16700507289c6eb3b9b2b664688eb817d2451b99 Mon Sep 17 00:00:00 2001 From: marcoslbueno <38478211+marcoslbueno@users.noreply.github.com> Date: Tue, 14 Jul 2020 12:21:08 +0200 Subject: [PATCH 29/48] adding config file to user guide (#931) * adding config file to user guide * finished requested changes --- doc/usage.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/doc/usage.rst b/doc/usage.rst index 36c8584ff..d7ad0d523 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -29,6 +29,35 @@ the OpenML Python connector, followed up by a simple example. 
* `Introduction `_ +~~~~~~~~~~~~~ +Configuration +~~~~~~~~~~~~~ + +The configuration file resides in a directory ``.openml`` in the home +directory of the user and is called config. It consists of ``key = value`` pairs +which are separated by newlines. The following keys are defined: + +* apikey: + * required to access the server. The `OpenML setup `_ describes how to obtain an API key. + +* server: + * default: ``http://www.openml.org``. Alternatively, use ``test.openml.org`` for the test server. + +* cachedir: + * if not given, will default to ``~/.openml/cache`` + +* avoid_duplicate_runs: + * if set to ``True``, when ``run_flow_on_task`` or similar methods are called a lookup is performed to see if there already exists such a run on the server. If so, download those results instead. + * if not given, will default to ``True``. + +* connection_n_retries: + * number of connection retries. + * default: 2. Maximum number of retries: 20. + +* verbosity: + * 0: normal output + * 1: info output + * 2: debug output ~~~~~~~~~~~~ Key concepts From 9c93f5b06a9802ae283ccba9d36a5e426378494a Mon Sep 17 00:00:00 2001 From: Sahithya Ravi <44670788+sahithyaravi1493@users.noreply.github.com> Date: Thu, 23 Jul 2020 13:08:52 +0200 Subject: [PATCH 30/48] Edit api (#935) * version1 * minor fixes * tests * reformat code * check new version * remove get data * code format * review comments * fix duplicate * type annotate * example * tests for exceptions * fix pep8 * black format --- doc/progress.rst | 2 +- examples/30_extended/datasets_tutorial.py | 43 ++++- openml/datasets/functions.py | 148 ++++++++++++++++++ tests/test_datasets/test_dataset_functions.py | 81 +++++++++- 4 files changed, 269 insertions(+), 5 deletions(-) diff --git a/doc/progress.rst b/doc/progress.rst index 976c5c750..ef5ed6bae 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -8,7 +8,7 @@ Changelog 0.11.0 ~~~~~~ - +* ADD #929: Add data edit API * FIX #873: Fixes an issue which resulted in incorrect URLs when printing OpenML objects after switching the server. * FIX #885: Logger no longer registered by default. Added utility functions to easily register diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index d7971d0f1..40b35bbea 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -5,12 +5,13 @@ How to list and download datasets. """ -############################################################################ +"" # License: BSD 3-Clauses import openml import pandas as pd +from openml.datasets.functions import edit_dataset, get_dataset ############################################################################ # Exercise 0 @@ -42,9 +43,9 @@ # * Find a dataset called 'eeg_eye_state'. # * Find all datasets with more than 50 classes. datalist[datalist.NumberOfInstances > 10000].sort_values(["NumberOfInstances"]).head(n=20) -############################################################################ +"" datalist.query('name == "eeg-eye-state"') -############################################################################ +"" datalist.query("NumberOfClasses > 50") ############################################################################ @@ -108,3 +109,39 @@ alpha=0.8, cmap="plasma", ) + + +############################################################################ +# Edit a created dataset +# ================================================= +# This example uses the test server, to avoid editing a dataset on the main server. 
+openml.config.start_using_configuration_for_example()
+############################################################################
+# Changes to these fields edit the existing version: allowed only for the dataset owner
+data_id = edit_dataset(
+    564,
+    description="xor dataset represents XOR operation",
+    contributor="",
+    collection_date="2019-10-29 17:06:18",
+    original_data_url="https://www.kaggle.com/ancientaxe/and-or-xor",
+    paper_url="",
+    citation="kaggle",
+    language="English",
+)
+edited_dataset = get_dataset(data_id)
+print(f"Edited dataset ID: {data_id}")
+
+
+############################################################################
+# Changes to these fields: attributes, default_target_attribute,
+# row_id_attribute, ignore_attribute generate a new edited version: allowed for anyone
+
+new_attributes = [
+    ("x0", "REAL"),
+    ("x1", "REAL"),
+    ("y", "REAL"),
+]
+data_id = edit_dataset(564, attributes=new_attributes)
+print(f"Edited dataset ID: {data_id}")
+
+openml.config.stop_using_configuration_for_example()
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 79fa82867..4446f0e90 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -799,6 +799,154 @@ def status_update(data_id, status):
         raise ValueError("Data id/status does not collide")
 
+
+def edit_dataset(
+    data_id,
+    description=None,
+    creator=None,
+    contributor=None,
+    collection_date=None,
+    language=None,
+    attributes=None,
+    data=None,
+    default_target_attribute=None,
+    ignore_attribute=None,
+    citation=None,
+    row_id_attribute=None,
+    original_data_url=None,
+    paper_url=None,
+) -> int:
+    """
+    Edits an OpenMLDataset.
+    Specify at least one field to edit, apart from data_id.
+    - For certain fields, a new dataset version is created: attributes, data,
+      default_target_attribute, ignore_attribute, row_id_attribute.
+
+    - For other fields, the uploader can edit the existing version.
+      No one except the uploader can edit the existing version.
+
+    Parameters
+    ----------
+    data_id : int
+        ID of the dataset.
+    description : str
+        Description of the dataset.
+    creator : str
+        The person who created the dataset.
+    contributor : str
+        People who contributed to the current version of the dataset.
+    collection_date : str
+        The date the data was originally collected, given by the uploader.
+    language : str
+        Language in which the data is represented.
+        Starts with 1 upper case letter, rest lower case, e.g. 'English'.
+    attributes : list, dict, or 'auto'
+        A list of tuples. Each tuple consists of the attribute name and type.
+        If passing a pandas DataFrame, the attributes can be automatically
+        inferred by passing ``'auto'``. Specific attributes can be manually
+        specified by passing a dictionary where the key is the name of the
+        attribute and the value is the data type of the attribute.
+    data : ndarray, list, dataframe, coo_matrix, shape (n_samples, n_features)
+        An array that contains both the attributes and the targets. When
+        providing a dataframe, the attribute names and type can be inferred by
+        passing ``attributes='auto'``.
+        The target feature is indicated as meta-data of the dataset.
+    default_target_attribute : str
+        The default target attribute, if it exists.
+        Can have multiple values, comma separated.
+    ignore_attribute : str | list
+        Attributes that should be excluded in modelling,
+        such as identifiers and indexes.
+    citation : str
+        Reference(s) that should be cited when building on this data.
+ row_id_attribute : str, optional + The attribute that represents the row-id column, if present in the + dataset. If ``data`` is a dataframe and ``row_id_attribute`` is not + specified, the index of the dataframe will be used as the + ``row_id_attribute``. If the name of the index is ``None``, it will + be discarded. + + .. versionadded: 0.8 + Inference of ``row_id_attribute`` from a dataframe. + original_data_url : str, optional + For derived data, the url to the original dataset. + paper_url : str, optional + Link to a paper describing the dataset. + + + Returns + ------- + data_id of the existing edited version or the new version created and published""" + if not isinstance(data_id, int): + raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id))) + + # case 1, changing these fields creates a new version of the dataset with changed field + if any( + field is not None + for field in [ + data, + attributes, + default_target_attribute, + row_id_attribute, + ignore_attribute, + ] + ): + logger.warning("Creating a new version of dataset, cannot edit existing version") + dataset = get_dataset(data_id) + + decoded_arff = dataset._get_arff(format="arff") + data_old = decoded_arff["data"] + data_new = data if data is not None else data_old + dataset_new = create_dataset( + name=dataset.name, + description=description or dataset.description, + creator=creator or dataset.creator, + contributor=contributor or dataset.contributor, + collection_date=collection_date or dataset.collection_date, + language=language or dataset.language, + licence=dataset.licence, + attributes=attributes or decoded_arff["attributes"], + data=data_new, + default_target_attribute=default_target_attribute or dataset.default_target_attribute, + ignore_attribute=ignore_attribute or dataset.ignore_attribute, + citation=citation or dataset.citation, + row_id_attribute=row_id_attribute or dataset.row_id_attribute, + original_data_url=original_data_url or dataset.original_data_url, + paper_url=paper_url or dataset.paper_url, + update_comment=dataset.update_comment, + version_label=dataset.version_label, + ) + dataset_new.publish() + return dataset_new.dataset_id + + # case 2, changing any of these fields will update existing dataset + # compose data edit parameters as xml + form_data = {"data_id": data_id} + xml = OrderedDict() # type: 'OrderedDict[str, OrderedDict]' + xml["oml:data_edit_parameters"] = OrderedDict() + xml["oml:data_edit_parameters"]["@xmlns:oml"] = "http://openml.org/openml" + xml["oml:data_edit_parameters"]["oml:description"] = description + xml["oml:data_edit_parameters"]["oml:creator"] = creator + xml["oml:data_edit_parameters"]["oml:contributor"] = contributor + xml["oml:data_edit_parameters"]["oml:collection_date"] = collection_date + xml["oml:data_edit_parameters"]["oml:language"] = language + xml["oml:data_edit_parameters"]["oml:citation"] = citation + xml["oml:data_edit_parameters"]["oml:original_data_url"] = original_data_url + xml["oml:data_edit_parameters"]["oml:paper_url"] = paper_url + + # delete None inputs + for k in list(xml["oml:data_edit_parameters"]): + if not xml["oml:data_edit_parameters"][k]: + del xml["oml:data_edit_parameters"][k] + + file_elements = {"edit_parameters": ("description.xml", xmltodict.unparse(xml))} + result_xml = openml._api_calls._perform_api_call( + "data/edit", "post", data=form_data, file_elements=file_elements + ) + result = xmltodict.parse(result_xml) + data_id = result["oml:data_edit"]["oml:id"] + return int(data_id) + + def 
_get_dataset_description(did_cache_dir, dataset_id): """Get the dataset description as xml dictionary. diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 958d28d94..c196ea36e 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -16,11 +16,17 @@ import openml from openml import OpenMLDataset -from openml.exceptions import OpenMLCacheException, OpenMLHashException, OpenMLPrivateDatasetError +from openml.exceptions import ( + OpenMLCacheException, + OpenMLHashException, + OpenMLPrivateDatasetError, + OpenMLServerException, +) from openml.testing import TestBase from openml.utils import _tag_entity, _create_cache_directory_for_id from openml.datasets.functions import ( create_dataset, + edit_dataset, attributes_arff_from_df, _get_cached_dataset, _get_cached_dataset_features, @@ -1331,3 +1337,76 @@ def test_get_dataset_cache_format_feather(self): self.assertEqual(X.shape, (150, 5)) self.assertEqual(len(categorical), X.shape[1]) self.assertEqual(len(attribute_names), X.shape[1]) + + def test_data_edit(self): + + # admin key for test server (only admins or owners can edit datasets). + # all users can edit their own datasets) + openml.config.apikey = "d488d8afd93b32331cf6ea9d7003d4c3" + + # case 1, editing description, creator, contributor, collection_date, original_data_url, + # paper_url, citation, language edits existing dataset. + did = 564 + result = edit_dataset( + did, + description="xor dataset represents XOR operation", + contributor="", + collection_date="2019-10-29 17:06:18", + original_data_url="https://www.kaggle.com/ancientaxe/and-or-xor", + paper_url="", + citation="kaggle", + language="English", + ) + self.assertEqual(result, did) + + # case 2, editing data, attributes, default_target_attribute, row_id_attribute, + # ignore_attribute generates a new dataset + + column_names = [ + ("input1", "REAL"), + ("input2", "REAL"), + ("y", "REAL"), + ] + desc = "xor dataset represents XOR operation" + result = edit_dataset( + 564, + description=desc, + contributor="", + collection_date="2019-10-29 17:06:18", + attributes=column_names, + original_data_url="https://www.kaggle.com/ancientaxe/and-or-xor", + paper_url="", + citation="kaggle", + language="English", + ) + self.assertNotEqual(did, result) + + def test_data_edit_errors(self): + + # admin key for test server (only admins or owners can edit datasets). 
+ openml.config.apikey = "d488d8afd93b32331cf6ea9d7003d4c3" + # Check server exception when no field to edit is provided + self.assertRaisesRegex( + OpenMLServerException, + "Please provide atleast one field among description, creator, contributor, " + "collection_date, language, citation, original_data_url or paper_url to edit.", + edit_dataset, + data_id=564, + ) + # Check server exception when unknown dataset is provided + self.assertRaisesRegex( + OpenMLServerException, + "Unknown dataset", + edit_dataset, + data_id=100000, + description="xor operation dataset", + ) + # Check server exception when a non-owner or non-admin tries to edit existing dataset + openml.config.apikey = "5f0b74b33503e4ad4a7181a91e28719f" + self.assertRaisesRegex( + OpenMLServerException, + "Dataset is not owned by you", + edit_dataset, + data_id=564, + description="xor data", + ) From 666ca68790be90ae1153a6c355b7c1ad9921ef52 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Mon, 3 Aug 2020 11:01:25 +0200 Subject: [PATCH 31/48] Adding support for scikit-learn > 0.22 (#936) * Preliminary changes * Updating unit tests for sklearn 0.22 and above * Triggering sklearn tests + fixes * Refactoring to inspect.signature in extensions --- .travis.yml | 6 +- openml/extensions/sklearn/extension.py | 18 +- .../test_sklearn_extension.py | 196 ++++++++++++------ tests/test_flows/test_flow.py | 77 +++++-- tests/test_runs/test_run_functions.py | 10 +- 5 files changed, 216 insertions(+), 91 deletions(-) diff --git a/.travis.yml b/.travis.yml index dcfda6d37..7360339ac 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,9 +15,13 @@ env: - TEST_DIR=/tmp/test_dir/ - MODULE=openml matrix: - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2" TEST_DIST="true" - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" RUN_FLAKE8="true" SKIP_TESTS="true" - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" COVERAGE="true" DOCPUSH="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.23.1" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.23.1" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.22.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.22.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2" TEST_DIST="true" - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.2" # Checks for older scikit-learn versions (which also don't nicely work with # Python3.7) diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index af0b42144..fe9d029aa 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -994,12 +994,16 @@ def _get_fn_arguments_with_defaults(self, fn_name: Callable) -> Tuple[Dict, Set] a set with all parameters that do not have a default value """ # parameters with defaults are optional, all others are required. 
- signature = inspect.getfullargspec(fn_name) - if signature.defaults: - optional_params = dict(zip(reversed(signature.args), reversed(signature.defaults))) - else: - optional_params = dict() - required_params = {arg for arg in signature.args if arg not in optional_params} + parameters = inspect.signature(fn_name).parameters + required_params = set() + optional_params = dict() + for param in parameters.keys(): + parameter = parameters.get(param) + default_val = parameter.default # type: ignore + if default_val is inspect.Signature.empty: + required_params.add(param) + else: + optional_params[param] = default_val return optional_params, required_params def _deserialize_model( @@ -1346,7 +1350,7 @@ def _can_measure_cputime(self, model: Any) -> bool: # check the parameters for n_jobs n_jobs_vals = SklearnExtension._get_parameter_values_recursive(model.get_params(), "n_jobs") for val in n_jobs_vals: - if val is not None and val != 1: + if val is not None and val != 1 and val != "deprecated": return False return True diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index 48832b58f..acc93b024 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -77,11 +77,14 @@ def test_serialize_model(self): criterion="entropy", max_features="auto", max_leaf_nodes=2000 ) - fixture_name = "sklearn.tree.tree.DecisionTreeClassifier" + tree_name = "tree" if LooseVersion(sklearn.__version__) < "0.22" else "_classes" + fixture_name = "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name) fixture_short_name = "sklearn.DecisionTreeClassifier" # str obtained from self.extension._get_sklearn_description(model) fixture_description = "A decision tree classifier." version_fixture = "sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9" % sklearn.__version__ + + presort_val = "false" if LooseVersion(sklearn.__version__) < "0.22" else '"deprecated"' # min_impurity_decrease has been introduced in 0.20 # min_impurity_split has been deprecated in 0.20 if LooseVersion(sklearn.__version__) < "0.19": @@ -114,12 +117,16 @@ def test_serialize_model(self): ("min_samples_leaf", "1"), ("min_samples_split", "2"), ("min_weight_fraction_leaf", "0.0"), - ("presort", "false"), + ("presort", presort_val), ("random_state", "null"), ("splitter", '"best"'), ) ) - structure_fixture = {"sklearn.tree.tree.DecisionTreeClassifier": []} + if LooseVersion(sklearn.__version__) >= "0.22": + fixture_parameters.update({"ccp_alpha": "0.0"}) + fixture_parameters.move_to_end("ccp_alpha", last=False) + + structure_fixture = {"sklearn.tree.{}.DecisionTreeClassifier".format(tree_name): []} serialization = self.extension.model_to_flow(model) structure = serialization.get_structure("name") @@ -161,11 +168,18 @@ def test_serialize_model_clustering(self): with mock.patch.object(self.extension, "_check_dependencies") as check_dependencies_mock: model = sklearn.cluster.KMeans() - fixture_name = "sklearn.cluster.k_means_.KMeans" + cluster_name = "k_means_" if LooseVersion(sklearn.__version__) < "0.22" else "_kmeans" + fixture_name = "sklearn.cluster.{}.KMeans".format(cluster_name) fixture_short_name = "sklearn.KMeans" # str obtained from self.extension._get_sklearn_description(model) - fixture_description = "K-Means clustering" + fixture_description = "K-Means clustering{}".format( + "" if LooseVersion(sklearn.__version__) < "0.22" else "." 
+ ) version_fixture = "sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9" % sklearn.__version__ + + n_jobs_val = "null" if LooseVersion(sklearn.__version__) < "0.23" else '"deprecated"' + precomp_val = '"auto"' if LooseVersion(sklearn.__version__) < "0.23" else '"deprecated"' + # n_jobs default has changed to None in 0.20 if LooseVersion(sklearn.__version__) < "0.20": fixture_parameters = OrderedDict( @@ -192,14 +206,14 @@ def test_serialize_model_clustering(self): ("max_iter", "300"), ("n_clusters", "8"), ("n_init", "10"), - ("n_jobs", "null"), - ("precompute_distances", '"auto"'), + ("n_jobs", n_jobs_val), + ("precompute_distances", precomp_val), ("random_state", "null"), ("tol", "0.0001"), ("verbose", "0"), ) ) - fixture_structure = {"sklearn.cluster.k_means_.KMeans": []} + fixture_structure = {"sklearn.cluster.{}.KMeans".format(cluster_name): []} serialization = self.extension.model_to_flow(model) structure = serialization.get_structure("name") @@ -230,11 +244,15 @@ def test_serialize_model_with_subcomponent(self): n_estimators=100, base_estimator=sklearn.tree.DecisionTreeClassifier() ) + weight_name = "{}weight_boosting".format( + "" if LooseVersion(sklearn.__version__) < "0.22" else "_" + ) + tree_name = "tree" if LooseVersion(sklearn.__version__) < "0.22" else "_classes" fixture_name = ( - "sklearn.ensemble.weight_boosting.AdaBoostClassifier" - "(base_estimator=sklearn.tree.tree.DecisionTreeClassifier)" + "sklearn.ensemble.{}.AdaBoostClassifier" + "(base_estimator=sklearn.tree.{}.DecisionTreeClassifier)".format(weight_name, tree_name) ) - fixture_class_name = "sklearn.ensemble.weight_boosting.AdaBoostClassifier" + fixture_class_name = "sklearn.ensemble.{}.AdaBoostClassifier".format(weight_name) fixture_short_name = "sklearn.AdaBoostClassifier" # str obtained from self.extension._get_sklearn_description(model) fixture_description = ( @@ -246,13 +264,13 @@ def test_serialize_model_with_subcomponent(self): " on difficult cases.\n\nThis class implements the algorithm known " "as AdaBoost-SAMME [2]." ) - fixture_subcomponent_name = "sklearn.tree.tree.DecisionTreeClassifier" - fixture_subcomponent_class_name = "sklearn.tree.tree.DecisionTreeClassifier" + fixture_subcomponent_name = "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name) + fixture_subcomponent_class_name = "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name) # str obtained from self.extension._get_sklearn_description(model.base_estimator) fixture_subcomponent_description = "A decision tree classifier." 
fixture_structure = { fixture_name: [], - "sklearn.tree.tree.DecisionTreeClassifier": ["base_estimator"], + "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name): ["base_estimator"], } serialization = self.extension.model_to_flow(model) @@ -298,10 +316,11 @@ def test_serialize_pipeline(self): dummy = sklearn.dummy.DummyClassifier(strategy="prior") model = sklearn.pipeline.Pipeline(steps=[("scaler", scaler), ("dummy", dummy)]) + scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" fixture_name = ( "sklearn.pipeline.Pipeline(" - "scaler=sklearn.preprocessing.data.StandardScaler," - "dummy=sklearn.dummy.DummyClassifier)" + "scaler=sklearn.preprocessing.{}.StandardScaler," + "dummy=sklearn.dummy.DummyClassifier)".format(scaler_name) ) fixture_short_name = "sklearn.Pipeline(StandardScaler,DummyClassifier)" @@ -327,7 +346,7 @@ def test_serialize_pipeline(self): fixture_structure = { fixture_name: [], - "sklearn.preprocessing.data.StandardScaler": ["scaler"], + "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"], "sklearn.dummy.DummyClassifier": ["dummy"], } @@ -402,10 +421,12 @@ def test_serialize_pipeline_clustering(self): km = sklearn.cluster.KMeans() model = sklearn.pipeline.Pipeline(steps=[("scaler", scaler), ("clusterer", km)]) + scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" + cluster_name = "k_means_" if LooseVersion(sklearn.__version__) < "0.22" else "_kmeans" fixture_name = ( "sklearn.pipeline.Pipeline(" - "scaler=sklearn.preprocessing.data.StandardScaler," - "clusterer=sklearn.cluster.k_means_.KMeans)" + "scaler=sklearn.preprocessing.{}.StandardScaler," + "clusterer=sklearn.cluster.{}.KMeans)".format(scaler_name, cluster_name) ) fixture_short_name = "sklearn.Pipeline(StandardScaler,KMeans)" @@ -430,10 +451,9 @@ def test_serialize_pipeline_clustering(self): fixture_description = self.extension._get_sklearn_description(model) fixture_structure = { fixture_name: [], - "sklearn.preprocessing.data.StandardScaler": ["scaler"], - "sklearn.cluster.k_means_.KMeans": ["clusterer"], + "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"], + "sklearn.cluster.{}.KMeans".format(cluster_name): ["clusterer"], } - serialization = self.extension.model_to_flow(model) structure = serialization.get_structure("name") @@ -519,10 +539,12 @@ def test_serialize_column_transformer(self): ], remainder="passthrough", ) + + scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" fixture = ( "sklearn.compose._column_transformer.ColumnTransformer(" - "numeric=sklearn.preprocessing.data.StandardScaler," - "nominal=sklearn.preprocessing._encoders.OneHotEncoder)" + "numeric=sklearn.preprocessing.{}.StandardScaler," + "nominal=sklearn.preprocessing._encoders.OneHotEncoder)".format(scaler_name) ) fixture_short_name = "sklearn.ColumnTransformer" @@ -543,7 +565,7 @@ def test_serialize_column_transformer(self): fixture_structure = { fixture: [], - "sklearn.preprocessing.data.StandardScaler": ["numeric"], + "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["numeric"], "sklearn.preprocessing._encoders.OneHotEncoder": ["nominal"], } @@ -587,21 +609,26 @@ def test_serialize_column_transformer_pipeline(self): model = sklearn.pipeline.Pipeline( steps=[("transformer", inner), ("classifier", sklearn.tree.DecisionTreeClassifier())] ) + scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" + tree_name = "tree" if LooseVersion(sklearn.__version__) < "0.22" else 
"_classes" fixture_name = ( "sklearn.pipeline.Pipeline(" "transformer=sklearn.compose._column_transformer." "ColumnTransformer(" - "numeric=sklearn.preprocessing.data.StandardScaler," + "numeric=sklearn.preprocessing.{}.StandardScaler," "nominal=sklearn.preprocessing._encoders.OneHotEncoder)," - "classifier=sklearn.tree.tree.DecisionTreeClassifier)" + "classifier=sklearn.tree.{}.DecisionTreeClassifier)".format(scaler_name, tree_name) ) fixture_structure = { - "sklearn.preprocessing.data.StandardScaler": ["transformer", "numeric"], + "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): [ + "transformer", + "numeric", + ], "sklearn.preprocessing._encoders.OneHotEncoder": ["transformer", "nominal"], "sklearn.compose._column_transformer.ColumnTransformer(numeric=" - "sklearn.preprocessing.data.StandardScaler,nominal=sklearn." - "preprocessing._encoders.OneHotEncoder)": ["transformer"], - "sklearn.tree.tree.DecisionTreeClassifier": ["classifier"], + "sklearn.preprocessing.{}.StandardScaler,nominal=sklearn." + "preprocessing._encoders.OneHotEncoder)".format(scaler_name): ["transformer"], + "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name): ["classifier"], fixture_name: [], } @@ -630,6 +657,7 @@ def test_serialize_column_transformer_pipeline(self): structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture_name) self.assertEqual(serialization.description, fixture_description) + self.assertDictEqual(structure, fixture_structure) # del serialization.model new_model = self.extension.flow_to_model(serialization) @@ -656,15 +684,18 @@ def test_serialize_feature_union(self): structure = serialization.get_structure("name") # OneHotEncoder was moved to _encoders module in 0.20 module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" + scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" fixture_name = ( "sklearn.pipeline.FeatureUnion(" "ohe=sklearn.preprocessing.{}.OneHotEncoder," - "scaler=sklearn.preprocessing.data.StandardScaler)".format(module_name_encoder) + "scaler=sklearn.preprocessing.{}.StandardScaler)".format( + module_name_encoder, scaler_name + ) ) fixture_structure = { fixture_name: [], "sklearn.preprocessing.{}." 
"OneHotEncoder".format(module_name_encoder): ["ohe"], - "sklearn.preprocessing.data.StandardScaler": ["scaler"], + "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"], } self.assertEqual(serialization.name, fixture_name) self.assertDictEqual(structure, fixture_structure) @@ -728,17 +759,20 @@ def test_serialize_feature_union_switched_names(self): fu2_serialization = self.extension.model_to_flow(fu2) # OneHotEncoder was moved to _encoders module in 0.20 module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" + scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" self.assertEqual( fu1_serialization.name, "sklearn.pipeline.FeatureUnion(" "ohe=sklearn.preprocessing.{}.OneHotEncoder," - "scaler=sklearn.preprocessing.data.StandardScaler)".format(module_name_encoder), + "scaler=sklearn.preprocessing.{}.StandardScaler)".format( + module_name_encoder, scaler_name + ), ) self.assertEqual( fu2_serialization.name, "sklearn.pipeline.FeatureUnion(" "scaler=sklearn.preprocessing.{}.OneHotEncoder," - "ohe=sklearn.preprocessing.data.StandardScaler)".format(module_name_encoder), + "ohe=sklearn.preprocessing.{}.StandardScaler)".format(module_name_encoder, scaler_name), ) def test_serialize_complex_flow(self): @@ -766,10 +800,15 @@ def test_serialize_complex_flow(self): # OneHotEncoder was moved to _encoders module in 0.20 module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" ohe_name = "sklearn.preprocessing.%s.OneHotEncoder" % module_name_encoder - scaler_name = "sklearn.preprocessing.data.StandardScaler" - tree_name = "sklearn.tree.tree.DecisionTreeClassifier" - boosting_name = ( - "sklearn.ensemble.weight_boosting.AdaBoostClassifier" "(base_estimator=%s)" % tree_name + scaler_name = "sklearn.preprocessing.{}.StandardScaler".format( + "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" + ) + tree_name = "sklearn.tree.{}.DecisionTreeClassifier".format( + "tree" if LooseVersion(sklearn.__version__) < "0.22" else "_classes" + ) + weight_name = "weight" if LooseVersion(sklearn.__version__) < "0.22" else "_weight" + boosting_name = "sklearn.ensemble.{}_boosting.AdaBoostClassifier(base_estimator={})".format( + weight_name, tree_name ) pipeline_name = "sklearn.pipeline.Pipeline(ohe=%s,scaler=%s," "boosting=%s)" % ( ohe_name, @@ -1195,12 +1234,24 @@ def test__get_fn_arguments_with_defaults(self): (sklearn.tree.DecisionTreeClassifier.__init__, 13), (sklearn.pipeline.Pipeline.__init__, 1), ] - else: + elif sklearn_version < "0.22": fns = [ (sklearn.ensemble.RandomForestRegressor.__init__, 16), (sklearn.tree.DecisionTreeClassifier.__init__, 13), (sklearn.pipeline.Pipeline.__init__, 2), ] + elif sklearn_version < "0.23": + fns = [ + (sklearn.ensemble.RandomForestRegressor.__init__, 18), + (sklearn.tree.DecisionTreeClassifier.__init__, 14), + (sklearn.pipeline.Pipeline.__init__, 2), + ] + else: + fns = [ + (sklearn.ensemble.RandomForestRegressor.__init__, 18), + (sklearn.tree.DecisionTreeClassifier.__init__, 14), + (sklearn.pipeline.Pipeline.__init__, 2), + ] for fn, num_params_with_defaults in fns: defaults, defaultless = self.extension._get_fn_arguments_with_defaults(fn) @@ -1225,11 +1276,18 @@ def test_deserialize_with_defaults(self): pipe_orig = sklearn.pipeline.Pipeline(steps=steps) pipe_adjusted = sklearn.clone(pipe_orig) - params = { - "Imputer__strategy": "median", - "OneHotEncoder__sparse": False, - "Estimator__min_samples_leaf": 42, - } + if 
LooseVersion(sklearn.__version__) < "0.23": + params = { + "Imputer__strategy": "median", + "OneHotEncoder__sparse": False, + "Estimator__min_samples_leaf": 42, + } + else: + params = { + "Imputer__strategy": "mean", + "OneHotEncoder__sparse": True, + "Estimator__min_samples_leaf": 1, + } pipe_adjusted.set_params(**params) flow = self.extension.model_to_flow(pipe_adjusted) pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) @@ -1256,11 +1314,18 @@ def test_deserialize_adaboost_with_defaults(self): pipe_orig = sklearn.pipeline.Pipeline(steps=steps) pipe_adjusted = sklearn.clone(pipe_orig) - params = { - "Imputer__strategy": "median", - "OneHotEncoder__sparse": False, - "Estimator__n_estimators": 10, - } + if LooseVersion(sklearn.__version__) < "0.22": + params = { + "Imputer__strategy": "median", + "OneHotEncoder__sparse": False, + "Estimator__n_estimators": 10, + } + else: + params = { + "Imputer__strategy": "mean", + "OneHotEncoder__sparse": True, + "Estimator__n_estimators": 50, + } pipe_adjusted.set_params(**params) flow = self.extension.model_to_flow(pipe_adjusted) pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) @@ -1293,14 +1358,24 @@ def test_deserialize_complex_with_defaults(self): pipe_orig = sklearn.pipeline.Pipeline(steps=steps) pipe_adjusted = sklearn.clone(pipe_orig) - params = { - "Imputer__strategy": "median", - "OneHotEncoder__sparse": False, - "Estimator__n_estimators": 10, - "Estimator__base_estimator__n_estimators": 10, - "Estimator__base_estimator__base_estimator__learning_rate": 0.1, - "Estimator__base_estimator__base_estimator__loss__n_neighbors": 13, - } + if LooseVersion(sklearn.__version__) < "0.23": + params = { + "Imputer__strategy": "median", + "OneHotEncoder__sparse": False, + "Estimator__n_estimators": 10, + "Estimator__base_estimator__n_estimators": 10, + "Estimator__base_estimator__base_estimator__learning_rate": 0.1, + "Estimator__base_estimator__base_estimator__loss__n_neighbors": 13, + } + else: + params = { + "Imputer__strategy": "mean", + "OneHotEncoder__sparse": True, + "Estimator__n_estimators": 50, + "Estimator__base_estimator__n_estimators": 10, + "Estimator__base_estimator__base_estimator__learning_rate": 0.1, + "Estimator__base_estimator__base_estimator__loss__n_neighbors": 5, + } pipe_adjusted.set_params(**params) flow = self.extension.model_to_flow(pipe_adjusted) pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) @@ -1349,7 +1424,10 @@ def test_openml_param_name_to_sklearn(self): def test_obtain_parameter_values_flow_not_from_server(self): model = sklearn.linear_model.LogisticRegression(solver="lbfgs") flow = self.extension.model_to_flow(model) - msg = "Flow sklearn.linear_model.logistic.LogisticRegression has no " "flow_id!" + logistic_name = "logistic" if LooseVersion(sklearn.__version__) < "0.22" else "_logistic" + msg = "Flow sklearn.linear_model.{}.LogisticRegression has no flow_id!".format( + logistic_name + ) with self.assertRaisesRegex(ValueError, msg): self.extension.obtain_parameter_values(flow) diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index 9f289870e..8d08f4eaf 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -305,15 +305,27 @@ def test_publish_error(self, api_call_mock, flow_exists_mock, get_flow_mock): "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id) ) - fixture = ( - "The flow on the server is inconsistent with the local flow. 
" - "The server flow ID is 1. Please check manually and remove " - "the flow if necessary! Error is:\n" - "'Flow sklearn.ensemble.forest.RandomForestClassifier: " - "values for attribute 'name' differ: " - "'sklearn.ensemble.forest.RandomForestClassifier'" - "\nvs\n'sklearn.ensemble.forest.RandomForestClassifie'.'" - ) + if LooseVersion(sklearn.__version__) < "0.22": + fixture = ( + "The flow on the server is inconsistent with the local flow. " + "The server flow ID is 1. Please check manually and remove " + "the flow if necessary! Error is:\n" + "'Flow sklearn.ensemble.forest.RandomForestClassifier: " + "values for attribute 'name' differ: " + "'sklearn.ensemble.forest.RandomForestClassifier'" + "\nvs\n'sklearn.ensemble.forest.RandomForestClassifie'.'" + ) + else: + # sklearn.ensemble.forest -> sklearn.ensemble._forest + fixture = ( + "The flow on the server is inconsistent with the local flow. " + "The server flow ID is 1. Please check manually and remove " + "the flow if necessary! Error is:\n" + "'Flow sklearn.ensemble._forest.RandomForestClassifier: " + "values for attribute 'name' differ: " + "'sklearn.ensemble._forest.RandomForestClassifier'" + "\nvs\n'sklearn.ensemble._forest.RandomForestClassifie'.'" + ) self.assertEqual(context_manager.exception.args[0], fixture) self.assertEqual(get_flow_mock.call_count, 2) @@ -463,19 +475,40 @@ def test_sklearn_to_upload_to_flow(self): # OneHotEncoder was moved to _encoders module in 0.20 module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" - fixture_name = ( - "%ssklearn.model_selection._search.RandomizedSearchCV(" - "estimator=sklearn.pipeline.Pipeline(" - "ohe=sklearn.preprocessing.%s.OneHotEncoder," - "scaler=sklearn.preprocessing.data.StandardScaler," - "fu=sklearn.pipeline.FeatureUnion(" - "pca=sklearn.decomposition.truncated_svd.TruncatedSVD," - "fs=" - "sklearn.feature_selection.univariate_selection.SelectPercentile)," - "boosting=sklearn.ensemble.weight_boosting.AdaBoostClassifier(" - "base_estimator=sklearn.tree.tree.DecisionTreeClassifier)))" - % (sentinel, module_name_encoder) - ) + if LooseVersion(sklearn.__version__) < "0.22": + fixture_name = ( + "%ssklearn.model_selection._search.RandomizedSearchCV(" + "estimator=sklearn.pipeline.Pipeline(" + "ohe=sklearn.preprocessing.%s.OneHotEncoder," + "scaler=sklearn.preprocessing.data.StandardScaler," + "fu=sklearn.pipeline.FeatureUnion(" + "pca=sklearn.decomposition.truncated_svd.TruncatedSVD," + "fs=" + "sklearn.feature_selection.univariate_selection.SelectPercentile)," + "boosting=sklearn.ensemble.weight_boosting.AdaBoostClassifier(" + "base_estimator=sklearn.tree.tree.DecisionTreeClassifier)))" + % (sentinel, module_name_encoder) + ) + else: + # sklearn.sklearn.preprocessing.data -> sklearn.sklearn.preprocessing._data + # sklearn.sklearn.decomposition.truncated_svd -> sklearn.decomposition._truncated_svd + # sklearn.feature_selection.univariate_selection -> + # sklearn.feature_selection._univariate_selection + # sklearn.ensemble.weight_boosting -> sklearn.ensemble._weight_boosting + # sklearn.tree.tree.DecisionTree... -> sklearn.tree._classes.DecisionTree... 
+ fixture_name = ( + "%ssklearn.model_selection._search.RandomizedSearchCV(" + "estimator=sklearn.pipeline.Pipeline(" + "ohe=sklearn.preprocessing.%s.OneHotEncoder," + "scaler=sklearn.preprocessing._data.StandardScaler," + "fu=sklearn.pipeline.FeatureUnion(" + "pca=sklearn.decomposition._truncated_svd.TruncatedSVD," + "fs=" + "sklearn.feature_selection._univariate_selection.SelectPercentile)," + "boosting=sklearn.ensemble._weight_boosting.AdaBoostClassifier(" + "base_estimator=sklearn.tree._classes.DecisionTreeClassifier)))" + % (sentinel, module_name_encoder) + ) self.assertEqual(new_flow.name, fixture_name) new_flow.model.fit(X, y) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 74f011b7c..aca9580c9 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -199,8 +199,11 @@ def _perform_run( classes_without_random_state = [ "sklearn.model_selection._search.GridSearchCV", "sklearn.pipeline.Pipeline", - "sklearn.linear_model.base.LinearRegression", ] + if LooseVersion(sklearn.__version__) < "0.22": + classes_without_random_state.append("sklearn.linear_model.base.LinearRegression") + else: + classes_without_random_state.append("sklearn.linear_model._base.LinearRegression") def _remove_random_state(flow): if "random_state" in flow.parameters: @@ -779,10 +782,13 @@ def _test_local_evaluations(self, run): (sklearn.metrics.cohen_kappa_score, {"weights": None}), (sklearn.metrics.roc_auc_score, {}), (sklearn.metrics.average_precision_score, {}), - (sklearn.metrics.jaccard_similarity_score, {}), (sklearn.metrics.precision_score, {"average": "macro"}), (sklearn.metrics.brier_score_loss, {}), ] + if LooseVersion(sklearn.__version__) < "0.23": + tests.append((sklearn.metrics.jaccard_similarity_score, {})) + else: + tests.append((sklearn.metrics.jaccard_score, {})) for test_idx, test in enumerate(tests): alt_scores = run.get_metric_fn(sklearn_fn=test[0], kwargs=test[1],) self.assertEqual(len(alt_scores), 10) From 5d9c69c210792d8b447c8b17d466ac44e41d0eb2 Mon Sep 17 00:00:00 2001 From: zikun <33176974+zikun@users.noreply.github.com> Date: Mon, 3 Aug 2020 22:48:44 +0800 Subject: [PATCH 32/48] Add flake8-print in pre-commit (#939) * Add flake8-print in pre-commit config * Replace print statements with logging --- .flake8 | 2 +- .pre-commit-config.yaml | 4 ++++ openml/extensions/sklearn/extension.py | 2 +- tests/conftest.py | 1 - tests/test_datasets/test_dataset_functions.py | 4 +++- tests/test_study/test_study_examples.py | 6 ++++-- 6 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.flake8 b/.flake8 index c0fe5e06f..08bb8ea10 100644 --- a/.flake8 +++ b/.flake8 @@ -1,7 +1,7 @@ [flake8] max-line-length = 100 show-source = True -select = C,E,F,W,B +select = C,E,F,W,B,T ignore = E203, E402, W503 per-file-ignores = *__init__.py:F401 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 75e53f0dd..b3a1d2aba 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,6 +19,10 @@ repos: - id: flake8 name: flake8 openml files: openml/* + additional_dependencies: + - flake8-print==3.1.4 - id: flake8 name: flake8 tests files: tests/* + additional_dependencies: + - flake8-print==3.1.4 diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index fe9d029aa..4a3015bdc 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -1316,7 +1316,7 @@ def _prevent_optimize_n_jobs(self, model): "Could not find attribute " 
"param_distributions." ) - print( + logger.warning( "Warning! Using subclass BaseSearchCV other than " "{GridSearchCV, RandomizedSearchCV}. " "Should implement param check. " diff --git a/tests/conftest.py b/tests/conftest.py index 59fa33aca..461a513fd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -40,7 +40,6 @@ # exploiting the fact that conftest.py always resides in the root directory for tests static_dir = os.path.dirname(os.path.abspath(__file__)) logger.info("static directory: {}".format(static_dir)) -print("static directory: {}".format(static_dir)) while True: if "openml" in os.listdir(static_dir): break diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index c196ea36e..a3be7b2b7 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -1160,7 +1160,9 @@ def test_publish_fetch_ignore_attribute(self): except Exception as e: # returned code 273: Dataset not processed yet # returned code 362: No qualities found - print("Failed to fetch dataset:{} with '{}'.".format(dataset.id, str(e))) + TestBase.logger.error( + "Failed to fetch dataset:{} with '{}'.".format(dataset.id, str(e)) + ) time.sleep(10) continue if downloaded_dataset is None: diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py index 2c403aa84..14e2405f2 100644 --- a/tests/test_study/test_study_examples.py +++ b/tests/test_study/test_study_examples.py @@ -48,10 +48,12 @@ def test_Figure1a(self): clf, task, avoid_duplicate_runs=False ) # run classifier on splits (requires API key) score = run.get_metric_fn(sklearn.metrics.accuracy_score) # print accuracy score - print("Data set: %s; Accuracy: %0.2f" % (task.get_dataset().name, score.mean())) + TestBase.logger.info( + "Data set: %s; Accuracy: %0.2f" % (task.get_dataset().name, score.mean()) + ) run.publish() # publish the experiment on OpenML (optional) TestBase._mark_entity_for_removal("run", run.run_id) TestBase.logger.info( "collected from {}: {}".format(__file__.split("/")[-1], run.run_id) ) - print("URL for run: %s/run/%d" % (openml.config.server, run.run_id)) + TestBase.logger.info("URL for run: %s/run/%d" % (openml.config.server, run.run_id)) From 7d51a766f0d5540d416de3f149645a3b6ad4b282 Mon Sep 17 00:00:00 2001 From: Sahithya Ravi <44670788+sahithyaravi1493@users.noreply.github.com> Date: Fri, 7 Aug 2020 10:05:40 +0200 Subject: [PATCH 33/48] Fix edit api (#940) * fix edit api --- openml/datasets/functions.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 4446f0e90..bda02d419 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -891,10 +891,18 @@ def edit_dataset( ] ): logger.warning("Creating a new version of dataset, cannot edit existing version") + + # Get old dataset and features dataset = get_dataset(data_id) + df, y, categorical, attribute_names = dataset.get_data(dataset_format="dataframe") + attributes_old = attributes_arff_from_df(df) - decoded_arff = dataset._get_arff(format="arff") - data_old = decoded_arff["data"] + # Sparse data needs to be provided in a different format from dense data + if dataset.format == "sparse_arff": + df, y, categorical, attribute_names = dataset.get_data(dataset_format="array") + data_old = coo_matrix(df) + else: + data_old = df data_new = data if data is not None else data_old dataset_new = create_dataset( name=dataset.name, @@ -904,7 +912,7 
@@ def edit_dataset( collection_date=collection_date or dataset.collection_date, language=language or dataset.language, licence=dataset.licence, - attributes=attributes or decoded_arff["attributes"], + attributes=attributes or attributes_old, data=data_new, default_target_attribute=default_target_attribute or dataset.default_target_attribute, ignore_attribute=ignore_attribute or dataset.ignore_attribute, From 5d2e0ce980bfee2de5197e27c1e03c7518665a3b Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Mon, 17 Aug 2020 10:42:59 +0200 Subject: [PATCH 34/48] Adding Python 3.8 support (#916) * Adding Python 3.8 support * Fixing indentation * Execute test cases for 3.8 * Testing * Making install script fail --- .travis.yml | 26 ++++++++++++++------------ ci_scripts/install.sh | 2 ++ setup.py | 1 + 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7360339ac..80f3bda42 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,18 +15,20 @@ env: - TEST_DIR=/tmp/test_dir/ - MODULE=openml matrix: - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" RUN_FLAKE8="true" SKIP_TESTS="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" COVERAGE="true" DOCPUSH="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.23.1" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.23.1" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.22.2" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.22.2" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.2" - # Checks for older scikit-learn versions (which also don't nicely work with - # Python3.7) - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.19.2" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.18.2" SCIPY_VERSION=1.2.0 + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" RUN_FLAKE8="true" SKIP_TESTS="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" COVERAGE="true" DOCPUSH="true" + - DISTRIB="conda" PYTHON_VERSION="3.8" SKLEARN_VERSION="0.23.1" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.23.1" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.23.1" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.8" SKLEARN_VERSION="0.22.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.22.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.22.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.2" + # Checks for older scikit-learn versions (which also don't nicely work with + # Python3.7) + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.19.2" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.18.2" SCIPY_VERSION=1.2.0 # Travis issue # https://github.com/travis-ci/travis-ci/issues/8920 diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh index 67cd1bb38..29181c5c4 100644 --- a/ci_scripts/install.sh +++ b/ci_scripts/install.sh @@ -1,5 +1,7 @@ # License: BSD 3-Clause +set -e + # Deactivate the travis-provided virtual environment and setup a # conda-based environment instead deactivate diff --git a/setup.py b/setup.py index f1f7a5871..476becc10 100644 --- a/setup.py +++ b/setup.py @@ -96,5 +96,6 @@ 
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", ], ) From f70c720c1624e3fadc52909885a4d3a096cd7214 Mon Sep 17 00:00:00 2001 From: Sahithya Ravi <44670788+sahithyaravi1493@users.noreply.github.com> Date: Mon, 31 Aug 2020 20:27:31 +0200 Subject: [PATCH 35/48] change edit_api to reflect server (#941) * change edit_api to reflect server * change test and example to reflect rest API changes * tutorial comments * Update datasets_tutorial.py --- examples/30_extended/datasets_tutorial.py | 38 ++++---- openml/datasets/functions.py | 64 +------------- tests/test_datasets/test_dataset_functions.py | 87 +++++++++---------- 3 files changed, 64 insertions(+), 125 deletions(-) diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index 40b35bbea..e129b7718 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -21,7 +21,7 @@ # # * Use the output_format parameter to select output type # * Default gives 'dict' (other option: 'dataframe', see below) - +# openml_list = openml.datasets.list_datasets() # returns a dict # Show a nice table with some key data properties @@ -117,15 +117,21 @@ # This example uses the test server, to avoid editing a dataset on the main server. openml.config.start_using_configuration_for_example() ############################################################################ -# Changes to these field edits existing version: allowed only for dataset owner +# Edit non-critical fields, allowed for all authorized users: +# description, creator, contributor, collection_date, language, citation, +# original_data_url, paper_url +desc = ( + "This data sets consists of 3 different types of irises' " + "(Setosa, Versicolour, and Virginica) petal and sepal length," + " stored in a 150x4 numpy.ndarray" +) +did = 128 data_id = edit_dataset( - 564, - description="xor dataset represents XOR operation", - contributor="", - collection_date="2019-10-29 17:06:18", - original_data_url="https://www.kaggle.com/ancientaxe/and-or-xor", - paper_url="", - citation="kaggle", + did, + description=desc, + creator="R.A.Fisher", + collection_date="1937", + citation="The use of multiple measurements in taxonomic problems", language="English", ) edited_dataset = get_dataset(data_id) @@ -133,15 +139,11 @@ ############################################################################ -# Changes to these fields: attributes, default_target_attribute, -# row_id_attribute, ignore_attribute generates a new edited version: allowed for anyone - -new_attributes = [ - ("x0", "REAL"), - ("x1", "REAL"), - ("y", "REAL"), -] -data_id = edit_dataset(564, attributes=new_attributes) +# Edit critical fields, allowed only for owners of the dataset: +# default_target_attribute, row_id_attribute, ignore_attribute +# To edit critical fields of a dataset owned by you, configure the API key: +# openml.config.apikey = 'FILL_IN_OPENML_API_KEY' +data_id = edit_dataset(564, default_target_attribute="y") print(f"Edited dataset ID: {data_id}") openml.config.stop_using_configuration_for_example() diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index bda02d419..0f3037a74 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -806,8 +806,6 @@ def edit_dataset( contributor=None, collection_date=None, language=None, - attributes=None, - data=None, default_target_attribute=None, 
ignore_attribute=None, citation=None, @@ -839,17 +837,6 @@ def edit_dataset( language : str Language in which the data is represented. Starts with 1 upper case letter, rest lower case, e.g. 'English'. - attributes : list, dict, or 'auto' - A list of tuples. Each tuple consists of the attribute name and type. - If passing a pandas DataFrame, the attributes can be automatically - inferred by passing ``'auto'``. Specific attributes can be manually - specified by a passing a dictionary where the key is the name of the - attribute and the value is the data type of the attribute. - data : ndarray, list, dataframe, coo_matrix, shape (n_samples, n_features) - An array that contains both the attributes and the targets. When - providing a dataframe, the attribute names and type can be inferred by - passing ``attributes='auto'``. - The target feature is indicated as meta-data of the dataset. default_target_attribute : str The default target attribute, if it exists. Can have multiple values, comma separated. @@ -879,54 +866,6 @@ def edit_dataset( if not isinstance(data_id, int): raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id))) - # case 1, changing these fields creates a new version of the dataset with changed field - if any( - field is not None - for field in [ - data, - attributes, - default_target_attribute, - row_id_attribute, - ignore_attribute, - ] - ): - logger.warning("Creating a new version of dataset, cannot edit existing version") - - # Get old dataset and features - dataset = get_dataset(data_id) - df, y, categorical, attribute_names = dataset.get_data(dataset_format="dataframe") - attributes_old = attributes_arff_from_df(df) - - # Sparse data needs to be provided in a different format from dense data - if dataset.format == "sparse_arff": - df, y, categorical, attribute_names = dataset.get_data(dataset_format="array") - data_old = coo_matrix(df) - else: - data_old = df - data_new = data if data is not None else data_old - dataset_new = create_dataset( - name=dataset.name, - description=description or dataset.description, - creator=creator or dataset.creator, - contributor=contributor or dataset.contributor, - collection_date=collection_date or dataset.collection_date, - language=language or dataset.language, - licence=dataset.licence, - attributes=attributes or attributes_old, - data=data_new, - default_target_attribute=default_target_attribute or dataset.default_target_attribute, - ignore_attribute=ignore_attribute or dataset.ignore_attribute, - citation=citation or dataset.citation, - row_id_attribute=row_id_attribute or dataset.row_id_attribute, - original_data_url=original_data_url or dataset.original_data_url, - paper_url=paper_url or dataset.paper_url, - update_comment=dataset.update_comment, - version_label=dataset.version_label, - ) - dataset_new.publish() - return dataset_new.dataset_id - - # case 2, changing any of these fields will update existing dataset # compose data edit parameters as xml form_data = {"data_id": data_id} xml = OrderedDict() # type: 'OrderedDict[str, OrderedDict]' @@ -937,6 +876,9 @@ def edit_dataset( xml["oml:data_edit_parameters"]["oml:contributor"] = contributor xml["oml:data_edit_parameters"]["oml:collection_date"] = collection_date xml["oml:data_edit_parameters"]["oml:language"] = language + xml["oml:data_edit_parameters"]["oml:default_target_attribute"] = default_target_attribute + xml["oml:data_edit_parameters"]["oml:row_id_attribute"] = row_id_attribute + xml["oml:data_edit_parameters"]["oml:ignore_attribute"] = 
ignore_attribute xml["oml:data_edit_parameters"]["oml:citation"] = citation xml["oml:data_edit_parameters"]["oml:original_data_url"] = original_data_url xml["oml:data_edit_parameters"]["oml:paper_url"] = paper_url diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index a3be7b2b7..5076d06c2 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -1341,57 +1341,43 @@ def test_get_dataset_cache_format_feather(self): self.assertEqual(len(attribute_names), X.shape[1]) def test_data_edit(self): - - # admin key for test server (only admins or owners can edit datasets). - # all users can edit their own datasets) - openml.config.apikey = "d488d8afd93b32331cf6ea9d7003d4c3" - - # case 1, editing description, creator, contributor, collection_date, original_data_url, - # paper_url, citation, language edits existing dataset. - did = 564 - result = edit_dataset( - did, - description="xor dataset represents XOR operation", - contributor="", - collection_date="2019-10-29 17:06:18", - original_data_url="https://www.kaggle.com/ancientaxe/and-or-xor", - paper_url="", - citation="kaggle", - language="English", + # Case 1 + # All users can edit non-critical fields of datasets + desc = ( + "This data sets consists of 3 different types of irises' " + "(Setosa, Versicolour, and Virginica) petal and sepal length," + " stored in a 150x4 numpy.ndarray" ) - self.assertEqual(result, did) - - # case 2, editing data, attributes, default_target_attribute, row_id_attribute, - # ignore_attribute generates a new dataset - - column_names = [ - ("input1", "REAL"), - ("input2", "REAL"), - ("y", "REAL"), - ] - desc = "xor dataset represents XOR operation" + did = 128 result = edit_dataset( - 564, + did, description=desc, - contributor="", - collection_date="2019-10-29 17:06:18", - attributes=column_names, - original_data_url="https://www.kaggle.com/ancientaxe/and-or-xor", - paper_url="", - citation="kaggle", + creator="R.A.Fisher", + collection_date="1937", + citation="The use of multiple measurements in taxonomic problems", language="English", ) - self.assertNotEqual(did, result) + self.assertEqual(did, result) + edited_dataset = openml.datasets.get_dataset(did) + self.assertEqual(edited_dataset.description, desc) + + # Case 2 + # only owners (or admin) can edit all critical fields of datasets + # this is a dataset created by CI, so it is editable by this test + did = 315 + result = edit_dataset(did, default_target_attribute="col_1", ignore_attribute="col_2") + self.assertEqual(did, result) + edited_dataset = openml.datasets.get_dataset(did) + self.assertEqual(edited_dataset.ignore_attribute, ["col_2"]) def test_data_edit_errors(self): - - # admin key for test server (only admins or owners can edit datasets). 
- openml.config.apikey = "d488d8afd93b32331cf6ea9d7003d4c3" # Check server exception when no field to edit is provided self.assertRaisesRegex( OpenMLServerException, - "Please provide atleast one field among description, creator, contributor, " - "collection_date, language, citation, original_data_url or paper_url to edit.", + "Please provide atleast one field among description, creator, " + "contributor, collection_date, language, citation, " + "original_data_url, default_target_attribute, row_id_attribute, " + "ignore_attribute or paper_url to edit.", edit_dataset, data_id=564, ) @@ -1403,12 +1389,21 @@ def test_data_edit_errors(self): data_id=100000, description="xor operation dataset", ) - # Check server exception when a non-owner or non-admin tries to edit existing dataset - openml.config.apikey = "5f0b74b33503e4ad4a7181a91e28719f" + # Check server exception when owner/admin edits critical features of dataset with tasks self.assertRaisesRegex( OpenMLServerException, - "Dataset is not owned by you", + "Critical features default_target_attribute, row_id_attribute and ignore_attribute " + "can only be edited for datasets without any tasks.", edit_dataset, - data_id=564, - description="xor data", + data_id=223, + default_target_attribute="y", + ) + # Check server exception when a non-owner or non-admin tries to edit critical features + self.assertRaisesRegex( + OpenMLServerException, + "Critical features default_target_attribute, row_id_attribute and ignore_attribute " + "can be edited only by the owner. Fork the dataset if changes are required.", + edit_dataset, + data_id=128, + default_target_attribute="y", ) From a442688793acca9caacd8408da4ed48f507b977e Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 2 Sep 2020 08:38:15 +0200 Subject: [PATCH 36/48] Feature #753 (#932) * Create first section: Creating Custom Flow * Add Section: Using the Flow It is incomplete as while trying to explain how to format the predictions, I realized a utility function is required. * Allow run description text to be custom Previously the description text that accompanies the prediction file was auto-generated with the assumption that the corresponding flow had an extension. To support custom flows (with no extension), this behavior had to be changed. The description can now be passed on initialization. The description describing it was auto generated from run_task is now correctly only added if the run was generated through run_flow_on_task. * Draft for Custom Flow tutorial * Add minimal docstring to OpenMLRun I am not for each field what the specifications are. * Process code review feedback In particular: - text changes - fetch true labels from the dataset instead * Use the format utility function in automatic runs To format the predictions. 
* Process @mfeurer feedback * Rename arguments of list_evaluations (#933) * list evals name change * list evals - update * adding config file to user guide (#931) * adding config file to user guide * finished requested changes * Edit api (#935) * version1 * minor fixes * tests * reformat code * check new version * remove get data * code format * review comments * fix duplicate * type annotate * example * tests for exceptions * fix pep8 * black format * Adding support for scikit-learn > 0.22 (#936) * Preliminary changes * Updating unit tests for sklearn 0.22 and above * Triggering sklearn tests + fixes * Refactoring to inspect.signature in extensions * Add flake8-print in pre-commit (#939) * Add flake8-print in pre-commit config * Replace print statements with logging * Fix edit api (#940) * fix edit api * Update subflow paragraph * Check the ClassificationTask has class label set * Test task is of supported type * Add tests for format_prediction * Adding Python 3.8 support (#916) * Adding Python 3.8 support * Fixing indentation * Execute test cases for 3.8 * Testing * Making install script fail * Process feedback Neeratyoy * Test Exception with Regex Also throw NotImplementedError instead of TypeError for unsupported task types. Added links in the example. * change edit_api to reflect server (#941) * change edit_api to reflect server * change test and example to reflect rest API changes * tutorial comments * Update datasets_tutorial.py * Create first section: Creating Custom Flow * Add Section: Using the Flow It is incomplete as while trying to explain how to format the predictions, I realized a utility function is required. * Allow run description text to be custom Previously the description text that accompanies the prediction file was auto-generated with the assumption that the corresponding flow had an extension. To support custom flows (with no extension), this behavior had to be changed. The description can now be passed on initialization. The description describing it was auto generated from run_task is now correctly only added if the run was generated through run_flow_on_task. * Draft for Custom Flow tutorial * Add minimal docstring to OpenMLRun I am not for each field what the specifications are. * Process code review feedback In particular: - text changes - fetch true labels from the dataset instead * Use the format utility function in automatic runs To format the predictions. * Process @mfeurer feedback * Update subflow paragraph * Check the ClassificationTask has class label set * Test task is of supported type * Add tests for format_prediction * Process feedback Neeratyoy * Test Exception with Regex Also throw NotImplementedError instead of TypeError for unsupported task types. Added links in the example. 
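A minimal sketch of the `format_prediction` helper described in the list above, which builds one row of the predictions file. All values here are made up, the probabilities are uniform, and task 1408 is assumed to be the Iris classification task on the test server, as in the tutorial added by this patch:

    import openml
    from openml.runs.functions import format_prediction

    openml.config.start_using_configuration_for_example()
    task = openml.tasks.get_task(1408)  # assumed: the Iris task used in the tutorial below

    # Uniform, made-up class probabilities so that every class label is covered.
    proba = {label: 1 / len(task.class_labels) for label in task.class_labels}
    row = format_prediction(
        task=task,
        repeat=0,
        fold=0,
        index=42,
        prediction=task.class_labels[0],
        truth=task.class_labels[-1],
        proba=proba,
    )
    # For a classification task the row is ordered as
    # [repeat, fold, sample, index, *class probabilities, truth, prediction].
    openml.config.stop_using_configuration_for_example()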
Co-authored-by: Bilgecelik <38037323+Bilgecelik@users.noreply.github.com> Co-authored-by: marcoslbueno <38478211+marcoslbueno@users.noreply.github.com> Co-authored-by: Sahithya Ravi <44670788+sahithyaravi1493@users.noreply.github.com> Co-authored-by: Neeratyoy Mallik Co-authored-by: zikun <33176974+zikun@users.noreply.github.com> --- examples/30_extended/custom_flow_tutorial.py | 205 +++++++++++++++++++ openml/runs/functions.py | 97 ++++++++- openml/runs/run.py | 54 +++-- tests/test_runs/test_run_functions.py | 50 ++++- 4 files changed, 375 insertions(+), 31 deletions(-) create mode 100644 examples/30_extended/custom_flow_tutorial.py diff --git a/examples/30_extended/custom_flow_tutorial.py b/examples/30_extended/custom_flow_tutorial.py new file mode 100644 index 000000000..3b918e108 --- /dev/null +++ b/examples/30_extended/custom_flow_tutorial.py @@ -0,0 +1,205 @@ +""" +================================ +Creating and Using a Custom Flow +================================ + +The most convenient way to create a flow for your machine learning workflow is to generate it +automatically as described in the `Obtain Flow IDs `_ tutorial. # noqa E501 +However, there are scenarios where this is not possible, such +as when the flow uses a framework without an extension or when the flow is described by a script. + +In those cases you can still create a custom flow by following the steps of this tutorial. +As an example we will use the flows generated for the `AutoML Benchmark `_, +and also show how to link runs to the custom flow. +""" + +#################################################################################################### + +# License: BSD 3-Clause +# .. warning:: This example uploads data. For that reason, this example +# connects to the test server at test.openml.org. This prevents the main +# server from crowding with example datasets, tasks, runs, and so on. +from collections import OrderedDict +import numpy as np + +import openml +from openml import OpenMLClassificationTask +from openml.runs.functions import format_prediction + +openml.config.start_using_configuration_for_example() + +#################################################################################################### +# 1. Defining the flow +# ==================== +# The first step is to define all the hyperparameters of your flow. +# The API pages feature a descriptions of each variable of the `OpenMLFlow `_. # noqa E501 +# Note that `external version` and `name` together uniquely identify a flow. +# +# The AutoML Benchmark runs AutoML systems across a range of tasks. +# OpenML stores Flows for each AutoML system. However, the AutoML benchmark adds +# preprocessing to the flow, so should be described in a new flow. +# +# We will break down the flow arguments into several groups, for the tutorial. +# First we will define the name and version information. +# Make sure to leave enough information so others can determine exactly which +# version of the package/script is used. Use tags so users can find your flow easily. + +general = dict( + name="automlbenchmark_autosklearn", + description=( + "Auto-sklearn as set up by the AutoML Benchmark" + "Source: https://github.com/openml/automlbenchmark/releases/tag/v0.9" + ), + external_version="amlb==0.9", + language="English", + tags=["amlb", "benchmark", "study_218"], + dependencies="amlb==0.9", +) + +#################################################################################################### +# Next we define the flow hyperparameters. 
We define their name and default value in `parameters`, +# and provide meta-data for each hyperparameter through `parameters_meta_info`. +# Note that even though the argument name is `parameters` they describe the hyperparameters. +# The use of ordered dicts is required. + +flow_hyperparameters = dict( + parameters=OrderedDict(time="240", memory="32", cores="8"), + parameters_meta_info=OrderedDict( + cores=OrderedDict(description="number of available cores", data_type="int"), + memory=OrderedDict(description="memory in gigabytes", data_type="int"), + time=OrderedDict(description="time in minutes", data_type="int"), + ), +) + +#################################################################################################### +# It is possible to build a flow which uses other flows. +# For example, the Random Forest Classifier is a flow, but you could also construct a flow +# which uses a Random Forest Classifier in a ML pipeline. When constructing the pipeline flow, +# you can use the Random Forest Classifier flow as a *subflow*. It allows for +# all hyperparameters of the Random Classifier Flow to also be specified in your pipeline flow. +# +# In this example, the auto-sklearn flow is a subflow: the auto-sklearn flow is entirely executed as part of this flow. +# This allows people to specify auto-sklearn hyperparameters used in this flow. +# In general, using a subflow is not required. +# +# Note: flow 15275 is not actually the right flow on the test server, +# but that does not matter for this demonstration. + +autosklearn_flow = openml.flows.get_flow(15275) # auto-sklearn 0.5.1 +subflow = dict(components=OrderedDict(automl_tool=autosklearn_flow),) + +#################################################################################################### +# With all parameters of the flow defined, we can now initialize the OpenMLFlow and publish. +# Because we provided all the details already, we do not need to provide a `model` to the flow. +# +# In our case, we don't even have a model. It is possible to have a model but still require +# to follow these steps when the model (python object) does not have an extensions from which +# to automatically extract the hyperparameters. +# So whether you have a model with no extension or no model at all, explicitly set +# the model of the flow to `None`. + +autosklearn_amlb_flow = openml.flows.OpenMLFlow( + **general, **flow_hyperparameters, **subflow, model=None, +) +autosklearn_amlb_flow.publish() +print(f"autosklearn flow created: {autosklearn_amlb_flow.flow_id}") + +#################################################################################################### +# 2. Using the flow +# ==================== +# This Section will show how to upload run data for your custom flow. +# Take care to change the values of parameters as well as the task id, +# to reflect the actual run. +# Task and parameter values in the example are fictional. + +flow_id = autosklearn_amlb_flow.flow_id + +parameters = [ + OrderedDict([("oml:name", "cores"), ("oml:value", 4), ("oml:component", flow_id)]), + OrderedDict([("oml:name", "memory"), ("oml:value", 16), ("oml:component", flow_id)]), + OrderedDict([("oml:name", "time"), ("oml:value", 120), ("oml:component", flow_id)]), +] + +task_id = 1408 # Iris Task +task = openml.tasks.get_task(task_id) +dataset_id = task.get_dataset().dataset_id + + +#################################################################################################### +# The last bit of information for the run we need are the predicted values. 
+# The exact format of the predictions will depend on the task. +# +# The predictions should always be a list of lists, each list should contain: +# - the repeat number: for repeated evaluation strategies. (e.g. repeated cross-validation) +# - the fold number: for cross-validation. (what should this be for holdout?) +# - 0: this field is for backward compatibility. +# - index: the row (of the original dataset) for which the prediction was made. +# - p_1, ..., p_c: for each class the predicted probability of the sample +# belonging to that class. (no elements for regression tasks) +# Make sure the order of these elements follows the order of `task.class_labels`. +# - the predicted class/value for the sample +# - the true class/value for the sample +# +# When using openml-python extensions (such as through `run_model_on_task`), +# all of this formatting is automatic. +# Unfortunately we can not automate this procedure for custom flows, +# which means a little additional effort is required. +# +# Here we generated some random predictions in place. +# You can ignore this code, or use it to better understand the formatting of the predictions. +# +# Find the repeats/folds for this task: +n_repeats, n_folds, _ = task.get_split_dimensions() +all_test_indices = [ + (repeat, fold, index) + for repeat in range(n_repeats) + for fold in range(n_folds) + for index in task.get_train_test_split_indices(fold, repeat)[1] +] + +# random class probabilities (Iris has 150 samples and 3 classes): +r = np.random.rand(150 * n_repeats, 3) +# scale the random values so that the probabilities of each sample sum to 1: +y_proba = r / r.sum(axis=1).reshape(-1, 1) +y_pred = y_proba.argmax(axis=1) + +class_map = dict(zip(range(3), task.class_labels)) +_, y_true = task.get_X_and_y() +y_true = [class_map[y] for y in y_true] + +# We format the predictions with the utility function `format_prediction`. +# It will organize the relevant data in the expected format/order. +predictions = [] +for where, y, yp, proba in zip(all_test_indices, y_true, y_pred, y_proba): + repeat, fold, index = where + + prediction = format_prediction( + task=task, + repeat=repeat, + fold=fold, + index=index, + prediction=class_map[yp], + truth=y, + proba={c: pb for (c, pb) in zip(task.class_labels, proba)}, + ) + predictions.append(prediction) + +#################################################################################################### +# Finally we can create the OpenMLRun object and upload. +# We use the argument setup_string because the used flow was a script. 
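+# A run created this way must carry either a model or explicit `parameter_settings`;
+# since this flow has no extension and no model, the hyperparameter values defined
+# above are passed explicitly. The `description_text` argument is optional: when it
+# is omitted, a plain timestamp is used as the description of the predictions file.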
+ +benchmark_command = f"python3 runbenchmark.py auto-sklearn medium -m aws -t 119" +my_run = openml.runs.OpenMLRun( + task_id=task_id, + flow_id=flow_id, + dataset_id=dataset_id, + parameter_settings=parameters, + setup_string=benchmark_command, + data_content=predictions, + tags=["study_218"], + description_text="Run generated by the Custom Flow tutorial.", +) +my_run.publish() +print("run created:", my_run.run_id) + +openml.config.stop_using_configuration_for_example() diff --git a/openml/runs/functions.py b/openml/runs/functions.py index b3b15d16e..a3888d3a1 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -4,6 +4,7 @@ import io import itertools import os +import time from typing import Any, List, Dict, Optional, Set, Tuple, Union, TYPE_CHECKING # noqa F401 import warnings @@ -250,7 +251,8 @@ def run_flow_on_task( ) data_content, trace, fold_evaluations, sample_evaluations = res - + fields = [*run_environment, time.strftime("%c"), "Created by run_flow_on_task"] + generated_description = "\n".join(fields) run = OpenMLRun( task_id=task.task_id, flow_id=flow_id, @@ -262,6 +264,7 @@ def run_flow_on_task( data_content=data_content, flow=flow, setup_string=flow.extension.create_setup_string(flow.model), + description_text=generated_description, ) if (upload_flow or avoid_duplicate_runs) and flow.flow_id is not None: @@ -478,13 +481,17 @@ def _calculate_local_measure(sklearn_fn, openml_name): for i, tst_idx in enumerate(test_indices): - arff_line = [rep_no, fold_no, sample_no, tst_idx] # type: List[Any] if task.class_labels is not None: - for j, class_label in enumerate(task.class_labels): - arff_line.append(proba_y[i][j]) - - arff_line.append(task.class_labels[pred_y[i]]) - arff_line.append(task.class_labels[test_y[i]]) + arff_line = format_prediction( + task=task, + repeat=rep_no, + fold=fold_no, + sample=sample_no, + index=tst_idx, + prediction=task.class_labels[pred_y[i]], + truth=task.class_labels[test_y[i]], + proba=dict(zip(task.class_labels, proba_y[i])), + ) else: raise ValueError("The task has no class labels") @@ -498,7 +505,15 @@ def _calculate_local_measure(sklearn_fn, openml_name): elif isinstance(task, OpenMLRegressionTask): for i in range(0, len(test_indices)): - arff_line = [rep_no, fold_no, test_indices[i], pred_y[i], test_y[i]] + arff_line = format_prediction( + task=task, + repeat=rep_no, + fold=fold_no, + index=test_indices[i], + prediction=pred_y[i], + truth=test_y[i], + ) + arff_datacontent.append(arff_line) if add_local_measures: @@ -815,7 +830,7 @@ def list_runs( study: Optional[int] = None, display_errors: bool = False, output_format: str = "dict", - **kwargs + **kwargs, ) -> Union[Dict, pd.DataFrame]: """ List all runs matching all of the given filters. 
@@ -887,7 +902,7 @@ def list_runs( tag=tag, study=study, display_errors=display_errors, - **kwargs + **kwargs, ) @@ -900,7 +915,7 @@ def _list_runs( study: Optional[int] = None, display_errors: bool = False, output_format: str = "dict", - **kwargs + **kwargs, ) -> Union[Dict, pd.DataFrame]: """ Perform API call `/run/list/{filters}' @@ -1004,3 +1019,63 @@ def __list_runs(api_call, output_format="dict"): runs = pd.DataFrame.from_dict(runs, orient="index") return runs + + +def format_prediction( + task: OpenMLSupervisedTask, + repeat: int, + fold: int, + index: int, + prediction: Union[str, int, float], + truth: Union[str, int, float], + sample: Optional[int] = None, + proba: Optional[Dict[str, float]] = None, +) -> List[Union[str, int, float]]: + """ Format the predictions in the specific order as required for the run results. + + Parameters + ---------- + task: OpenMLSupervisedTask + Task for which to format the predictions. + repeat: int + From which repeat this predictions is made. + fold: int + From which fold this prediction is made. + index: int + For which index this prediction is made. + prediction: str, int or float + The predicted class label or value. + truth: str, int or float + The true class label or value. + sample: int, optional (default=None) + From which sample set this prediction is made. + Required only for LearningCurve tasks. + proba: Dict[str, float], optional (default=None) + For classification tasks only. + A mapping from each class label to their predicted probability. + The dictionary should contain an entry for each of the `task.class_labels`. + E.g.: {"Iris-Setosa": 0.2, "Iris-Versicolor": 0.7, "Iris-Virginica": 0.1} + + Returns + ------- + A list with elements for the prediction results of a run. + + """ + if isinstance(task, OpenMLClassificationTask): + if proba is None: + raise ValueError("`proba` is required for classification task") + if task.class_labels is None: + raise ValueError("The classification task must have class labels set") + if not set(task.class_labels) == set(proba): + raise ValueError("Each class should have a predicted probability") + if sample is None: + if isinstance(task, OpenMLLearningCurveTask): + raise ValueError("`sample` can not be none for LearningCurveTask") + else: + sample = 0 + probabilities = [proba[c] for c in task.class_labels] + return [repeat, fold, sample, index, *probabilities, truth, prediction] + elif isinstance(task, OpenMLRegressionTask): + return [repeat, fold, index, truth, prediction] + else: + raise NotImplementedError(f"Formatting for {type(task)} is not supported.") diff --git a/openml/runs/run.py b/openml/runs/run.py index a61fc4688..b8be9c3a3 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -27,14 +27,37 @@ class OpenMLRun(OpenMLBase): """OpenML Run: result of running a model on an openml dataset. - Parameters - ---------- - task_id : int - Refers to the task. - flow_id : int - Refers to the flow. - dataset_id: int - Refers to the data. + Parameters + ---------- + task_id: int + flow_id: int + dataset_id: int + setup_string: str + output_files: Dict[str, str] + A dictionary that specifies where each related file can be found. + setup_id: int + tags: List[str] + uploader: int + User ID of the uploader. + uploader_name: str + evaluations: Dict + fold_evaluations: Dict + sample_evaluations: Dict + data_content: List[List] + The predictions generated from executing this run. 
+ trace: OpenMLRunTrace + model: object + task_type: str + task_evaluation_measure: str + flow_name: str + parameter_settings: List[OrderedDict] + predictions_url: str + task: OpenMLTask + flow: OpenMLFlow + run_id: int + description_text: str, optional + Description text to add to the predictions file. + If left None, """ def __init__( @@ -62,6 +85,7 @@ def __init__( task=None, flow=None, run_id=None, + description_text=None, ): self.uploader = uploader self.uploader_name = uploader_name @@ -87,6 +111,7 @@ def __init__( self.model = model self.tags = tags self.predictions_url = predictions_url + self.description_text = description_text @property def id(self) -> Optional[int]: @@ -264,16 +289,13 @@ def _generate_arff_dict(self) -> "OrderedDict[str, Any]": if self.flow is None: self.flow = get_flow(self.flow_id) - run_environment = ( - self.flow.extension.get_version_information() - + [time.strftime("%c")] - + ["Created by run_task()"] - ) + if self.description_text is None: + self.description_text = time.strftime("%c") task = get_task(self.task_id) arff_dict = OrderedDict() # type: 'OrderedDict[str, Any]' arff_dict["data"] = self.data_content - arff_dict["description"] = "\n".join(run_environment) + arff_dict["description"] = self.description_text arff_dict["relation"] = "openml_task_{}_predictions".format(task.task_id) if isinstance(task, OpenMLLearningCurveTask): @@ -485,9 +507,9 @@ def _get_file_elements(self) -> Dict: Derived child classes should overwrite this method as necessary. The description field will be populated automatically if not provided. """ - if self.model is None: + if self.parameter_settings is None and self.model is None: raise PyOpenMLError( - "OpenMLRun obj does not contain a model. " "(This should never happen.) " + "OpenMLRun must contain a model or be initialized with parameter_settings." ) if self.flow_id is None: if self.flow is None: diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index aca9580c9..fc53ea366 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -22,10 +22,7 @@ import openml.extensions.sklearn from openml.testing import TestBase, SimpleImputer -from openml.runs.functions import ( - _run_task_get_arffcontent, - run_exists, -) +from openml.runs.functions import _run_task_get_arffcontent, run_exists, format_prediction from openml.runs.trace import OpenMLRunTrace from openml.tasks import TaskTypeEnum @@ -1342,3 +1339,48 @@ def test_run_flow_on_task_downloaded_flow(self): run.publish() TestBase._mark_entity_for_removal("run", run.run_id) TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], run.run_id)) + + def test_format_prediction_non_supervised(self): + # non-supervised tasks don't exist on the test server + openml.config.server = self.production_server + clustering = openml.tasks.get_task(126033, download_data=False) + ignored_input = [0] * 5 + with self.assertRaisesRegex( + NotImplementedError, r"Formatting for is not supported." 
+ ): + format_prediction(clustering, *ignored_input) + + def test_format_prediction_classification_no_probabilities(self): + classification = openml.tasks.get_task(self.TEST_SERVER_TASK_SIMPLE[0], download_data=False) + ignored_input = [0] * 5 + with self.assertRaisesRegex(ValueError, "`proba` is required for classification task"): + format_prediction(classification, *ignored_input, proba=None) + + def test_format_prediction_classification_incomplete_probabilities(self): + classification = openml.tasks.get_task(self.TEST_SERVER_TASK_SIMPLE[0], download_data=False) + ignored_input = [0] * 5 + incomplete_probabilities = {c: 0.2 for c in classification.class_labels[1:]} + with self.assertRaisesRegex(ValueError, "Each class should have a predicted probability"): + format_prediction(classification, *ignored_input, proba=incomplete_probabilities) + + def test_format_prediction_task_without_classlabels_set(self): + classification = openml.tasks.get_task(self.TEST_SERVER_TASK_SIMPLE[0], download_data=False) + classification.class_labels = None + ignored_input = [0] * 5 + with self.assertRaisesRegex( + ValueError, "The classification task must have class labels set" + ): + format_prediction(classification, *ignored_input, proba={}) + + def test_format_prediction_task_learning_curve_sample_not_set(self): + learning_curve = openml.tasks.get_task(801, download_data=False) + probabilities = {c: 0.2 for c in learning_curve.class_labels} + ignored_input = [0] * 5 + with self.assertRaisesRegex(ValueError, "`sample` can not be none for LearningCurveTask"): + format_prediction(learning_curve, *ignored_input, sample=None, proba=probabilities) + + def test_format_prediction_task_regression(self): + regression = openml.tasks.get_task(self.TEST_SERVER_TASK_REGRESSION[0], download_data=False) + ignored_input = [0] * 5 + res = format_prediction(regression, *ignored_input) + self.assertListEqual(res, [0] * 5) From 3d85fa7a46b54064627e0cbc0a5f403fdbdc0ac1 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 2 Sep 2020 13:17:10 +0200 Subject: [PATCH 37/48] Better support for passthrough and drop in sklearn extension (#943) * support passthrough and drop in sklearn extension when serialized to xml dict * make test work with sklearn==0.21 * improve PR * Add additional unit tests * fix test * incorporate feedback and generalize unit tests --- openml/extensions/sklearn/extension.py | 320 ++++++--- .../test_sklearn_extension.py | 640 ++++++++++-------- 2 files changed, 597 insertions(+), 363 deletions(-) diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index 4a3015bdc..2b94d2cfd 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -56,6 +56,10 @@ ] SIMPLE_TYPES = tuple([bool, int, float, str] + SIMPLE_NUMPY_TYPES) +SKLEARN_PIPELINE_STRING_COMPONENTS = ("drop", "passthrough") +COMPONENT_REFERENCE = "component_reference" +COMPOSITION_STEP_CONSTANT = "composition_step_constant" + class SklearnExtension(Extension): """Connect scikit-learn to OpenML-Python.""" @@ -249,8 +253,11 @@ def _deserialize_sklearn( ) -> Any: """Recursive function to deserialize a scikit-learn flow. - This function delegates all work to the respective functions to deserialize special data - structures etc. + This function inspects an object to deserialize and decides how to do so. This function + delegates all work to the respective functions to deserialize special data structures etc. 
+ This function works on everything that has been serialized to OpenML: OpenMLFlow, + components (which are flows themselves), functions, hyperparameter distributions (for + random search) and the actual hyperparameter values themselves. Parameters ---------- @@ -258,8 +265,9 @@ def _deserialize_sklearn( the object to deserialize (can be flow object, or any serialized parameter value that is accepted by) - components : dict - + components : Optional[dict] + Components of the current flow being de-serialized. These will not be used when + de-serializing the actual flow, but when de-serializing a component reference. initialize_with_defaults : bool, optional (default=False) If this flag is set, the hyperparameter values of flows will be @@ -307,11 +315,16 @@ def _deserialize_sklearn( rval = self._deserialize_rv_frozen(value) elif serialized_type == "function": rval = self._deserialize_function(value) - elif serialized_type == "component_reference": + elif serialized_type in (COMPOSITION_STEP_CONSTANT, COMPONENT_REFERENCE): + if serialized_type == COMPOSITION_STEP_CONSTANT: + pass + elif serialized_type == COMPONENT_REFERENCE: + value = self._deserialize_sklearn( + value, recursion_depth=depth_pp, strict_version=strict_version + ) + else: + raise NotImplementedError(serialized_type) assert components is not None # Necessary for mypy - value = self._deserialize_sklearn( - value, recursion_depth=depth_pp, strict_version=strict_version - ) step_name = value["step_name"] key = value["key"] component = self._deserialize_sklearn( @@ -407,6 +420,13 @@ def _serialize_sklearn(self, o: Any, parent_model: Optional[Any] = None) -> Any: if self.is_estimator(o): # is the main model or a submodel rval = self._serialize_model(o) + elif ( + isinstance(o, (list, tuple)) + and len(o) == 2 + and o[1] in SKLEARN_PIPELINE_STRING_COMPONENTS + and isinstance(parent_model, sklearn.pipeline._BaseComposition) + ): + rval = o elif isinstance(o, (list, tuple)): # TODO: explain what type of parameter is here rval = [self._serialize_sklearn(element, parent_model) for element in o] @@ -711,8 +731,13 @@ def _serialize_model(self, model: Any) -> OpenMLFlow: for key in subcomponents: if isinstance(subcomponents[key], OpenMLFlow): name = subcomponents[key].name - elif isinstance(subcomponents[key], str): # 'drop', 'passthrough' can be passed + elif ( + isinstance(subcomponents[key], str) + and subcomponents[key] in SKLEARN_PIPELINE_STRING_COMPONENTS + ): name = subcomponents[key] + else: + raise TypeError(type(subcomponents[key])) if key in subcomponents_explicit: sub_components_names += "," + key + "=" + name else: @@ -727,17 +752,8 @@ def _serialize_model(self, model: Any) -> OpenMLFlow: # Get the external versions of all sub-components external_version = self._get_external_version_string(model, subcomponents) - - dependencies = "\n".join( - [ - self._format_external_version("sklearn", sklearn.__version__,), - "numpy>=1.6.1", - "scipy>=0.9", - ] - ) - - sklearn_version = self._format_external_version("sklearn", sklearn.__version__) - sklearn_version_formatted = sklearn_version.replace("==", "_") + dependencies = self._get_dependencies() + tags = self._get_tags() sklearn_description = self._get_sklearn_description(model) flow = OpenMLFlow( @@ -750,17 +766,7 @@ def _serialize_model(self, model: Any) -> OpenMLFlow: parameters=parameters, parameters_meta_info=parameters_meta_info, external_version=external_version, - tags=[ - "openml-python", - "sklearn", - "scikit-learn", - "python", - sklearn_version_formatted, - # TODO: add 
more tags based on the scikit-learn - # module a flow is in? For example automatically - # annotate a class of sklearn.svm.SVC() with the - # tag svm? - ], + tags=tags, extension=self, language="English", # TODO fill in dependencies! @@ -769,6 +775,31 @@ def _serialize_model(self, model: Any) -> OpenMLFlow: return flow + def _get_dependencies(self) -> str: + dependencies = "\n".join( + [ + self._format_external_version("sklearn", sklearn.__version__,), + "numpy>=1.6.1", + "scipy>=0.9", + ] + ) + return dependencies + + def _get_tags(self) -> List[str]: + sklearn_version = self._format_external_version("sklearn", sklearn.__version__) + sklearn_version_formatted = sklearn_version.replace("==", "_") + return [ + "openml-python", + "sklearn", + "scikit-learn", + "python", + sklearn_version_formatted, + # TODO: add more tags based on the scikit-learn + # module a flow is in? For example automatically + # annotate a class of sklearn.svm.SVC() with the + # tag svm? + ] + def _get_external_version_string( self, model: Any, sub_components: Dict[str, OpenMLFlow], ) -> str: @@ -777,22 +808,25 @@ def _get_external_version_string( # version of all subcomponents, which themselves already contain all # requirements for their subcomponents. The external version string is a # sorted concatenation of all modules which are present in this run. - model_package_name = model.__module__.split(".")[0] - module = importlib.import_module(model_package_name) - model_package_version_number = module.__version__ # type: ignore - external_version = self._format_external_version( - model_package_name, model_package_version_number, - ) - openml_version = self._format_external_version("openml", openml.__version__) - sklearn_version = self._format_external_version("sklearn", sklearn.__version__) external_versions = set() - external_versions.add(external_version) + + # The model is None if the flow is a placeholder flow such as 'passthrough' or 'drop' + if model is not None: + model_package_name = model.__module__.split(".")[0] + module = importlib.import_module(model_package_name) + model_package_version_number = module.__version__ # type: ignore + external_version = self._format_external_version( + model_package_name, model_package_version_number, + ) + external_versions.add(external_version) + + openml_version = self._format_external_version("openml", openml.__version__) + sklearn_version = self._format_external_version("sklearn", sklearn.__version__) external_versions.add(openml_version) external_versions.add(sklearn_version) for visitee in sub_components.values(): - # 'drop', 'passthrough', None can be passed as estimators - if isinstance(visitee, str): + if isinstance(visitee, str) and visitee in SKLEARN_PIPELINE_STRING_COMPONENTS: continue for external_version in visitee.external_version.split(","): external_versions.add(external_version) @@ -807,7 +841,7 @@ def _check_multiple_occurence_of_component_in_flow( while len(to_visit_stack) > 0: visitee = to_visit_stack.pop() - if isinstance(visitee, str): # 'drop', 'passthrough' can be passed as estimators + if isinstance(visitee, str) and visitee in SKLEARN_PIPELINE_STRING_COMPONENTS: known_sub_components.add(visitee) elif visitee.name in known_sub_components: raise ValueError( @@ -865,8 +899,15 @@ def flatten_all(list_): ) # Check that all list elements are of simple types. 
- nested_list_of_simple_types = is_non_empty_list_of_lists_with_same_type and all( - [isinstance(el, SIMPLE_TYPES) for el in flatten_all(rval)] + nested_list_of_simple_types = ( + is_non_empty_list_of_lists_with_same_type + and all([isinstance(el, SIMPLE_TYPES) for el in flatten_all(rval)]) + and all( + [ + len(rv) in (2, 3) and rv[1] not in SKLEARN_PIPELINE_STRING_COMPONENTS + for rv in rval + ] + ) ) if is_non_empty_list_of_lists_with_same_type and not nested_list_of_simple_types: @@ -879,16 +920,18 @@ def flatten_all(list_): for i, sub_component_tuple in enumerate(rval): identifier = sub_component_tuple[0] sub_component = sub_component_tuple[1] - # sub_component_type = type(sub_component_tuple) + sub_component_type = type(sub_component_tuple) if not 2 <= len(sub_component_tuple) <= 3: # length 2 is for {VotingClassifier.estimators, # Pipeline.steps, FeatureUnion.transformer_list} # length 3 is for ColumnTransformer - msg = "Length of tuple does not match assumptions" + msg = "Length of tuple of type {} does not match assumptions".format( + sub_component_type + ) raise ValueError(msg) if isinstance(sub_component, str): - if sub_component != "drop" and sub_component != "passthrough": + if sub_component not in SKLEARN_PIPELINE_STRING_COMPONENTS: msg = ( "Second item of tuple does not match assumptions. " "If string, can be only 'drop' or 'passthrough' but" @@ -921,15 +964,45 @@ def flatten_all(list_): # when deserializing the parameter sub_components_explicit.add(identifier) - sub_components[identifier] = sub_component - component_reference = OrderedDict() # type: Dict[str, Union[str, Dict]] - component_reference["oml-python:serialized_object"] = "component_reference" - cr_value = OrderedDict() # type: Dict[str, Any] - cr_value["key"] = identifier - cr_value["step_name"] = identifier - if len(sub_component_tuple) == 3: - cr_value["argument_1"] = sub_component_tuple[2] - component_reference["value"] = cr_value + if isinstance(sub_component, str): + + external_version = self._get_external_version_string(None, {}) + dependencies = self._get_dependencies() + tags = self._get_tags() + + sub_components[identifier] = OpenMLFlow( + name=sub_component, + description="Placeholder flow for scikit-learn's string pipeline " + "members", + components=OrderedDict(), + parameters=OrderedDict(), + parameters_meta_info=OrderedDict(), + external_version=external_version, + tags=tags, + language="English", + dependencies=dependencies, + model=None, + ) + component_reference = OrderedDict() # type: Dict[str, Union[str, Dict]] + component_reference[ + "oml-python:serialized_object" + ] = COMPOSITION_STEP_CONSTANT + cr_value = OrderedDict() # type: Dict[str, Any] + cr_value["key"] = identifier + cr_value["step_name"] = identifier + if len(sub_component_tuple) == 3: + cr_value["argument_1"] = sub_component_tuple[2] + component_reference["value"] = cr_value + else: + sub_components[identifier] = sub_component + component_reference = OrderedDict() + component_reference["oml-python:serialized_object"] = COMPONENT_REFERENCE + cr_value = OrderedDict() + cr_value["key"] = identifier + cr_value["step_name"] = identifier + if len(sub_component_tuple) == 3: + cr_value["argument_1"] = sub_component_tuple[2] + component_reference["value"] = cr_value parameter_value.append(component_reference) # Here (and in the elif and else branch below) are the only @@ -949,7 +1022,7 @@ def flatten_all(list_): sub_components[k] = rval sub_components_explicit.add(k) component_reference = OrderedDict() - 
component_reference["oml-python:serialized_object"] = "component_reference" + component_reference["oml-python:serialized_object"] = COMPONENT_REFERENCE cr_value = OrderedDict() cr_value["key"] = k cr_value["step_name"] = None @@ -1052,25 +1125,28 @@ def _deserialize_model( ) parameter_dict[name] = rval - module_name = model_name.rsplit(".", 1) - model_class = getattr(importlib.import_module(module_name[0]), module_name[1]) - - if keep_defaults: - # obtain all params with a default - param_defaults, _ = self._get_fn_arguments_with_defaults(model_class.__init__) - - # delete the params that have a default from the dict, - # so they get initialized with their default value - # except [...] - for param in param_defaults: - # [...] the ones that also have a key in the components dict. - # As OpenML stores different flows for ensembles with different - # (base-)components, in OpenML terms, these are not considered - # hyperparameters but rather constants (i.e., changing them would - # result in a different flow) - if param not in components.keys(): - del parameter_dict[param] - return model_class(**parameter_dict) + if model_name is None and flow.name in SKLEARN_PIPELINE_STRING_COMPONENTS: + return flow.name + else: + module_name = model_name.rsplit(".", 1) + model_class = getattr(importlib.import_module(module_name[0]), module_name[1]) + + if keep_defaults: + # obtain all params with a default + param_defaults, _ = self._get_fn_arguments_with_defaults(model_class.__init__) + + # delete the params that have a default from the dict, + # so they get initialized with their default value + # except [...] + for param in param_defaults: + # [...] the ones that also have a key in the components dict. + # As OpenML stores different flows for ensembles with different + # (base-)components, in OpenML terms, these are not considered + # hyperparameters but rather constants (i.e., changing them would + # result in a different flow) + if param not in components.keys(): + del parameter_dict[param] + return model_class(**parameter_dict) def _check_dependencies(self, dependencies: str, strict_version: bool = True) -> None: if not dependencies: @@ -1730,8 +1806,14 @@ def is_subcomponent_specification(values): return False if len(item) < 2: return False - if not isinstance(item[1], openml.flows.OpenMLFlow): - return False + if not isinstance(item[1], (openml.flows.OpenMLFlow, str)): + if ( + isinstance(item[1], str) + and item[1] in SKLEARN_PIPELINE_STRING_COMPONENTS + ): + pass + else: + return False return True # _flow is openml flow object, _param dict maps from flow name to flow @@ -1739,10 +1821,15 @@ def is_subcomponent_specification(values): # unit tests / sentinels) this way, for flows without subflows we do # not have to rely on _flow_dict exp_parameters = set(_flow.parameters) - exp_components = set(_flow.components) - model_parameters = set([mp for mp in component_model.get_params() if "__" not in mp]) - if len((exp_parameters | exp_components) ^ model_parameters) != 0: - flow_params = sorted(exp_parameters | exp_components) + if ( + isinstance(component_model, str) + and component_model in SKLEARN_PIPELINE_STRING_COMPONENTS + ): + model_parameters = set() + else: + model_parameters = set([mp for mp in component_model.get_params(deep=False)]) + if len(exp_parameters.symmetric_difference(model_parameters)) != 0: + flow_params = sorted(exp_parameters) model_params = sorted(model_parameters) raise ValueError( "Parameters of the model do not match the " @@ -1750,6 +1837,44 @@ def 
is_subcomponent_specification(values): "flow:\nexpected flow parameters: " "%s\nmodel parameters: %s" % (flow_params, model_params) ) + exp_components = set(_flow.components) + if ( + isinstance(component_model, str) + and component_model in SKLEARN_PIPELINE_STRING_COMPONENTS + ): + model_components = set() + else: + _ = set([mp for mp in component_model.get_params(deep=False)]) + model_components = set( + [ + mp + for mp in component_model.get_params(deep=True) + if "__" not in mp and mp not in _ + ] + ) + if len(exp_components.symmetric_difference(model_components)) != 0: + is_problem = True + if len(exp_components - model_components) > 0: + # If an expected component is not returned as a component by get_params(), + # this means that it is also a parameter -> we need to check that this is + # actually the case + difference = exp_components - model_components + component_in_model_parameters = [] + for component in difference: + if component in model_parameters: + component_in_model_parameters.append(True) + else: + component_in_model_parameters.append(False) + is_problem = not all(component_in_model_parameters) + if is_problem: + flow_components = sorted(exp_components) + model_components = sorted(model_components) + raise ValueError( + "Subcomponents of the model do not match the " + "parameters expected by the " + "flow:\nexpected flow subcomponents: " + "%s\nmodel subcomponents: %s" % (flow_components, model_components) + ) _params = [] for _param_name in _flow.parameters: @@ -1778,20 +1903,37 @@ def is_subcomponent_specification(values): subcomponent_identifier = subcomponent[0] subcomponent_flow = subcomponent[1] if not isinstance(subcomponent_identifier, str): - raise TypeError("Subcomponent identifier should be " "string") - if not isinstance(subcomponent_flow, openml.flows.OpenMLFlow): - raise TypeError("Subcomponent flow should be string") + raise TypeError( + "Subcomponent identifier should be of type string, " + "but is {}".format(type(subcomponent_identifier)) + ) + if not isinstance(subcomponent_flow, (openml.flows.OpenMLFlow, str)): + if ( + isinstance(subcomponent_flow, str) + and subcomponent_flow in SKLEARN_PIPELINE_STRING_COMPONENTS + ): + pass + else: + raise TypeError( + "Subcomponent flow should be of type flow, but is {}".format( + type(subcomponent_flow) + ) + ) current = { - "oml-python:serialized_object": "component_reference", + "oml-python:serialized_object": COMPONENT_REFERENCE, "value": { "key": subcomponent_identifier, "step_name": subcomponent_identifier, }, } if len(subcomponent) == 3: - if not isinstance(subcomponent[2], list): - raise TypeError("Subcomponent argument should be" " list") + if not isinstance(subcomponent[2], list) and not isinstance( + subcomponent[2], OrderedDict + ): + raise TypeError( + "Subcomponent argument should be list or OrderedDict" + ) current["value"]["argument_1"] = subcomponent[2] parsed_values.append(current) parsed_values = json.dumps(parsed_values) diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index acc93b024..90f69df17 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -71,88 +71,137 @@ def setUp(self): self.extension = SklearnExtension() - def test_serialize_model(self): - with mock.patch.object(self.extension, "_check_dependencies") as check_dependencies_mock: - model = 
sklearn.tree.DecisionTreeClassifier( - criterion="entropy", max_features="auto", max_leaf_nodes=2000 - ) + def _serialization_test_helper( + self, model, X, y, subcomponent_parameters, dependencies_mock_call_count=(1, 2) + ): - tree_name = "tree" if LooseVersion(sklearn.__version__) < "0.22" else "_classes" - fixture_name = "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name) - fixture_short_name = "sklearn.DecisionTreeClassifier" - # str obtained from self.extension._get_sklearn_description(model) - fixture_description = "A decision tree classifier." - version_fixture = "sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9" % sklearn.__version__ - - presort_val = "false" if LooseVersion(sklearn.__version__) < "0.22" else '"deprecated"' - # min_impurity_decrease has been introduced in 0.20 - # min_impurity_split has been deprecated in 0.20 - if LooseVersion(sklearn.__version__) < "0.19": - fixture_parameters = OrderedDict( - ( - ("class_weight", "null"), - ("criterion", '"entropy"'), - ("max_depth", "null"), - ("max_features", '"auto"'), - ("max_leaf_nodes", "2000"), - ("min_impurity_split", "1e-07"), - ("min_samples_leaf", "1"), - ("min_samples_split", "2"), - ("min_weight_fraction_leaf", "0.0"), - ("presort", "false"), - ("random_state", "null"), - ("splitter", '"best"'), - ) - ) - else: - fixture_parameters = OrderedDict( - ( - ("class_weight", "null"), - ("criterion", '"entropy"'), - ("max_depth", "null"), - ("max_features", '"auto"'), - ("max_leaf_nodes", "2000"), - ("min_impurity_decrease", "0.0"), - ("min_impurity_split", "null"), - ("min_samples_leaf", "1"), - ("min_samples_split", "2"), - ("min_weight_fraction_leaf", "0.0"), - ("presort", presort_val), - ("random_state", "null"), - ("splitter", '"best"'), - ) - ) - if LooseVersion(sklearn.__version__) >= "0.22": - fixture_parameters.update({"ccp_alpha": "0.0"}) - fixture_parameters.move_to_end("ccp_alpha", last=False) - - structure_fixture = {"sklearn.tree.{}.DecisionTreeClassifier".format(tree_name): []} + # Regex pattern for memory addresses of style 0x7f8e0f31ecf8 + pattern = re.compile("0x[0-9a-f]{12}") + with mock.patch.object(self.extension, "_check_dependencies") as check_dependencies_mock: serialization = self.extension.model_to_flow(model) - structure = serialization.get_structure("name") - self.assertEqual(serialization.name, fixture_name) - self.assertEqual(serialization.class_name, fixture_name) - self.assertEqual(serialization.custom_name, fixture_short_name) - self.assertEqual(serialization.description, fixture_description) - self.assertEqual(serialization.parameters, fixture_parameters) - self.assertEqual(serialization.dependencies, version_fixture) - self.assertDictEqual(structure, structure_fixture) + if X is not None: + model.fit(X, y) new_model = self.extension.flow_to_model(serialization) # compares string representations of the dict, as it potentially # contains complex objects that can not be compared with == op - # Only in Python 3.x, as Python 2 has Unicode issues - if sys.version_info[0] >= 3: - self.assertEqual(str(model.get_params()), str(new_model.get_params())) + self.assertEqual( + re.sub(pattern, str(model.get_params()), ""), + re.sub(pattern, str(new_model.get_params()), ""), + ) self.assertEqual(type(new_model), type(model)) self.assertIsNot(new_model, model) - self.assertEqual(new_model.get_params(), model.get_params()) - new_model.fit(self.X, self.y) + if X is not None: + new_model.fit(self.X, self.y) - self.assertEqual(check_dependencies_mock.call_count, 1) + 
self.assertEqual(check_dependencies_mock.call_count, dependencies_mock_call_count[0]) + + xml = serialization._to_dict() + new_model2 = self.extension.flow_to_model(OpenMLFlow._from_dict(xml)) + self.assertEqual( + re.sub(pattern, str(model.get_params()), ""), + re.sub(pattern, str(new_model2.get_params()), ""), + ) + + self.assertEqual(type(new_model2), type(model)) + self.assertIsNot(new_model2, model) + + if X is not None: + new_model2.fit(self.X, self.y) + + self.assertEqual(check_dependencies_mock.call_count, dependencies_mock_call_count[1]) + + if subcomponent_parameters: + for nm in (new_model, new_model2): + new_model_params = nm.get_params() + model_params = model.get_params() + for subcomponent_parameter in subcomponent_parameters: + self.assertEqual( + type(new_model_params[subcomponent_parameter]), + type(model_params[subcomponent_parameter]), + ) + self.assertIsNot( + new_model_params[subcomponent_parameter], + model_params[subcomponent_parameter], + ) + del new_model_params[subcomponent_parameter] + del model_params[subcomponent_parameter] + self.assertEqual(new_model_params, model_params) + + return serialization, new_model + + def test_serialize_model(self): + model = sklearn.tree.DecisionTreeClassifier( + criterion="entropy", max_features="auto", max_leaf_nodes=2000 + ) + + tree_name = "tree" if LooseVersion(sklearn.__version__) < "0.22" else "_classes" + fixture_name = "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name) + fixture_short_name = "sklearn.DecisionTreeClassifier" + # str obtained from self.extension._get_sklearn_description(model) + fixture_description = "A decision tree classifier." + version_fixture = "sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9" % sklearn.__version__ + + presort_val = "false" if LooseVersion(sklearn.__version__) < "0.22" else '"deprecated"' + # min_impurity_decrease has been introduced in 0.20 + # min_impurity_split has been deprecated in 0.20 + if LooseVersion(sklearn.__version__) < "0.19": + fixture_parameters = OrderedDict( + ( + ("class_weight", "null"), + ("criterion", '"entropy"'), + ("max_depth", "null"), + ("max_features", '"auto"'), + ("max_leaf_nodes", "2000"), + ("min_impurity_split", "1e-07"), + ("min_samples_leaf", "1"), + ("min_samples_split", "2"), + ("min_weight_fraction_leaf", "0.0"), + ("presort", "false"), + ("random_state", "null"), + ("splitter", '"best"'), + ) + ) + else: + fixture_parameters = OrderedDict( + ( + ("class_weight", "null"), + ("criterion", '"entropy"'), + ("max_depth", "null"), + ("max_features", '"auto"'), + ("max_leaf_nodes", "2000"), + ("min_impurity_decrease", "0.0"), + ("min_impurity_split", "null"), + ("min_samples_leaf", "1"), + ("min_samples_split", "2"), + ("min_weight_fraction_leaf", "0.0"), + ("presort", presort_val), + ("random_state", "null"), + ("splitter", '"best"'), + ) + ) + if LooseVersion(sklearn.__version__) >= "0.22": + fixture_parameters.update({"ccp_alpha": "0.0"}) + fixture_parameters.move_to_end("ccp_alpha", last=False) + + structure_fixture = {"sklearn.tree.{}.DecisionTreeClassifier".format(tree_name): []} + + serialization, _ = self._serialization_test_helper( + model, X=self.X, y=self.y, subcomponent_parameters=None + ) + structure = serialization.get_structure("name") + + self.assertEqual(serialization.name, fixture_name) + self.assertEqual(serialization.class_name, fixture_name) + self.assertEqual(serialization.custom_name, fixture_short_name) + self.assertEqual(serialization.description, fixture_description) + self.assertEqual(serialization.parameters, fixture_parameters) 
+ self.assertEqual(serialization.dependencies, version_fixture) + self.assertDictEqual(structure, structure_fixture) def test_can_handle_flow(self): openml.config.server = self.production_server @@ -165,79 +214,67 @@ def test_can_handle_flow(self): openml.config.server = self.test_server def test_serialize_model_clustering(self): - with mock.patch.object(self.extension, "_check_dependencies") as check_dependencies_mock: - model = sklearn.cluster.KMeans() + model = sklearn.cluster.KMeans() - cluster_name = "k_means_" if LooseVersion(sklearn.__version__) < "0.22" else "_kmeans" - fixture_name = "sklearn.cluster.{}.KMeans".format(cluster_name) - fixture_short_name = "sklearn.KMeans" - # str obtained from self.extension._get_sklearn_description(model) - fixture_description = "K-Means clustering{}".format( - "" if LooseVersion(sklearn.__version__) < "0.22" else "." - ) - version_fixture = "sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9" % sklearn.__version__ + cluster_name = "k_means_" if LooseVersion(sklearn.__version__) < "0.22" else "_kmeans" + fixture_name = "sklearn.cluster.{}.KMeans".format(cluster_name) + fixture_short_name = "sklearn.KMeans" + # str obtained from self.extension._get_sklearn_description(model) + fixture_description = "K-Means clustering{}".format( + "" if LooseVersion(sklearn.__version__) < "0.22" else "." + ) + version_fixture = "sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9" % sklearn.__version__ - n_jobs_val = "null" if LooseVersion(sklearn.__version__) < "0.23" else '"deprecated"' - precomp_val = '"auto"' if LooseVersion(sklearn.__version__) < "0.23" else '"deprecated"' + n_jobs_val = "null" if LooseVersion(sklearn.__version__) < "0.23" else '"deprecated"' + precomp_val = '"auto"' if LooseVersion(sklearn.__version__) < "0.23" else '"deprecated"' - # n_jobs default has changed to None in 0.20 - if LooseVersion(sklearn.__version__) < "0.20": - fixture_parameters = OrderedDict( - ( - ("algorithm", '"auto"'), - ("copy_x", "true"), - ("init", '"k-means++"'), - ("max_iter", "300"), - ("n_clusters", "8"), - ("n_init", "10"), - ("n_jobs", "1"), - ("precompute_distances", '"auto"'), - ("random_state", "null"), - ("tol", "0.0001"), - ("verbose", "0"), - ) + # n_jobs default has changed to None in 0.20 + if LooseVersion(sklearn.__version__) < "0.20": + fixture_parameters = OrderedDict( + ( + ("algorithm", '"auto"'), + ("copy_x", "true"), + ("init", '"k-means++"'), + ("max_iter", "300"), + ("n_clusters", "8"), + ("n_init", "10"), + ("n_jobs", "1"), + ("precompute_distances", '"auto"'), + ("random_state", "null"), + ("tol", "0.0001"), + ("verbose", "0"), ) - else: - fixture_parameters = OrderedDict( - ( - ("algorithm", '"auto"'), - ("copy_x", "true"), - ("init", '"k-means++"'), - ("max_iter", "300"), - ("n_clusters", "8"), - ("n_init", "10"), - ("n_jobs", n_jobs_val), - ("precompute_distances", precomp_val), - ("random_state", "null"), - ("tol", "0.0001"), - ("verbose", "0"), - ) + ) + else: + fixture_parameters = OrderedDict( + ( + ("algorithm", '"auto"'), + ("copy_x", "true"), + ("init", '"k-means++"'), + ("max_iter", "300"), + ("n_clusters", "8"), + ("n_init", "10"), + ("n_jobs", n_jobs_val), + ("precompute_distances", precomp_val), + ("random_state", "null"), + ("tol", "0.0001"), + ("verbose", "0"), ) - fixture_structure = {"sklearn.cluster.{}.KMeans".format(cluster_name): []} - - serialization = self.extension.model_to_flow(model) - structure = serialization.get_structure("name") - - self.assertEqual(serialization.name, fixture_name) - self.assertEqual(serialization.class_name, fixture_name) 
- self.assertEqual(serialization.custom_name, fixture_short_name) - self.assertEqual(serialization.description, fixture_description) - self.assertEqual(serialization.parameters, fixture_parameters) - self.assertEqual(serialization.dependencies, version_fixture) - self.assertDictEqual(structure, fixture_structure) - - new_model = self.extension.flow_to_model(serialization) - # compares string representations of the dict, as it potentially - # contains complex objects that can not be compared with == op - self.assertEqual(str(model.get_params()), str(new_model.get_params())) - - self.assertEqual(type(new_model), type(model)) - self.assertIsNot(new_model, model) + ) + fixture_structure = {"sklearn.cluster.{}.KMeans".format(cluster_name): []} - self.assertEqual(new_model.get_params(), model.get_params()) - new_model.fit(self.X) + serialization, _ = self._serialization_test_helper( + model, X=None, y=None, subcomponent_parameters=None + ) + structure = serialization.get_structure("name") - self.assertEqual(check_dependencies_mock.call_count, 1) + self.assertEqual(serialization.name, fixture_name) + self.assertEqual(serialization.class_name, fixture_name) + self.assertEqual(serialization.custom_name, fixture_short_name) + self.assertEqual(serialization.description, fixture_description) + self.assertEqual(serialization.parameters, fixture_parameters) + self.assertEqual(serialization.dependencies, version_fixture) + self.assertDictEqual(structure, fixture_structure) def test_serialize_model_with_subcomponent(self): model = sklearn.ensemble.AdaBoostClassifier( @@ -273,7 +310,13 @@ def test_serialize_model_with_subcomponent(self): "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name): ["base_estimator"], } - serialization = self.extension.model_to_flow(model) + serialization, _ = self._serialization_test_helper( + model, + X=self.X, + y=self.y, + subcomponent_parameters=["base_estimator"], + dependencies_mock_call_count=(2, 4), + ) structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture_name) @@ -293,24 +336,6 @@ def test_serialize_model_with_subcomponent(self): ) self.assertDictEqual(structure, fixture_structure) - new_model = self.extension.flow_to_model(serialization) - # compares string representations of the dict, as it potentially - # contains complex objects that can not be compared with == op - self.assertEqual(str(model.get_params()), str(new_model.get_params())) - - self.assertEqual(type(new_model), type(model)) - self.assertIsNot(new_model, model) - - self.assertIsNot(new_model.base_estimator, model.base_estimator) - self.assertEqual(new_model.base_estimator.get_params(), model.base_estimator.get_params()) - new_model_params = new_model.get_params() - del new_model_params["base_estimator"] - model_params = model.get_params() - del model_params["base_estimator"] - - self.assertEqual(new_model_params, model_params) - new_model.fit(self.X, self.y) - def test_serialize_pipeline(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) dummy = sklearn.dummy.DummyClassifier(strategy="prior") @@ -350,7 +375,13 @@ def test_serialize_pipeline(self): "sklearn.dummy.DummyClassifier": ["dummy"], } - serialization = self.extension.model_to_flow(model) + serialization, new_model = self._serialization_test_helper( + model, + X=self.X, + y=self.y, + subcomponent_parameters=["scaler", "dummy", "steps"], + dependencies_mock_call_count=(3, 6), + ) structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture_name) @@ 
-390,32 +421,10 @@ def test_serialize_pipeline(self): self.assertIsInstance(serialization.components["scaler"], OpenMLFlow) self.assertIsInstance(serialization.components["dummy"], OpenMLFlow) - new_model = self.extension.flow_to_model(serialization) - # compares string representations of the dict, as it potentially - # contains complex objects that can not be compared with == op - # Only in Python 3.x, as Python 2 has Unicode issues - if sys.version_info[0] >= 3: - self.assertEqual(str(model.get_params()), str(new_model.get_params())) - - self.assertEqual(type(new_model), type(model)) - self.assertIsNot(new_model, model) - self.assertEqual([step[0] for step in new_model.steps], [step[0] for step in model.steps]) self.assertIsNot(new_model.steps[0][1], model.steps[0][1]) self.assertIsNot(new_model.steps[1][1], model.steps[1][1]) - new_model_params = new_model.get_params() - del new_model_params["scaler"] - del new_model_params["dummy"] - del new_model_params["steps"] - fu_params = model.get_params() - del fu_params["scaler"] - del fu_params["dummy"] - del fu_params["steps"] - - self.assertEqual(new_model_params, fu_params) - new_model.fit(self.X, self.y) - def test_serialize_pipeline_clustering(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) km = sklearn.cluster.KMeans() @@ -454,7 +463,13 @@ def test_serialize_pipeline_clustering(self): "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"], "sklearn.cluster.{}.KMeans".format(cluster_name): ["clusterer"], } - serialization = self.extension.model_to_flow(model) + serialization, new_model = self._serialization_test_helper( + model, + X=None, + y=None, + subcomponent_parameters=["scaler", "steps", "clusterer"], + dependencies_mock_call_count=(3, 6), + ) structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture_name) @@ -493,33 +508,10 @@ def test_serialize_pipeline_clustering(self): self.assertIsInstance(serialization.components["scaler"], OpenMLFlow) self.assertIsInstance(serialization.components["clusterer"], OpenMLFlow) - # del serialization.model - new_model = self.extension.flow_to_model(serialization) - # compares string representations of the dict, as it potentially - # contains complex objects that can not be compared with == op - # Only in Python 3.x, as Python 2 has Unicode issues - if sys.version_info[0] >= 3: - self.assertEqual(str(model.get_params()), str(new_model.get_params())) - - self.assertEqual(type(new_model), type(model)) - self.assertIsNot(new_model, model) - self.assertEqual([step[0] for step in new_model.steps], [step[0] for step in model.steps]) self.assertIsNot(new_model.steps[0][1], model.steps[0][1]) self.assertIsNot(new_model.steps[1][1], model.steps[1][1]) - new_model_params = new_model.get_params() - del new_model_params["scaler"] - del new_model_params["clusterer"] - del new_model_params["steps"] - fu_params = model.get_params() - del fu_params["scaler"] - del fu_params["clusterer"] - del fu_params["steps"] - - self.assertEqual(new_model_params, fu_params) - new_model.fit(self.X, self.y) - @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", @@ -536,6 +528,7 @@ def test_serialize_column_transformer(self): sklearn.preprocessing.OneHotEncoder(handle_unknown="ignore"), [3, 4, 5], ), + ("drop", "drop", [6, 7, 8]), ], remainder="passthrough", ) @@ -544,7 +537,8 @@ def test_serialize_column_transformer(self): fixture = ( "sklearn.compose._column_transformer.ColumnTransformer(" 
"numeric=sklearn.preprocessing.{}.StandardScaler," - "nominal=sklearn.preprocessing._encoders.OneHotEncoder)".format(scaler_name) + "nominal=sklearn.preprocessing._encoders.OneHotEncoder," + "drop=drop)".format(scaler_name) ) fixture_short_name = "sklearn.ColumnTransformer" @@ -567,25 +561,21 @@ def test_serialize_column_transformer(self): fixture: [], "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["numeric"], "sklearn.preprocessing._encoders.OneHotEncoder": ["nominal"], + "drop": ["drop"], } - serialization = self.extension.model_to_flow(model) + serialization, new_model = self._serialization_test_helper( + model, + X=None, + y=None, + subcomponent_parameters=["transformers", "numeric", "nominal"], + dependencies_mock_call_count=(4, 8), + ) structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture) self.assertEqual(serialization.custom_name, fixture_short_name) self.assertEqual(serialization.description, fixture_description) self.assertDictEqual(structure, fixture_structure) - # del serialization.model - new_model = self.extension.flow_to_model(serialization) - # compares string representations of the dict, as it potentially - # contains complex objects that can not be compared with == op - # Only in Python 3.x, as Python 2 has Unicode issues - if sys.version_info[0] >= 3: - self.assertEqual(str(model.get_params()), str(new_model.get_params())) - self.assertEqual(type(new_model), type(model)) - self.assertIsNot(new_model, model) - serialization2 = self.extension.model_to_flow(new_model) - assert_flows_equal(serialization, serialization2) @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", @@ -653,21 +643,25 @@ def test_serialize_column_transformer_pipeline(self): else: fixture_description = self.extension._get_sklearn_description(model) - serialization = self.extension.model_to_flow(model) + serialization, new_model = self._serialization_test_helper( + model, + X=None, + y=None, + subcomponent_parameters=( + "transformer", + "classifier", + "transformer__transformers", + "steps", + "transformer__nominal", + "transformer__numeric", + ), + dependencies_mock_call_count=(5, 10), + ) structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture_name) self.assertEqual(serialization.description, fixture_description) self.assertDictEqual(structure, fixture_structure) - # del serialization.model - new_model = self.extension.flow_to_model(serialization) - # compares string representations of the dict, as it potentially - # contains complex objects that can not be compared with == op - self.assertEqual(str(model.get_params()), str(new_model.get_params())) - self.assertEqual(type(new_model), type(model)) - self.assertIsNot(new_model, model) - serialization2 = self.extension.model_to_flow(new_model) - assert_flows_equal(serialization, serialization2) @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="Pipeline processing behaviour updated" @@ -680,7 +674,13 @@ def test_serialize_feature_union(self): scaler = sklearn.preprocessing.StandardScaler() fu = sklearn.pipeline.FeatureUnion(transformer_list=[("ohe", ohe), ("scaler", scaler)]) - serialization = self.extension.model_to_flow(fu) + serialization, new_model = self._serialization_test_helper( + fu, + X=self.X, + y=self.y, + subcomponent_parameters=("ohe", "scaler", "transformer_list"), + dependencies_mock_call_count=(3, 6), + ) structure = serialization.get_structure("name") # OneHotEncoder was moved to _encoders module in 0.20 
module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" @@ -699,15 +699,6 @@ def test_serialize_feature_union(self): } self.assertEqual(serialization.name, fixture_name) self.assertDictEqual(structure, fixture_structure) - new_model = self.extension.flow_to_model(serialization) - # compares string representations of the dict, as it potentially - # contains complex objects that can not be compared with == op - # Only in Python 3.x, as Python 2 has Unicode issues - if sys.version_info[0] >= 3: - self.assertEqual(str(fu.get_params()), str(new_model.get_params())) - - self.assertEqual(type(new_model), type(fu)) - self.assertIsNot(new_model, fu) self.assertEqual(new_model.transformer_list[0][0], fu.transformer_list[0][0]) self.assertEqual( new_model.transformer_list[0][1].get_params(), fu.transformer_list[0][1].get_params() @@ -724,29 +715,20 @@ def test_serialize_feature_union(self): self.assertIsNot(new_model.transformer_list[0][1], fu.transformer_list[0][1]) self.assertIsNot(new_model.transformer_list[1][1], fu.transformer_list[1][1]) - new_model_params = new_model.get_params() - del new_model_params["ohe"] - del new_model_params["scaler"] - del new_model_params["transformer_list"] - fu_params = fu.get_params() - del fu_params["ohe"] - del fu_params["scaler"] - del fu_params["transformer_list"] - - self.assertEqual(new_model_params, fu_params) - new_model.fit(self.X, self.y) - fu.set_params(scaler="drop") - serialization = self.extension.model_to_flow(fu) + serialization, new_model = self._serialization_test_helper( + fu, + X=self.X, + y=self.y, + subcomponent_parameters=("ohe", "transformer_list"), + dependencies_mock_call_count=(3, 6), + ) self.assertEqual( serialization.name, "sklearn.pipeline.FeatureUnion(" "ohe=sklearn.preprocessing.{}.OneHotEncoder," "scaler=drop)".format(module_name_encoder), ) - new_model = self.extension.flow_to_model(serialization) - self.assertEqual(type(new_model), type(fu)) - self.assertIsNot(new_model, fu) self.assertIs(new_model.transformer_list[1][1], "drop") def test_serialize_feature_union_switched_names(self): @@ -755,8 +737,14 @@ def test_serialize_feature_union_switched_names(self): scaler = sklearn.preprocessing.StandardScaler() fu1 = sklearn.pipeline.FeatureUnion(transformer_list=[("ohe", ohe), ("scaler", scaler)]) fu2 = sklearn.pipeline.FeatureUnion(transformer_list=[("scaler", ohe), ("ohe", scaler)]) - fu1_serialization = self.extension.model_to_flow(fu1) - fu2_serialization = self.extension.model_to_flow(fu2) + + fu1_serialization, _ = self._serialization_test_helper( + fu1, X=None, y=None, subcomponent_parameters=(), dependencies_mock_call_count=(3, 6), + ) + fu2_serialization, _ = self._serialization_test_helper( + fu2, X=None, y=None, subcomponent_parameters=(), dependencies_mock_call_count=(3, 6), + ) + # OneHotEncoder was moved to _encoders module in 0.20 module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" @@ -776,7 +764,7 @@ def test_serialize_feature_union_switched_names(self): ) def test_serialize_complex_flow(self): - ohe = sklearn.preprocessing.OneHotEncoder() + ohe = sklearn.preprocessing.OneHotEncoder(handle_unknown="ignore") scaler = sklearn.preprocessing.StandardScaler(with_mean=False) boosting = sklearn.ensemble.AdaBoostClassifier( base_estimator=sklearn.tree.DecisionTreeClassifier() @@ -785,9 +773,9 @@ def test_serialize_complex_flow(self): steps=[("ohe", ohe), ("scaler", 
scaler), ("boosting", boosting)] ) parameter_grid = { - "base_estimator__max_depth": scipy.stats.randint(1, 10), - "learning_rate": scipy.stats.uniform(0.01, 0.99), - "n_estimators": [1, 5, 10, 100], + "boosting__base_estimator__max_depth": scipy.stats.randint(1, 10), + "boosting__learning_rate": scipy.stats.uniform(0.01, 0.99), + "boosting__n_estimators": [1, 5, 10, 100], } # convert to ordered dict, sorted by keys) due to param grid check parameter_grid = OrderedDict(sorted(parameter_grid.items())) @@ -795,7 +783,13 @@ def test_serialize_complex_flow(self): rs = sklearn.model_selection.RandomizedSearchCV( estimator=model, param_distributions=parameter_grid, cv=cv ) - serialized = self.extension.model_to_flow(rs) + serialized, new_model = self._serialization_test_helper( + rs, + X=self.X, + y=self.y, + subcomponent_parameters=(), + dependencies_mock_call_count=(6, 12), + ) structure = serialized.get_structure("name") # OneHotEncoder was moved to _encoders module in 0.20 module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" @@ -829,18 +823,100 @@ def test_serialize_complex_flow(self): self.assertEqual(serialized.name, fixture_name) self.assertEqual(structure, fixture_structure) - # now do deserialization - deserialized = self.extension.flow_to_model(serialized) - # compares string representations of the dict, as it potentially - # contains complex objects that can not be compared with == op - # JvR: compare str length, due to memory address of distribution - self.assertEqual(len(str(rs.get_params())), len(str(deserialized.get_params()))) + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.21", + reason="Pipeline till 0.20 doesn't support 'passthrough'", + ) + def test_serialize_strings_as_pipeline_steps(self): + import sklearn.compose - # Checks that sklearn_to_flow is idempotent. 
- serialized2 = self.extension.model_to_flow(deserialized) - self.assertNotEqual(rs, deserialized) - # Would raise an exception if the flows would be unequal - assert_flows_equal(serialized, serialized2) + # First check: test whether a passthrough in a pipeline is serialized correctly + model = sklearn.pipeline.Pipeline(steps=[("transformer", "passthrough")]) + serialized = self.extension.model_to_flow(model) + self.assertIsInstance(serialized, OpenMLFlow) + self.assertEqual(len(serialized.components), 1) + self.assertEqual(serialized.components["transformer"].name, "passthrough") + serialized = self.extension._serialize_sklearn( + ("transformer", "passthrough"), parent_model=model + ) + self.assertEqual(serialized, ("transformer", "passthrough")) + extracted_info = self.extension._extract_information_from_model(model) + self.assertEqual(len(extracted_info[2]), 1) + self.assertIsInstance(extracted_info[2]["transformer"], OpenMLFlow) + self.assertEqual(extracted_info[2]["transformer"].name, "passthrough") + + # Second check: test whether a lone passthrough in a column transformer is serialized + # correctly + model = sklearn.compose.ColumnTransformer([("passthrough", "passthrough", (0,))]) + serialized = self.extension.model_to_flow(model) + self.assertIsInstance(serialized, OpenMLFlow) + self.assertEqual(len(serialized.components), 1) + self.assertEqual(serialized.components["passthrough"].name, "passthrough") + serialized = self.extension._serialize_sklearn( + ("passthrough", "passthrough"), parent_model=model + ) + self.assertEqual(serialized, ("passthrough", "passthrough")) + extracted_info = self.extension._extract_information_from_model(model) + self.assertEqual(len(extracted_info[2]), 1) + self.assertIsInstance(extracted_info[2]["passthrough"], OpenMLFlow) + self.assertEqual(extracted_info[2]["passthrough"].name, "passthrough") + + # Third check: passthrough and drop in a column transformer + model = sklearn.compose.ColumnTransformer( + [("passthrough", "passthrough", (0,)), ("drop", "drop", (1,))] + ) + serialized = self.extension.model_to_flow(model) + self.assertIsInstance(serialized, OpenMLFlow) + self.assertEqual(len(serialized.components), 2) + self.assertEqual(serialized.components["passthrough"].name, "passthrough") + self.assertEqual(serialized.components["drop"].name, "drop") + serialized = self.extension._serialize_sklearn( + ("passthrough", "passthrough"), parent_model=model + ) + self.assertEqual(serialized, ("passthrough", "passthrough")) + extracted_info = self.extension._extract_information_from_model(model) + self.assertEqual(len(extracted_info[2]), 2) + self.assertIsInstance(extracted_info[2]["passthrough"], OpenMLFlow) + self.assertIsInstance(extracted_info[2]["drop"], OpenMLFlow) + self.assertEqual(extracted_info[2]["passthrough"].name, "passthrough") + self.assertEqual(extracted_info[2]["drop"].name, "drop") + + # Fourth check: having an actual preprocessor in the column transformer, too + model = sklearn.compose.ColumnTransformer( + [ + ("passthrough", "passthrough", (0,)), + ("drop", "drop", (1,)), + ("test", sklearn.preprocessing.StandardScaler(), (2,)), + ] + ) + serialized = self.extension.model_to_flow(model) + self.assertIsInstance(serialized, OpenMLFlow) + self.assertEqual(len(serialized.components), 3) + self.assertEqual(serialized.components["passthrough"].name, "passthrough") + self.assertEqual(serialized.components["drop"].name, "drop") + serialized = self.extension._serialize_sklearn( + ("passthrough", "passthrough"), parent_model=model + ) + 
self.assertEqual(serialized, ("passthrough", "passthrough")) + extracted_info = self.extension._extract_information_from_model(model) + self.assertEqual(len(extracted_info[2]), 3) + self.assertIsInstance(extracted_info[2]["passthrough"], OpenMLFlow) + self.assertIsInstance(extracted_info[2]["drop"], OpenMLFlow) + self.assertEqual(extracted_info[2]["passthrough"].name, "passthrough") + self.assertEqual(extracted_info[2]["drop"].name, "drop") + + # Fifth check: test whether a lone drop in a feature union is serialized correctly + model = sklearn.pipeline.FeatureUnion([("drop", "drop")]) + serialized = self.extension.model_to_flow(model) + self.assertIsInstance(serialized, OpenMLFlow) + self.assertEqual(len(serialized.components), 1) + self.assertEqual(serialized.components["drop"].name, "drop") + serialized = self.extension._serialize_sklearn(("drop", "drop"), parent_model=model) + self.assertEqual(serialized, ("drop", "drop")) + extracted_info = self.extension._extract_information_from_model(model) + self.assertEqual(len(extracted_info[2]), 1) + self.assertIsInstance(extracted_info[2]["drop"], OpenMLFlow) + self.assertEqual(extracted_info[2]["drop"].name, "drop") def test_serialize_type(self): supported_types = [float, np.float, np.float32, np.float64, int, np.int, np.int32, np.int64] @@ -1978,14 +2054,21 @@ def test_run_on_model_with_empty_steps(self): run, flow = openml.runs.run_model_on_task(model=clf, task=task, return_flow=True) self.assertEqual(len(flow.components), 3) - self.assertEqual(flow.components["dummystep"], "passthrough") - self.assertTrue(isinstance(flow.components["classifier"], OpenMLFlow)) - self.assertTrue(isinstance(flow.components["prep"], OpenMLFlow)) - self.assertTrue( - isinstance(flow.components["prep"].components["columntransformer"], OpenMLFlow) + self.assertIsInstance(flow.components["dummystep"], OpenMLFlow) + self.assertEqual(flow.components["dummystep"].name, "passthrough") + self.assertIsInstance(flow.components["classifier"], OpenMLFlow) + if LooseVersion(sklearn.__version__) < "0.22": + self.assertEqual(flow.components["classifier"].name, "sklearn.svm.classes.SVC") + else: + self.assertEqual(flow.components["classifier"].name, "sklearn.svm._classes.SVC") + self.assertIsInstance(flow.components["prep"], OpenMLFlow) + self.assertEqual(flow.components["prep"].class_name, "sklearn.pipeline.Pipeline") + self.assertIsInstance(flow.components["prep"].components["columntransformer"], OpenMLFlow) + self.assertIsInstance( + flow.components["prep"].components["columntransformer"].components["cat"], OpenMLFlow, ) self.assertEqual( - flow.components["prep"].components["columntransformer"].components["cat"], "drop" + flow.components["prep"].components["columntransformer"].components["cat"].name, "drop" ) # de-serializing flow to a model with non-actionable step @@ -1996,6 +2079,15 @@ def test_run_on_model_with_empty_steps(self): self.assertEqual(len(model.named_steps), 3) self.assertEqual(model.named_steps["dummystep"], "passthrough") + xml = flow._to_dict() + new_model = self.extension.flow_to_model(OpenMLFlow._from_dict(xml)) + + new_model.fit(X, y) + self.assertEqual(type(new_model), type(clf)) + self.assertNotEqual(new_model, clf) + self.assertEqual(len(new_model.named_steps), 3) + self.assertEqual(new_model.named_steps["dummystep"], "passthrough") + def test_sklearn_serialization_with_none_step(self): msg = ( "Cannot serialize objects of None type. 
Please use a valid " From d303cedf5498e9eaf41f084f25d49d032f7630f4 Mon Sep 17 00:00:00 2001 From: Eddie Bergman Date: Mon, 28 Sep 2020 16:52:21 +0200 Subject: [PATCH 38/48] Added PEP 561 compliance (#945) (#946) * Added PEP 561 compliance (#945) * FIX: mypy test dependancy * FIX: mypy test dependancy (#945) * FIX: Added mypy to CI list of test packages --- ci_scripts/install.sh | 12 +++++++++++- doc/progress.rst | 1 + openml/py.typed | 0 setup.py | 3 ++- 4 files changed, 14 insertions(+), 2 deletions(-) mode change 100644 => 100755 ci_scripts/install.sh create mode 100644 openml/py.typed diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh old mode 100644 new mode 100755 index 29181c5c4..67530af53 --- a/ci_scripts/install.sh +++ b/ci_scripts/install.sh @@ -38,7 +38,7 @@ python --version if [[ "$TEST_DIST" == "true" ]]; then pip install twine nbconvert jupyter_client matplotlib pyarrow pytest pytest-xdist pytest-timeout \ - nbformat oslo.concurrency flaky + nbformat oslo.concurrency flaky mypy python setup.py sdist # Find file which was modified last as done in https://stackoverflow.com/a/4561987 dist=`find dist -type f -printf '%T@ %p\n' | sort -n | tail -1 | cut -f2- -d" "` @@ -52,6 +52,7 @@ fi python -c "import numpy; print('numpy %s' % numpy.__version__)" python -c "import scipy; print('scipy %s' % scipy.__version__)" + if [[ "$DOCPUSH" == "true" ]]; then conda install --yes gxx_linux-64 gcc_linux-64 swig pip install -e '.[examples,examples_unix]' @@ -64,6 +65,15 @@ if [[ "$RUN_FLAKE8" == "true" ]]; then pre-commit install fi +# PEP 561 compliance check +# Assumes mypy relies solely on the PEP 561 standard +if ! python -m mypy -c "import openml"; then + echo "Failed: PEP 561 compliance" + exit 1 +else + echo "Success: PEP 561 compliant" +fi + # Install scikit-learn last to make sure the openml package installation works # from a clean environment without scikit-learn. pip install scikit-learn==$SKLEARN_VERSION diff --git a/doc/progress.rst b/doc/progress.rst index ef5ed6bae..a9f1e2f2a 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -18,6 +18,7 @@ Changelog * MAINT #865: OpenML no longer bundles test files in the source distribution. * MAINT #897: Dropping support for Python 3.5. * ADD #894: Support caching of datasets using feather format as an option. +* ADD #945: PEP 561 compliance for distributing Type information 0.10.2 ~~~~~~ diff --git a/openml/py.typed b/openml/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/setup.py b/setup.py index 476becc10..9e9a093e4 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ packages=setuptools.find_packages( include=["openml.*", "openml"], exclude=["*.tests", "*.tests.*", "tests.*", "tests"], ), - package_data={"": ["*.txt", "*.md"]}, + package_data={"": ["*.txt", "*.md", "py.typed"]}, python_requires=">=3.6", install_requires=[ "liac-arff>=2.4.0", @@ -68,6 +68,7 @@ "pyarrow", "pre-commit", "pytest-cov", + "mypy", ], "examples": [ "matplotlib", From 5641828b3239f5a8e993a93f4f69f98b406f71cb Mon Sep 17 00:00:00 2001 From: Ivan Gonzalez Date: Fri, 2 Oct 2020 03:03:47 -0500 Subject: [PATCH 39/48] Remove todo list and fix broken link (#954) --- doc/usage.rst | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/doc/usage.rst b/doc/usage.rst index d7ad0d523..1d54baa62 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -138,7 +138,7 @@ available metadata. 
The tutorial which follows explains how to get a list of datasets, how to filter the list to find the dataset that suits your requirements and how to download a dataset: -* `Filter and explore datasets `_ +* `Filter and explore datasets `_ OpenML is about sharing machine learning results and the datasets they were obtained on. Learn how to share your datasets in the following tutorial: @@ -152,14 +152,3 @@ Extending OpenML-Python OpenML-Python provides an extension interface to connect other machine learning libraries than scikit-learn to OpenML. Please check the :ref:`api_extensions` and use the scikit-learn extension in :class:`openml.extensions.sklearn.SklearnExtension` as a starting point. - -~~~~~~~~~~~~~~~ -Advanced topics -~~~~~~~~~~~~~~~ - -We are working on tutorials for the following topics: - -* Querying datasets (TODO) -* Creating tasks (TODO) -* Working offline (TODO) -* Analyzing large amounts of results (TODO) From 0def226c736395451688472173e1fb6050a145cf Mon Sep 17 00:00:00 2001 From: Abraham Francis Date: Mon, 5 Oct 2020 14:39:06 +0530 Subject: [PATCH 40/48] Class to enum (#958) * convert TaskTypeEnum class to TaskType enum * update docstrings for TaskType * fix bug in examples, import TaskType directly * use task_type instead of task_type_id --- examples/30_extended/tasks_tutorial.py | 12 +-- .../40_paper/2015_neurips_feurer_example.py | 2 +- openml/runs/functions.py | 8 +- openml/runs/run.py | 24 +++--- openml/tasks/__init__.py | 4 +- openml/tasks/functions.py | 74 +++++++++---------- openml/tasks/task.py | 50 +++++++------ openml/testing.py | 8 +- tests/test_runs/test_run_functions.py | 10 +-- tests/test_tasks/test_classification_task.py | 6 +- tests/test_tasks/test_clustering_task.py | 9 ++- tests/test_tasks/test_learning_curve_task.py | 6 +- tests/test_tasks/test_regression_task.py | 5 +- tests/test_tasks/test_task.py | 12 +-- tests/test_tasks/test_task_functions.py | 25 ++++--- 15 files changed, 134 insertions(+), 121 deletions(-) diff --git a/examples/30_extended/tasks_tutorial.py b/examples/30_extended/tasks_tutorial.py index 4befe1a07..c755d265e 100644 --- a/examples/30_extended/tasks_tutorial.py +++ b/examples/30_extended/tasks_tutorial.py @@ -8,6 +8,7 @@ # License: BSD 3-Clause import openml +from openml.tasks import TaskType import pandas as pd ############################################################################ @@ -30,7 +31,7 @@ # # We will start by simply listing only *supervised classification* tasks: -tasks = openml.tasks.list_tasks(task_type_id=1) +tasks = openml.tasks.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION) ############################################################################ # **openml.tasks.list_tasks()** returns a dictionary of dictionaries by default, which we convert @@ -45,7 +46,9 @@ # As conversion to a pandas dataframe is a common task, we have added this functionality to the # OpenML-Python library which can be used by passing ``output_format='dataframe'``: -tasks_df = openml.tasks.list_tasks(task_type_id=1, output_format="dataframe") +tasks_df = openml.tasks.list_tasks( + task_type=TaskType.SUPERVISED_CLASSIFICATION, output_format="dataframe" +) print(tasks_df.head()) ############################################################################ @@ -155,7 +158,7 @@ # # Creating a task requires the following input: # -# * task_type_id: The task type ID, required (see below). Required. +# * task_type: The task type ID, required (see below). Required. # * dataset_id: The dataset ID. Required. 
# * target_name: The name of the attribute you aim to predict. Optional. # * estimation_procedure_id : The ID of the estimation procedure used to create train-test @@ -186,9 +189,8 @@ openml.config.start_using_configuration_for_example() try: - tasktypes = openml.tasks.TaskTypeEnum my_task = openml.tasks.create_task( - task_type_id=tasktypes.SUPERVISED_CLASSIFICATION, + task_type=TaskType.SUPERVISED_CLASSIFICATION, dataset_id=128, target_name="class", evaluation_measure="predictive_accuracy", diff --git a/examples/40_paper/2015_neurips_feurer_example.py b/examples/40_paper/2015_neurips_feurer_example.py index c68189784..733a436ad 100644 --- a/examples/40_paper/2015_neurips_feurer_example.py +++ b/examples/40_paper/2015_neurips_feurer_example.py @@ -58,7 +58,7 @@ # deactivated, which also deactivated the tasks on them. More information on active or inactive # datasets can be found in the `online docs `_. tasks = openml.tasks.list_tasks( - task_type_id=openml.tasks.TaskTypeEnum.SUPERVISED_CLASSIFICATION, + task_type=openml.tasks.TaskType.SUPERVISED_CLASSIFICATION, status="all", output_format="dataframe", ) diff --git a/openml/runs/functions.py b/openml/runs/functions.py index a3888d3a1..2b767eaa1 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -32,7 +32,7 @@ ) from .run import OpenMLRun from .trace import OpenMLRunTrace -from ..tasks import TaskTypeEnum, get_task +from ..tasks import TaskType, get_task # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: @@ -274,7 +274,7 @@ def run_flow_on_task( run.parameter_settings = flow.extension.obtain_parameter_values(flow) # now we need to attach the detailed evaluations - if task.task_type_id == TaskTypeEnum.LEARNING_CURVE: + if task.task_type_id == TaskType.LEARNING_CURVE: run.sample_evaluations = sample_evaluations else: run.fold_evaluations = fold_evaluations @@ -772,7 +772,7 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): if "predictions" not in files and from_server is True: task = openml.tasks.get_task(task_id) - if task.task_type_id == TaskTypeEnum.SUBGROUP_DISCOVERY: + if task.task_type_id == TaskType.SUBGROUP_DISCOVERY: raise NotImplementedError("Subgroup discovery tasks are not yet supported.") else: # JvR: actually, I am not sure whether this error should be raised. 
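The hunks above and below swap bare integer task-type ids for members of the new TaskType enum. As a minimal sketch of the conversion pattern they rely on (plain Python enum semantics; it assumes the patched openml package is importable, otherwise the enum defined in openml/tasks/task.py later in this patch can be pasted in for the sketch):

    from openml.tasks import TaskType

    # Integer ids parsed from the server XML map onto named members, e.g.
    # TaskType(int(task_["oml:task_type_id"])) in the listing code of this patch.
    ttid = TaskType(1)
    assert ttid is TaskType.SUPERVISED_CLASSIFICATION

    # .value recovers the integer that is written back into API calls and XML.
    assert ttid.value == 1

    # Comparisons are now made against named members instead of magic numbers.
    is_classification_like = ttid in (TaskType.SUPERVISED_CLASSIFICATION, TaskType.LEARNING_CURVE)
    assert is_classification_like
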
@@ -1008,7 +1008,7 @@ def __list_runs(api_call, output_format="dict"): "setup_id": int(run_["oml:setup_id"]), "flow_id": int(run_["oml:flow_id"]), "uploader": int(run_["oml:uploader"]), - "task_type": int(run_["oml:task_type_id"]), + "task_type": TaskType(int(run_["oml:task_type_id"])), "upload_time": str(run_["oml:upload_time"]), "error_message": str((run_["oml:error_message"]) or ""), } diff --git a/openml/runs/run.py b/openml/runs/run.py index b8be9c3a3..0311272b2 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -16,7 +16,7 @@ from ..flows import get_flow from ..tasks import ( get_task, - TaskTypeEnum, + TaskType, OpenMLClassificationTask, OpenMLLearningCurveTask, OpenMLClusteringTask, @@ -401,17 +401,13 @@ def get_metric_fn(self, sklearn_fn, kwargs=None): attribute_names = [att[0] for att in predictions_arff["attributes"]] if ( - task.task_type_id - in [TaskTypeEnum.SUPERVISED_CLASSIFICATION, TaskTypeEnum.LEARNING_CURVE] + task.task_type_id in [TaskType.SUPERVISED_CLASSIFICATION, TaskType.LEARNING_CURVE] and "correct" not in attribute_names ): raise ValueError('Attribute "correct" should be set for ' "classification task runs") - if ( - task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION - and "truth" not in attribute_names - ): + if task.task_type_id == TaskType.SUPERVISED_REGRESSION and "truth" not in attribute_names: raise ValueError('Attribute "truth" should be set for ' "regression task runs") - if task.task_type_id != TaskTypeEnum.CLUSTERING and "prediction" not in attribute_names: + if task.task_type_id != TaskType.CLUSTERING and "prediction" not in attribute_names: raise ValueError('Attribute "predict" should be set for ' "supervised task runs") def _attribute_list_to_dict(attribute_list): @@ -431,11 +427,11 @@ def _attribute_list_to_dict(attribute_list): predicted_idx = attribute_dict["prediction"] # Assume supervised task if ( - task.task_type_id == TaskTypeEnum.SUPERVISED_CLASSIFICATION - or task.task_type_id == TaskTypeEnum.LEARNING_CURVE + task.task_type_id == TaskType.SUPERVISED_CLASSIFICATION + or task.task_type_id == TaskType.LEARNING_CURVE ): correct_idx = attribute_dict["correct"] - elif task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION: + elif task.task_type_id == TaskType.SUPERVISED_REGRESSION: correct_idx = attribute_dict["truth"] has_samples = False if "sample" in attribute_dict: @@ -465,14 +461,14 @@ def _attribute_list_to_dict(attribute_list): samp = 0 # No learning curve sample, always 0 if task.task_type_id in [ - TaskTypeEnum.SUPERVISED_CLASSIFICATION, - TaskTypeEnum.LEARNING_CURVE, + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.LEARNING_CURVE, ]: prediction = predictions_arff["attributes"][predicted_idx][1].index( line[predicted_idx] ) correct = predictions_arff["attributes"][predicted_idx][1].index(line[correct_idx]) - elif task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION: + elif task.task_type_id == TaskType.SUPERVISED_REGRESSION: prediction = line[predicted_idx] correct = line[correct_idx] if rep not in values_predict: diff --git a/openml/tasks/__init__.py b/openml/tasks/__init__.py index f5e046f37..cba0aa14f 100644 --- a/openml/tasks/__init__.py +++ b/openml/tasks/__init__.py @@ -7,7 +7,7 @@ OpenMLRegressionTask, OpenMLClusteringTask, OpenMLLearningCurveTask, - TaskTypeEnum, + TaskType, ) from .split import OpenMLSplit from .functions import ( @@ -29,5 +29,5 @@ "get_tasks", "list_tasks", "OpenMLSplit", - "TaskTypeEnum", + "TaskType", ] diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index 
a82ce4a12..f775f5e10 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -15,7 +15,7 @@ OpenMLClassificationTask, OpenMLClusteringTask, OpenMLLearningCurveTask, - TaskTypeEnum, + TaskType, OpenMLRegressionTask, OpenMLSupervisedTask, OpenMLTask, @@ -109,7 +109,7 @@ def _get_estimation_procedure_list(): procs.append( { "id": int(proc_["oml:id"]), - "task_type_id": int(proc_["oml:ttid"]), + "task_type_id": TaskType(int(proc_["oml:ttid"])), "name": proc_["oml:name"], "type": proc_["oml:type"], } @@ -119,7 +119,7 @@ def _get_estimation_procedure_list(): def list_tasks( - task_type_id: Optional[int] = None, + task_type: Optional[TaskType] = None, offset: Optional[int] = None, size: Optional[int] = None, tag: Optional[str] = None, @@ -127,14 +127,14 @@ def list_tasks( **kwargs ) -> Union[Dict, pd.DataFrame]: """ - Return a number of tasks having the given tag and task_type_id + Return a number of tasks having the given tag and task_type Parameters ---------- - Filter task_type_id is separated from the other filters because - it is used as task_type_id in the task description, but it is named + Filter task_type is separated from the other filters because + it is used as task_type in the task description, but it is named type when used as a filter in list tasks call. - task_type_id : int, optional + task_type : TaskType, optional ID of the task type as detailed `here `_. - Supervised classification: 1 - Supervised regression: 2 @@ -162,12 +162,12 @@ def list_tasks( Returns ------- dict - All tasks having the given task_type_id and the give tag. Every task is + All tasks having the given task_type and the give tag. Every task is represented by a dictionary containing the following information: task id, dataset id, task_type and status. If qualities are calculated for the associated dataset, some of these are also returned. dataframe - All tasks having the given task_type_id and the give tag. Every task is + All tasks having the given task_type and the give tag. Every task is represented by a row in the data frame containing the following information as columns: task id, dataset id, task_type and status. If qualities are calculated for the associated dataset, some of these are also returned. @@ -179,7 +179,7 @@ def list_tasks( return openml.utils._list_all( output_format=output_format, listing_call=_list_tasks, - task_type_id=task_type_id, + task_type=task_type, offset=offset, size=size, tag=tag, @@ -187,15 +187,15 @@ def list_tasks( ) -def _list_tasks(task_type_id=None, output_format="dict", **kwargs): +def _list_tasks(task_type=None, output_format="dict", **kwargs): """ Perform the api call to return a number of tasks having the given filters. Parameters ---------- - Filter task_type_id is separated from the other filters because - it is used as task_type_id in the task description, but it is named + Filter task_type is separated from the other filters because + it is used as task_type in the task description, but it is named type when used as a filter in list tasks call. - task_type_id : int, optional + task_type : TaskType, optional ID of the task type as detailed `here `_. 
- Supervised classification: 1 @@ -220,8 +220,8 @@ def _list_tasks(task_type_id=None, output_format="dict", **kwargs): dict or dataframe """ api_call = "task/list" - if task_type_id is not None: - api_call += "/type/%d" % int(task_type_id) + if task_type is not None: + api_call += "/type/%d" % task_type.value if kwargs is not None: for operator, value in kwargs.items(): if operator == "task_id": @@ -259,7 +259,7 @@ def __list_tasks(api_call, output_format="dict"): tid = int(task_["oml:task_id"]) task = { "tid": tid, - "ttid": int(task_["oml:task_type_id"]), + "ttid": TaskType(int(task_["oml:task_type_id"])), "did": int(task_["oml:did"]), "name": task_["oml:name"], "task_type": task_["oml:task_type"], @@ -417,18 +417,18 @@ def _create_task_from_xml(xml): "oml:evaluation_measure" ] - task_type_id = int(dic["oml:task_type_id"]) + task_type = TaskType(int(dic["oml:task_type_id"])) common_kwargs = { "task_id": dic["oml:task_id"], "task_type": dic["oml:task_type"], - "task_type_id": dic["oml:task_type_id"], + "task_type_id": task_type, "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"], "evaluation_measure": evaluation_measures, } - if task_type_id in ( - TaskTypeEnum.SUPERVISED_CLASSIFICATION, - TaskTypeEnum.SUPERVISED_REGRESSION, - TaskTypeEnum.LEARNING_CURVE, + if task_type in ( + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.SUPERVISED_REGRESSION, + TaskType.LEARNING_CURVE, ): # Convert some more parameters for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][ @@ -448,18 +448,18 @@ def _create_task_from_xml(xml): ]["oml:data_splits_url"] cls = { - TaskTypeEnum.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, - TaskTypeEnum.SUPERVISED_REGRESSION: OpenMLRegressionTask, - TaskTypeEnum.CLUSTERING: OpenMLClusteringTask, - TaskTypeEnum.LEARNING_CURVE: OpenMLLearningCurveTask, - }.get(task_type_id) + TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, + TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask, + TaskType.CLUSTERING: OpenMLClusteringTask, + TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, + }.get(task_type) if cls is None: raise NotImplementedError("Task type %s not supported." % common_kwargs["task_type"]) return cls(**common_kwargs) def create_task( - task_type_id: int, + task_type: TaskType, dataset_id: int, estimation_procedure_id: int, target_name: Optional[str] = None, @@ -480,7 +480,7 @@ def create_task( Parameters ---------- - task_type_id : int + task_type : TaskType Id of the task type. dataset_id : int The id of the dataset for the task. 
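For callers of create_task and list_tasks, the visible change is the renamed keyword plus the enum argument. A small before/after sketch (the dataset id, estimation procedure id, and target name are placeholders, not values taken from this patch; it assumes the patched openml package is installed):

    import openml
    from openml.tasks import TaskType

    # before this patch:
    #   openml.tasks.create_task(task_type_id=2, dataset_id=..., estimation_procedure_id=..., ...)
    # after this patch:
    task = openml.tasks.create_task(
        task_type=TaskType.SUPERVISED_REGRESSION,  # enum member replaces the bare integer 2
        dataset_id=105,                            # placeholder dataset id
        estimation_procedure_id=7,                 # placeholder estimation procedure id
        target_name="target",                      # placeholder target attribute name
    )
    # Constructing the task object happens locally; it only reaches the server when published.
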
@@ -501,17 +501,17 @@ def create_task( OpenMLLearningCurveTask, OpenMLClusteringTask """ task_cls = { - TaskTypeEnum.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, - TaskTypeEnum.SUPERVISED_REGRESSION: OpenMLRegressionTask, - TaskTypeEnum.CLUSTERING: OpenMLClusteringTask, - TaskTypeEnum.LEARNING_CURVE: OpenMLLearningCurveTask, - }.get(task_type_id) + TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, + TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask, + TaskType.CLUSTERING: OpenMLClusteringTask, + TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, + }.get(task_type) if task_cls is None: - raise NotImplementedError("Task type {0:d} not supported.".format(task_type_id)) + raise NotImplementedError("Task type {0:d} not supported.".format(task_type)) else: return task_cls( - task_type_id=task_type_id, + task_type_id=task_type, task_type=None, data_set_id=dataset_id, target_name=target_name, diff --git a/openml/tasks/task.py b/openml/tasks/task.py index b5d95d6d1..ab54db780 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -2,6 +2,7 @@ from abc import ABC from collections import OrderedDict +from enum import Enum import io import os from typing import Union, Tuple, Dict, List, Optional, Any @@ -18,12 +19,24 @@ from ..utils import _create_cache_directory_for_id +class TaskType(Enum): + SUPERVISED_CLASSIFICATION = 1 + SUPERVISED_REGRESSION = 2 + LEARNING_CURVE = 3 + SUPERVISED_DATASTREAM_CLASSIFICATION = 4 + CLUSTERING = 5 + MACHINE_LEARNING_CHALLENGE = 6 + SURVIVAL_ANALYSIS = 7 + SUBGROUP_DISCOVERY = 8 + MULTITASK_REGRESSION = 9 + + class OpenMLTask(OpenMLBase): """OpenML Task object. Parameters ---------- - task_type_id : int + task_type_id : TaskType Refers to the type of task. task_type : str Refers to the task. @@ -36,7 +49,7 @@ class OpenMLTask(OpenMLBase): def __init__( self, task_id: Optional[int], - task_type_id: int, + task_type_id: TaskType, task_type: str, data_set_id: int, estimation_procedure_id: int = 1, @@ -47,7 +60,7 @@ def __init__( ): self.task_id = int(task_id) if task_id is not None else None - self.task_type_id = int(task_type_id) + self.task_type_id = task_type_id self.task_type = task_type self.dataset_id = int(data_set_id) self.evaluation_measure = evaluation_measure @@ -155,10 +168,10 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]": task_container = OrderedDict() # type: OrderedDict[str, OrderedDict] task_dict = OrderedDict( [("@xmlns:oml", "http://openml.org/openml")] - ) # type: OrderedDict[str, Union[List, str, int]] + ) # type: OrderedDict[str, Union[List, str, TaskType]] task_container["oml:task_inputs"] = task_dict - task_dict["oml:task_type_id"] = self.task_type_id + task_dict["oml:task_type_id"] = self.task_type_id.value # having task_inputs and adding a type annotation # solves wrong warnings @@ -196,7 +209,7 @@ class OpenMLSupervisedTask(OpenMLTask, ABC): def __init__( self, - task_type_id: int, + task_type_id: TaskType, task_type: str, data_set_id: int, target_name: str, @@ -240,7 +253,11 @@ def get_X_and_y( """ dataset = self.get_dataset() - if self.task_type_id not in (1, 2, 3): + if self.task_type_id not in ( + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.SUPERVISED_REGRESSION, + TaskType.LEARNING_CURVE, + ): raise NotImplementedError(self.task_type) X, y, _, _ = dataset.get_data(dataset_format=dataset_format, target=self.target_name,) return X, y @@ -286,7 +303,7 @@ class OpenMLClassificationTask(OpenMLSupervisedTask): def __init__( self, - task_type_id: int, + task_type_id: TaskType, task_type: str, 
data_set_id: int, target_name: str, @@ -327,7 +344,7 @@ class OpenMLRegressionTask(OpenMLSupervisedTask): def __init__( self, - task_type_id: int, + task_type_id: TaskType, task_type: str, data_set_id: int, target_name: str, @@ -366,7 +383,7 @@ class OpenMLClusteringTask(OpenMLTask): def __init__( self, - task_type_id: int, + task_type_id: TaskType, task_type: str, data_set_id: int, estimation_procedure_id: int = 17, @@ -440,7 +457,7 @@ class OpenMLLearningCurveTask(OpenMLClassificationTask): def __init__( self, - task_type_id: int, + task_type_id: TaskType, task_type: str, data_set_id: int, target_name: str, @@ -467,14 +484,3 @@ def __init__( class_labels=class_labels, cost_matrix=cost_matrix, ) - - -class TaskTypeEnum(object): - SUPERVISED_CLASSIFICATION = 1 - SUPERVISED_REGRESSION = 2 - LEARNING_CURVE = 3 - SUPERVISED_DATASTREAM_CLASSIFICATION = 4 - CLUSTERING = 5 - MACHINE_LEARNING_CHALLENGE = 6 - SURVIVAL_ANALYSIS = 7 - SUBGROUP_DISCOVERY = 8 diff --git a/openml/testing.py b/openml/testing.py index e4338effd..0b4c50972 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -17,7 +17,7 @@ from oslo_concurrency import lockutils import openml -from openml.tasks import TaskTypeEnum +from openml.tasks import TaskType import logging @@ -199,7 +199,7 @@ def _check_fold_timing_evaluations( num_repeats: int, num_folds: int, max_time_allowed: float = 60000.0, - task_type: int = TaskTypeEnum.SUPERVISED_CLASSIFICATION, + task_type: TaskType = TaskType.SUPERVISED_CLASSIFICATION, check_scores: bool = True, ): """ @@ -225,9 +225,9 @@ def _check_fold_timing_evaluations( } if check_scores: - if task_type in (TaskTypeEnum.SUPERVISED_CLASSIFICATION, TaskTypeEnum.LEARNING_CURVE): + if task_type in (TaskType.SUPERVISED_CLASSIFICATION, TaskType.LEARNING_CURVE): check_measures["predictive_accuracy"] = (0, 1.0) - elif task_type == TaskTypeEnum.SUPERVISED_REGRESSION: + elif task_type == TaskType.SUPERVISED_REGRESSION: check_measures["mean_absolute_error"] = (0, float("inf")) self.assertIsInstance(fold_evaluations, dict) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index fc53ea366..dcc7b0b96 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -24,7 +24,7 @@ from openml.testing import TestBase, SimpleImputer from openml.runs.functions import _run_task_get_arffcontent, run_exists, format_prediction from openml.runs.trace import OpenMLRunTrace -from openml.tasks import TaskTypeEnum +from openml.tasks import TaskType from sklearn.naive_bayes import GaussianNB from sklearn.model_selection._search import BaseSearchCV @@ -391,7 +391,7 @@ def _run_and_upload( seed=1, metric=sklearn.metrics.accuracy_score, metric_name="predictive_accuracy", - task_type=TaskTypeEnum.SUPERVISED_CLASSIFICATION, + task_type=TaskType.SUPERVISED_CLASSIFICATION, sentinel=None, ): def determine_grid_size(param_grid): @@ -476,7 +476,7 @@ def _run_and_upload_classification( num_iterations = 5 # for base search algorithms metric = sklearn.metrics.accuracy_score # metric class metric_name = "predictive_accuracy" # openml metric name - task_type = TaskTypeEnum.SUPERVISED_CLASSIFICATION # task type + task_type = TaskType.SUPERVISED_CLASSIFICATION # task type return self._run_and_upload( clf=clf, @@ -499,7 +499,7 @@ def _run_and_upload_regression( num_iterations = 5 # for base search algorithms metric = sklearn.metrics.mean_absolute_error # metric class metric_name = "mean_absolute_error" # openml metric name - task_type = TaskTypeEnum.SUPERVISED_REGRESSION 
# task type + task_type = TaskType.SUPERVISED_REGRESSION # task type return self._run_and_upload( clf=clf, @@ -1098,7 +1098,7 @@ def test__run_task_get_arffcontent(self): # trace. SGD does not produce any self.assertIsInstance(trace, type(None)) - task_type = TaskTypeEnum.SUPERVISED_CLASSIFICATION + task_type = TaskType.SUPERVISED_CLASSIFICATION self._check_fold_timing_evaluations( fold_evaluations, num_repeats, num_folds, task_type=task_type ) diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py index b19be7017..4f03f8bff 100644 --- a/tests/test_tasks/test_classification_task.py +++ b/tests/test_tasks/test_classification_task.py @@ -2,7 +2,7 @@ import numpy as np -from openml.tasks import get_task +from openml.tasks import TaskType, get_task from .test_supervised_task import OpenMLSupervisedTaskTest @@ -14,7 +14,7 @@ def setUp(self, n_levels: int = 1): super(OpenMLClassificationTaskTest, self).setUp() self.task_id = 119 - self.task_type_id = 1 + self.task_type = TaskType.SUPERVISED_CLASSIFICATION self.estimation_procedure = 1 def test_get_X_and_Y(self): @@ -30,7 +30,7 @@ def test_download_task(self): task = super(OpenMLClassificationTaskTest, self).test_download_task() self.assertEqual(task.task_id, self.task_id) - self.assertEqual(task.task_type_id, 1) + self.assertEqual(task.task_type_id, TaskType.SUPERVISED_CLASSIFICATION) self.assertEqual(task.dataset_id, 20) def test_class_labels(self): diff --git a/tests/test_tasks/test_clustering_task.py b/tests/test_tasks/test_clustering_task.py index e46369802..c5a7a3829 100644 --- a/tests/test_tasks/test_clustering_task.py +++ b/tests/test_tasks/test_clustering_task.py @@ -1,6 +1,7 @@ # License: BSD 3-Clause import openml +from openml.tasks import TaskType from openml.testing import TestBase from .test_task import OpenMLTaskTest from openml.exceptions import OpenMLServerException @@ -14,7 +15,7 @@ def setUp(self, n_levels: int = 1): super(OpenMLClusteringTaskTest, self).setUp() self.task_id = 146714 - self.task_type_id = 5 + self.task_type = TaskType.CLUSTERING self.estimation_procedure = 17 def test_get_dataset(self): @@ -28,7 +29,7 @@ def test_download_task(self): openml.config.server = self.production_server task = super(OpenMLClusteringTaskTest, self).test_download_task() self.assertEqual(task.task_id, self.task_id) - self.assertEqual(task.task_type_id, 5) + self.assertEqual(task.task_type_id, TaskType.CLUSTERING) self.assertEqual(task.dataset_id, 36) def test_upload_task(self): @@ -38,7 +39,7 @@ def test_upload_task(self): dataset_id = compatible_datasets[i % len(compatible_datasets)] # Upload a clustering task without a ground truth. 
task = openml.tasks.create_task( - task_type_id=self.task_type_id, + task_type=self.task_type, dataset_id=dataset_id, estimation_procedure_id=self.estimation_procedure, ) @@ -59,5 +60,5 @@ def test_upload_task(self): raise e else: raise ValueError( - "Could not create a valid task for task type ID {}".format(self.task_type_id) + "Could not create a valid task for task type ID {}".format(self.task_type) ) diff --git a/tests/test_tasks/test_learning_curve_task.py b/tests/test_tasks/test_learning_curve_task.py index b8e156ee6..9f0157187 100644 --- a/tests/test_tasks/test_learning_curve_task.py +++ b/tests/test_tasks/test_learning_curve_task.py @@ -2,7 +2,7 @@ import numpy as np -from openml.tasks import get_task +from openml.tasks import TaskType, get_task from .test_supervised_task import OpenMLSupervisedTaskTest @@ -14,7 +14,7 @@ def setUp(self, n_levels: int = 1): super(OpenMLLearningCurveTaskTest, self).setUp() self.task_id = 801 - self.task_type_id = 3 + self.task_type = TaskType.LEARNING_CURVE self.estimation_procedure = 13 def test_get_X_and_Y(self): @@ -30,7 +30,7 @@ def test_download_task(self): task = super(OpenMLLearningCurveTaskTest, self).test_download_task() self.assertEqual(task.task_id, self.task_id) - self.assertEqual(task.task_type_id, 3) + self.assertEqual(task.task_type_id, TaskType.LEARNING_CURVE) self.assertEqual(task.dataset_id, 20) def test_class_labels(self): diff --git a/tests/test_tasks/test_regression_task.py b/tests/test_tasks/test_regression_task.py index fbb3ff607..e751e63b5 100644 --- a/tests/test_tasks/test_regression_task.py +++ b/tests/test_tasks/test_regression_task.py @@ -2,6 +2,7 @@ import numpy as np +from openml.tasks import TaskType from .test_supervised_task import OpenMLSupervisedTaskTest @@ -13,7 +14,7 @@ def setUp(self, n_levels: int = 1): super(OpenMLRegressionTaskTest, self).setUp() self.task_id = 625 - self.task_type_id = 2 + self.task_type = TaskType.SUPERVISED_REGRESSION self.estimation_procedure = 7 def test_get_X_and_Y(self): @@ -29,5 +30,5 @@ def test_download_task(self): task = super(OpenMLRegressionTaskTest, self).test_download_task() self.assertEqual(task.task_id, self.task_id) - self.assertEqual(task.task_type_id, 2) + self.assertEqual(task.task_type_id, TaskType.SUPERVISED_REGRESSION) self.assertEqual(task.dataset_id, 105) diff --git a/tests/test_tasks/test_task.py b/tests/test_tasks/test_task.py index ae92f12ad..318785991 100644 --- a/tests/test_tasks/test_task.py +++ b/tests/test_tasks/test_task.py @@ -10,7 +10,7 @@ get_dataset, list_datasets, ) -from openml.tasks import create_task, get_task +from openml.tasks import TaskType, create_task, get_task class OpenMLTaskTest(TestBase): @@ -47,7 +47,7 @@ def test_upload_task(self): dataset_id = compatible_datasets[i % len(compatible_datasets)] # TODO consider implementing on the diff task types. task = create_task( - task_type_id=self.task_type_id, + task_type=self.task_type, dataset_id=dataset_id, target_name=self._get_random_feature(dataset_id), estimation_procedure_id=self.estimation_procedure, @@ -70,7 +70,7 @@ def test_upload_task(self): raise e else: raise ValueError( - "Could not create a valid task for task type ID {}".format(self.task_type_id) + "Could not create a valid task for task type ID {}".format(self.task_type) ) def _get_compatible_rand_dataset(self) -> List: @@ -81,13 +81,13 @@ def _get_compatible_rand_dataset(self) -> List: # depending on the task type, find either datasets # with only symbolic features or datasets with only # numerical features. 
- if self.task_type_id == 2: + if self.task_type == TaskType.SUPERVISED_REGRESSION: # regression task for dataset_id, dataset_info in active_datasets.items(): if "NumberOfSymbolicFeatures" in dataset_info: if dataset_info["NumberOfSymbolicFeatures"] == 0: compatible_datasets.append(dataset_id) - elif self.task_type_id == 5: + elif self.task_type == TaskType.CLUSTERING: # clustering task compatible_datasets = list(active_datasets.keys()) else: @@ -114,7 +114,7 @@ def _get_random_feature(self, dataset_id: int) -> str: while True: random_feature_index = randint(0, len(random_dataset.features) - 1) random_feature = random_dataset.features[random_feature_index] - if self.task_type_id == 2: + if self.task_type == TaskType.SUPERVISED_REGRESSION: if random_feature.data_type == "numeric": break else: diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index ec62c953a..5f9b65495 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -3,6 +3,7 @@ import os from unittest import mock +from openml.tasks import TaskType from openml.testing import TestBase from openml import OpenMLSplit, OpenMLTask from openml.exceptions import OpenMLCacheException @@ -45,12 +46,14 @@ def test__get_estimation_procedure_list(self): estimation_procedures = openml.tasks.functions._get_estimation_procedure_list() self.assertIsInstance(estimation_procedures, list) self.assertIsInstance(estimation_procedures[0], dict) - self.assertEqual(estimation_procedures[0]["task_type_id"], 1) + self.assertEqual( + estimation_procedures[0]["task_type_id"], TaskType.SUPERVISED_CLASSIFICATION + ) def test_list_clustering_task(self): # as shown by #383, clustering tasks can give list/dict casting problems openml.config.server = self.production_server - openml.tasks.list_tasks(task_type_id=5, size=10) + openml.tasks.list_tasks(task_type=TaskType.CLUSTERING, size=10) # the expected outcome is that it doesn't crash. No assertions. 
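The same enum drives task listing after this change; a small sketch (assumes a configured OpenML connection, and the size value is arbitrary):

import openml
from openml.tasks import TaskType

# filter the listing by task type using the enum instead of a bare integer id
tasks = openml.tasks.list_tasks(task_type=TaskType.LEARNING_CURVE, size=10)
for tid, task_info in tasks.items():
    # each entry's "ttid" field is now a TaskType member as well
    assert task_info["ttid"] == TaskType.LEARNING_CURVE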
def _check_task(self, task): @@ -64,16 +67,16 @@ def _check_task(self, task): def test_list_tasks_by_type(self): num_curves_tasks = 200 # number is flexible, check server if fails - ttid = 3 - tasks = openml.tasks.list_tasks(task_type_id=ttid) + ttid = TaskType.LEARNING_CURVE + tasks = openml.tasks.list_tasks(task_type=ttid) self.assertGreaterEqual(len(tasks), num_curves_tasks) for tid in tasks: self.assertEqual(ttid, tasks[tid]["ttid"]) self._check_task(tasks[tid]) def test_list_tasks_output_format(self): - ttid = 3 - tasks = openml.tasks.list_tasks(task_type_id=ttid, output_format="dataframe") + ttid = TaskType.LEARNING_CURVE + tasks = openml.tasks.list_tasks(task_type=ttid, output_format="dataframe") self.assertIsInstance(tasks, pd.DataFrame) self.assertGreater(len(tasks), 100) @@ -109,10 +112,14 @@ def test_list_tasks_paginate(self): def test_list_tasks_per_type_paginate(self): size = 10 max = 100 - task_types = 4 - for j in range(1, task_types): + task_types = [ + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.SUPERVISED_REGRESSION, + TaskType.LEARNING_CURVE, + ] + for j in task_types: for i in range(0, max, size): - tasks = openml.tasks.list_tasks(task_type_id=j, offset=i, size=size) + tasks = openml.tasks.list_tasks(task_type=j, offset=i, size=size) self.assertGreaterEqual(size, len(tasks)) for tid in tasks: self.assertEqual(j, tasks[tid]["ttid"]) From dde56624f021c3d9fa4d74a63a7ae41b25d2a85d Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Tue, 20 Oct 2020 18:20:44 +0200 Subject: [PATCH 41/48] Updating contribution to aid debugging (#961) * Updating contribution to aid debugging * More explicit instructions --- CONTRIBUTING.md | 5 +++++ doc/contributing.rst | 8 +++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8122b0b8e..6b7cffad3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -239,6 +239,11 @@ You may then run a specific module, test case, or unit test respectively: $ pytest tests/test_datasets/test_dataset.py::OpenMLDatasetTest::test_get_data ``` +*NOTE*: In the case the examples build fails during the Continuous Integration test online, please +fix the first failing example. If the first failing example switched the server from live to test +or vice-versa, and the subsequent examples expect the other server, the ensuing examples will fail +to be built as well. + Happy testing! Documentation diff --git a/doc/contributing.rst b/doc/contributing.rst index 92a113633..354a91d1c 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -18,12 +18,10 @@ In particular, a few ways to contribute to openml-python are: are hosted in separate repositories and may have their own guidelines. For more information, see the :ref:`extensions` below. - * Bug reports. If something doesn't work for you or is cumbersome, please - open a new issue to let us know about the problem. - See `this section `_. + * Bug reports. If something doesn't work for you or is cumbersome, please open a new issue to let + us know about the problem. See `this section `_. - * `Cite OpenML `_ if you use it in a scientific - publication. + * `Cite OpenML `_ if you use it in a scientific publication. * Visit one of our `hackathons `_. From d48f108c15f6daded52a4937351cc6a137d805f4 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Tue, 20 Oct 2020 18:21:37 +0200 Subject: [PATCH 42/48] MAINT #660 (#962) Remove a faulty entry in the argument list of datasets. 
--- openml/datasets/dataset.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index a6ea76592..8c366dfb8 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -85,8 +85,6 @@ class OpenMLDataset(OpenMLBase): Link to a paper describing the dataset. update_comment : str, optional An explanation for when the dataset is uploaded. - status : str, optional - Whether the dataset is active. md5_checksum : str, optional MD5 checksum to check if the dataset is downloaded without corruption. data_file : str, optional From 88b7cc0292bb5a7b86a9f45cf29d1733ee3cc300 Mon Sep 17 00:00:00 2001 From: Joaquin Vanschoren Date: Thu, 22 Oct 2020 10:03:34 +0200 Subject: [PATCH 43/48] Improved documentation of example (#960) * Improved documentation of example * Update examples/30_extended/create_upload_tutorial.py Co-authored-by: PGijsbers Co-authored-by: Matthias Feurer Co-authored-by: PGijsbers --- examples/30_extended/create_upload_tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/30_extended/create_upload_tutorial.py b/examples/30_extended/create_upload_tutorial.py index f0ea00016..0692b9b09 100644 --- a/examples/30_extended/create_upload_tutorial.py +++ b/examples/30_extended/create_upload_tutorial.py @@ -100,8 +100,8 @@ # The attribute that represents the row-id column, if present in the # dataset. row_id_attribute=None, - # Attributes that should be excluded in modelling, such as identifiers and - # indexes. + # Attribute or list of attributes that should be excluded in modelling, such as + # identifiers and indexes. E.g. "feat1" or ["feat1","feat2"] ignore_attribute=None, # How to cite the paper. citation=citation, From bf3cd2ebaac10bd05809a1ce90e346248c4c61b1 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 23 Oct 2020 02:39:26 -0700 Subject: [PATCH 44/48] Dataframe run on task (#777) * run on tasks allows dataframes * don't force third subcomponent part to be list * Making DataFrame default behaviour for runs; Fixing test cases for the same * Fixing PEP8 + Adding docstring to CustomImputer() * run on tasks allows dataframes * Attempting rebase * Fixing test cases * Trying test case fixes * run on tasks allows dataframes * don't force third subcomponent part to be list * Making DataFrame default behaviour for runs; Fixing test cases for the same * Fixing PEP8 + Adding docstring to CustomImputer() * Attempting rebase * Fixing test cases * Trying test case fixes * Allowing functions in subcomponents * Fixing test cases * Adding dataset output param to run * Fixing test cases * Changes suggested by mfeurer * Editing predict_proba function * Test case fix * Test case fix * Edit unit test to bypass server issue * Fixing unit test * Reiterating with @PGijsbers comments * Minor fixes to test cases * Adding unit test and suggestions from @mfeurer * Fixing test case for all sklearn versions * Testing changes * Fixing import in example * Triggering unit tests * Degugging failed example script * Adding unit tests * Push for debugging * Push for @mfeurer to debug * Resetting to debug * Updating branch * pre-commit fixes * Handling failing examples * Reiteration with clean ups and minor fixes * Closing comments * Black fixes * feedback from @mfeurer * Minor fix * suggestions from @PGijsbers Co-authored-by: neeratyoy Co-authored-by: neeratyoy --- .travis.yml | 29 +-- examples/30_extended/datasets_tutorial.py | 2 +- examples/30_extended/flow_id_tutorial.py | 8 + 
examples/30_extended/run_setup_tutorial.py | 40 +++- examples/30_extended/study_tutorial.py | 37 ++- openml/__init__.py | 1 + openml/datasets/functions.py | 6 +- openml/exceptions.py | 2 +- openml/extensions/sklearn/extension.py | 87 ++++--- openml/flows/flow.py | 8 +- openml/runs/functions.py | 66 +++-- openml/testing.py | 19 +- .../test_sklearn_extension.py | 162 +++++++++++-- tests/test_runs/test_run_functions.py | 226 ++++++++++++++---- tests/test_study/test_study_examples.py | 27 ++- 15 files changed, 560 insertions(+), 160 deletions(-) diff --git a/.travis.yml b/.travis.yml index 80f3bda42..9fd33403c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,20 +15,21 @@ env: - TEST_DIR=/tmp/test_dir/ - MODULE=openml matrix: - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" RUN_FLAKE8="true" SKIP_TESTS="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" COVERAGE="true" DOCPUSH="true" - - DISTRIB="conda" PYTHON_VERSION="3.8" SKLEARN_VERSION="0.23.1" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.23.1" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.23.1" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.8" SKLEARN_VERSION="0.22.2" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.22.2" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.22.2" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.2" - # Checks for older scikit-learn versions (which also don't nicely work with - # Python3.7) - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.19.2" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.18.2" SCIPY_VERSION=1.2.0 + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.23.1" COVERAGE="true" DOCPUSH="true" SKIP_TESTS="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.23.1" RUN_FLAKE8="true" SKIP_TESTS="true" + - DISTRIB="conda" PYTHON_VERSION="3.8" SKLEARN_VERSION="0.23.1" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.23.1" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.23.1" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.8" SKLEARN_VERSION="0.22.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.22.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.22.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.20.2" + # Checks for older scikit-learn versions (which also don't nicely work with + # Python3.7) + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.19.2" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.18.2" SCIPY_VERSION=1.2.0 # Travis issue # https://github.com/travis-ci/travis-ci/issues/8920 diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index e129b7718..b15260fb4 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -66,7 +66,7 @@ ############################################################################ # Get the actual data. 
# -# The dataset can be returned in 2 possible formats: as a NumPy array, a SciPy +# The dataset can be returned in 3 possible formats: as a NumPy array, a SciPy # sparse matrix, or as a Pandas DataFrame. The format is # controlled with the parameter ``dataset_format`` which can be either 'array' # (default) or 'dataframe'. Let's first build our dataset from a NumPy array diff --git a/examples/30_extended/flow_id_tutorial.py b/examples/30_extended/flow_id_tutorial.py index ef3689ea1..e77df8d1a 100644 --- a/examples/30_extended/flow_id_tutorial.py +++ b/examples/30_extended/flow_id_tutorial.py @@ -15,6 +15,11 @@ import openml + +# Activating test server +openml.config.start_using_configuration_for_example() + + clf = sklearn.tree.DecisionTreeClassifier() #################################################################################################### @@ -69,3 +74,6 @@ # This also works with the actual model (generalizing the first part of this example): flow_ids = openml.flows.get_flow_id(model=clf, exact_version=False) print(flow_ids) + +# Deactivating test server +openml.config.stop_using_configuration_for_example() diff --git a/examples/30_extended/run_setup_tutorial.py b/examples/30_extended/run_setup_tutorial.py index be438e728..a46bf9699 100644 --- a/examples/30_extended/run_setup_tutorial.py +++ b/examples/30_extended/run_setup_tutorial.py @@ -37,6 +37,11 @@ import sklearn.ensemble import sklearn.impute import sklearn.preprocessing +from sklearn.pipeline import make_pipeline, Pipeline +from sklearn.compose import ColumnTransformer +from sklearn.impute import SimpleImputer +from sklearn.preprocessing import OneHotEncoder, FunctionTransformer +from sklearn.experimental import enable_hist_gradient_boosting openml.config.start_using_configuration_for_example() @@ -52,22 +57,39 @@ # we will create a fairly complex model, with many preprocessing components and # many potential hyperparameters. Of course, the model can be as complex and as # easy as you want it to be -model_original = sklearn.pipeline.make_pipeline( - sklearn.impute.SimpleImputer(), sklearn.ensemble.RandomForestClassifier() -) +from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.decomposition import TruncatedSVD + + +# Helper functions to return required columns for ColumnTransformer +def cont(X): + return X.dtypes != "category" + + +def cat(X): + return X.dtypes == "category" + + +cat_imp = make_pipeline( + SimpleImputer(strategy="most_frequent"), + OneHotEncoder(handle_unknown="ignore", sparse=False), + TruncatedSVD(), +) +ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", "passthrough", cont)]) +model_original = sklearn.pipeline.Pipeline( + steps=[("transform", ct), ("estimator", HistGradientBoostingClassifier()),] +) # Let's change some hyperparameters. 
Of course, in any good application we # would tune them using, e.g., Random Search or Bayesian Optimization, but for # the purpose of this tutorial we set them to some specific values that might # or might not be optimal hyperparameters_original = { - "simpleimputer__strategy": "median", - "randomforestclassifier__criterion": "entropy", - "randomforestclassifier__max_features": 0.2, - "randomforestclassifier__min_samples_leaf": 1, - "randomforestclassifier__n_estimators": 16, - "randomforestclassifier__random_state": 42, + "estimator__loss": "auto", + "estimator__learning_rate": 0.15, + "estimator__max_iter": 50, + "estimator__min_samples_leaf": 1, } model_original.set_params(**hyperparameters_original) diff --git a/examples/30_extended/study_tutorial.py b/examples/30_extended/study_tutorial.py index b9202d7ce..c02a5c038 100644 --- a/examples/30_extended/study_tutorial.py +++ b/examples/30_extended/study_tutorial.py @@ -17,8 +17,11 @@ import numpy as np import sklearn.tree -import sklearn.pipeline -import sklearn.impute +from sklearn.pipeline import make_pipeline, Pipeline +from sklearn.compose import ColumnTransformer +from sklearn.impute import SimpleImputer +from sklearn.decomposition import TruncatedSVD +from sklearn.preprocessing import OneHotEncoder, FunctionTransformer import openml @@ -68,7 +71,7 @@ ) print(evaluations.head()) -############################################################################ +###########################################################from openml.testing import cat, cont################# # Uploading studies # ================= # @@ -78,12 +81,30 @@ openml.config.start_using_configuration_for_example() -# Very simple classifier which ignores the feature type +# Model that can handle missing values +from sklearn.experimental import enable_hist_gradient_boosting +from sklearn.ensemble import HistGradientBoostingClassifier + + +# Helper functions to return required columns for ColumnTransformer +def cont(X): + return X.dtypes != "category" + + +def cat(X): + return X.dtypes == "category" + + +cat_imp = make_pipeline( + SimpleImputer(strategy="most_frequent"), + OneHotEncoder(handle_unknown="ignore", sparse=False), + TruncatedSVD(), +) +ct = ColumnTransformer( + [("cat", cat_imp, cat), ("cont", FunctionTransformer(lambda x: x, validate=False), cont)] +) clf = sklearn.pipeline.Pipeline( - steps=[ - ("imputer", sklearn.impute.SimpleImputer()), - ("estimator", sklearn.tree.DecisionTreeClassifier(max_depth=5)), - ] + steps=[("transform", ct), ("estimator", HistGradientBoostingClassifier()),] ) suite = openml.study.get_suite(1) diff --git a/openml/__init__.py b/openml/__init__.py index 621703332..0bab3b1d5 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -113,6 +113,7 @@ def populate_cache(task_ids=None, dataset_ids=None, flow_ids=None, run_ids=None) "study", "utils", "_api_calls", + "__version__", ] # Load the scikit-learn extension by default diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 0f3037a74..550747eac 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -815,12 +815,12 @@ def edit_dataset( ) -> int: """ Edits an OpenMLDataset. - Specify atleast one field to edit, apart from data_id + Specify at least one field to edit, apart from data_id - For certain fields, a new dataset version is created : attributes, data, default_target_attribute, ignore_attribute, row_id_attribute. - - For other fields, the uploader can edit the exisiting version. 
- Noone except the uploader can edit the exisitng version. + - For other fields, the uploader can edit the existing version. + No one except the uploader can edit the existing version. Parameters ---------- diff --git a/openml/exceptions.py b/openml/exceptions.py index 07eb64e6c..781784ee2 100644 --- a/openml/exceptions.py +++ b/openml/exceptions.py @@ -27,7 +27,7 @@ def __init__(self, message: str, code: int = None, url: str = None): self.url = url super().__init__(message) - def __repr__(self): + def __str__(self): return "%s returned code %s: %s" % (self.url, self.code, self.message,) diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index 2b94d2cfd..edb14487b 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -11,7 +11,7 @@ from re import IGNORECASE import sys import time -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, cast import warnings import numpy as np @@ -1546,7 +1546,7 @@ def _run_model_on_fold( fold_no: int, y_train: Optional[np.ndarray] = None, X_test: Optional[Union[np.ndarray, scipy.sparse.spmatrix, pd.DataFrame]] = None, - ) -> Tuple[np.ndarray, np.ndarray, "OrderedDict[str, float]", Optional[OpenMLRunTrace]]: + ) -> Tuple[np.ndarray, pd.DataFrame, "OrderedDict[str, float]", Optional[OpenMLRunTrace]]: """Run a model on a repeat,fold,subsample triplet of the task and return prediction information. @@ -1579,24 +1579,21 @@ def _run_model_on_fold( Returns ------- - arff_datacontent : List[List] - Arff representation (list of lists) of the predictions that were - generated by this fold (required to populate predictions.arff) - arff_tracecontent : List[List] - Arff representation (list of lists) of the trace data that was generated by this - fold - (will be used to populate trace.arff, leave it empty if the model did not perform - any - hyperparameter optimization). + pred_y : np.ndarray + Predictions on the training/test set, depending on the task type. + For supervised tasks, predicitons are on the test set. + For unsupervised tasks, predicitons are on the training set. + proba_y : pd.DataFrame + Predicted probabilities for the test set. + None, if task is not Classification or Learning Curve prediction. user_defined_measures : OrderedDict[str, float] User defined measures that were generated on this fold - model : Any - The model trained on this repeat,fold,subsample triple. Will be used to generate - trace - information later on (in ``obtain_arff_trace``). + trace : Optional[OpenMLRunTrace]] + arff trace object from a fitted model and the trace content obtained by + repeatedly calling ``run_model_on_task`` """ - def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarray: + def _prediction_to_probabilities(y: np.ndarray, model_classes: List[Any]) -> pd.DataFrame: """Transforms predicted probabilities to match with OpenML class indices. 
Parameters @@ -1609,16 +1606,31 @@ def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarra Returns ------- - np.ndarray + pd.DataFrame """ + + if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)): + if task.class_labels is not None: + if isinstance(y_train, np.ndarray) and isinstance(task.class_labels[0], str): + # mapping (decoding) the predictions to the categories + # creating a separate copy to not change the expected pred_y type + y = [task.class_labels[pred] for pred in y] + else: + raise ValueError("The task has no class labels") + else: + return None + # y: list or numpy array of predictions # model_classes: sklearn classifier mapping from original array id to # prediction index id - if not isinstance(classes, list): - raise ValueError("please convert model classes to list prior to " "calling this fn") - result = np.zeros((len(y), len(classes)), dtype=np.float32) - for obs, prediction_idx in enumerate(y): - result[obs][prediction_idx] = 1.0 + if not isinstance(model_classes, list): + raise ValueError("please convert model classes to list prior to calling this fn") + # DataFrame allows more accurate mapping of classes as column names + result = pd.DataFrame( + 0, index=np.arange(len(y)), columns=model_classes, dtype=np.float32 + ) + for obs, prediction in enumerate(y): + result.loc[obs, prediction] = 1.0 return result if isinstance(task, OpenMLSupervisedTask): @@ -1677,6 +1689,16 @@ def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarra else: model_classes = used_estimator.classes_ + if not isinstance(model_classes, list): + model_classes = model_classes.tolist() + + # to handle the case when dataset is numpy and categories are encoded + # however the class labels stored in task are still categories + if isinstance(y_train, np.ndarray) and isinstance( + cast(List, task.class_labels)[0], str + ): + model_classes = [cast(List[str], task.class_labels)[i] for i in model_classes] + modelpredict_start_cputime = time.process_time() modelpredict_start_walltime = time.time() @@ -1708,9 +1730,10 @@ def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarra try: proba_y = model_copy.predict_proba(X_test) - except AttributeError: + proba_y = pd.DataFrame(proba_y, columns=model_classes) # handles X_test as numpy + except AttributeError: # predict_proba is not available when probability=False if task.class_labels is not None: - proba_y = _prediction_to_probabilities(pred_y, list(task.class_labels)) + proba_y = _prediction_to_probabilities(pred_y, model_classes) else: raise ValueError("The task has no class labels") @@ -1726,20 +1749,24 @@ def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarra # then we need to add a column full of zeros into the probabilities # for class 3 because the rest of the library expects that the # probabilities are ordered the same way as the classes are ordered). 
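As a standalone illustration of the column re-alignment described in the comment above (class names and probabilities are made up for the example), the DataFrame-based approach adds a zero column for every class the estimator never saw and then reorders the columns to the task's label order:

import pandas as pd

model_classes = ["A", "C"]            # classes the fitted estimator knows about
task_class_labels = ["A", "B", "C"]   # classes declared by the task
proba_y = pd.DataFrame([[0.9, 0.1], [0.2, 0.8]], columns=model_classes)

for col in task_class_labels:
    if col not in proba_y.columns:
        proba_y[col] = 0              # class never predicted -> probability 0
proba_y = proba_y[task_class_labels]  # columns now follow the task's label order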
- proba_y_new = np.zeros((proba_y.shape[0], len(task.class_labels))) - for idx, model_class in enumerate(model_classes): - proba_y_new[:, model_class] = proba_y[:, idx] - proba_y = proba_y_new - - if proba_y.shape[1] != len(task.class_labels): message = "Estimator only predicted for {}/{} classes!".format( proba_y.shape[1], len(task.class_labels), ) warnings.warn(message) openml.config.logger.warn(message) + + for i, col in enumerate(task.class_labels): + # adding missing columns with 0 probability + if col not in model_classes: + proba_y[col] = 0 + proba_y = proba_y[task.class_labels] else: raise ValueError("The task has no class labels") + if not np.all(set(proba_y.columns) == set(task.class_labels)): + missing_cols = list(set(task.class_labels) - set(proba_y.columns)) + raise ValueError("Predicted probabilities missing for the columns: ", missing_cols) + elif isinstance(task, OpenMLRegressionTask): proba_y = None diff --git a/openml/flows/flow.py b/openml/flows/flow.py index 47939c867..5aaf70a9d 100644 --- a/openml/flows/flow.py +++ b/openml/flows/flow.py @@ -263,7 +263,13 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]": for key in self.components: component_dict = OrderedDict() # type: 'OrderedDict[str, Dict]' component_dict["oml:identifier"] = key - component_dict["oml:flow"] = self.components[key]._to_dict()["oml:flow"] + if self.components[key] in ["passthrough", "drop"]: + component_dict["oml:flow"] = { + "oml-python:serialized_object": "component_reference", + "value": {"key": self.components[key], "step_name": self.components[key]}, + } + else: + component_dict["oml:flow"] = self.components[key]._to_dict()["oml:flow"] for key_ in component_dict: # We only need to check if the key is a string, because the diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 2b767eaa1..99007aa2a 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -52,6 +52,7 @@ def run_model_on_task( add_local_measures: bool = True, upload_flow: bool = False, return_flow: bool = False, + dataset_format: str = "dataframe", ) -> Union[OpenMLRun, Tuple[OpenMLRun, OpenMLFlow]]: """Run the model on the dataset defined by the task. @@ -79,6 +80,9 @@ def run_model_on_task( If False, do not upload the flow to OpenML. return_flow : bool (default=False) If True, returns the OpenMLFlow generated from the model in addition to the OpenMLRun. + dataset_format : str (default='dataframe') + If 'array', the dataset is passed to the model as a numpy array. + If 'dataframe', the dataset is passed to the model as a pandas dataframe. Returns ------- @@ -125,6 +129,7 @@ def get_task_and_type_conversion(task: Union[int, str, OpenMLTask]) -> OpenMLTas seed=seed, add_local_measures=add_local_measures, upload_flow=upload_flow, + dataset_format=dataset_format, ) if return_flow: return run, flow @@ -139,6 +144,7 @@ def run_flow_on_task( seed: int = None, add_local_measures: bool = True, upload_flow: bool = False, + dataset_format: str = "dataframe", ) -> OpenMLRun: """Run the model provided by the flow on the dataset defined by task. @@ -171,6 +177,9 @@ def run_flow_on_task( upload_flow : bool (default=False) If True, upload the flow to OpenML if it does not exist yet. If False, do not upload the flow to OpenML. + dataset_format : str (default='dataframe') + If 'array', the dataset is passed to the model as a numpy array. + If 'dataframe', the dataset is passed to the model as a pandas dataframe. 
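A hedged sketch of how the new dataset_format argument might be exercised through run_model_on_task, mirroring the test changes in this patch; the task id is a placeholder on the test server, and passing "array" keeps the pre-existing numpy-based behaviour:

import openml
from sklearn.dummy import DummyClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

task = openml.tasks.get_task(1)  # placeholder task id
pipe = Pipeline(
    [("imp", SimpleImputer(strategy="most_frequent")), ("clf", DummyClassifier())]
)
run = openml.runs.run_model_on_task(pipe, task, dataset_format="array")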
Returns ------- @@ -248,6 +257,7 @@ def run_flow_on_task( task=task, extension=flow.extension, add_local_measures=add_local_measures, + dataset_format=dataset_format, ) data_content, trace, fold_evaluations, sample_evaluations = res @@ -407,6 +417,7 @@ def _run_task_get_arffcontent( task: OpenMLTask, extension: "Extension", add_local_measures: bool, + dataset_format: str, ) -> Tuple[ List[List], Optional[OpenMLRunTrace], @@ -437,14 +448,23 @@ def _run_task_get_arffcontent( repeat=rep_no, fold=fold_no, sample=sample_no ) if isinstance(task, OpenMLSupervisedTask): - x, y = task.get_X_and_y(dataset_format="array") - train_x = x[train_indices] - train_y = y[train_indices] - test_x = x[test_indices] - test_y = y[test_indices] + x, y = task.get_X_and_y(dataset_format=dataset_format) + if dataset_format == "dataframe": + train_x = x.iloc[train_indices] + train_y = y.iloc[train_indices] + test_x = x.iloc[test_indices] + test_y = y.iloc[test_indices] + else: + train_x = x[train_indices] + train_y = y[train_indices] + test_x = x[test_indices] + test_y = y[test_indices] elif isinstance(task, OpenMLClusteringTask): - x = task.get_X(dataset_format="array") - train_x = x[train_indices] + x = task.get_X(dataset_format=dataset_format) + if dataset_format == "dataframe": + train_x = x.iloc[train_indices] + else: + train_x = x[train_indices] train_y = None test_x = None test_y = None @@ -480,17 +500,33 @@ def _calculate_local_measure(sklearn_fn, openml_name): if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)): for i, tst_idx in enumerate(test_indices): - if task.class_labels is not None: + prediction = ( + task.class_labels[pred_y[i]] if isinstance(pred_y[i], int) else pred_y[i] + ) + if isinstance(test_y, pd.Series): + test_prediction = ( + task.class_labels[test_y.iloc[i]] + if isinstance(test_y.iloc[i], int) + else test_y.iloc[i] + ) + else: + test_prediction = ( + task.class_labels[test_y[i]] + if isinstance(test_y[i], int) + else test_y[i] + ) + pred_prob = proba_y.iloc[i] if isinstance(proba_y, pd.DataFrame) else proba_y[i] + arff_line = format_prediction( task=task, repeat=rep_no, fold=fold_no, sample=sample_no, index=tst_idx, - prediction=task.class_labels[pred_y[i]], - truth=task.class_labels[test_y[i]], - proba=dict(zip(task.class_labels, proba_y[i])), + prediction=prediction, + truth=test_prediction, + proba=dict(zip(task.class_labels, pred_prob)), ) else: raise ValueError("The task has no class labels") @@ -504,14 +540,15 @@ def _calculate_local_measure(sklearn_fn, openml_name): elif isinstance(task, OpenMLRegressionTask): - for i in range(0, len(test_indices)): + for i, _ in enumerate(test_indices): + test_prediction = test_y.iloc[i] if isinstance(test_y, pd.Series) else test_y[i] arff_line = format_prediction( task=task, repeat=rep_no, fold=fold_no, index=test_indices[i], prediction=pred_y[i], - truth=test_y[i], + truth=test_prediction, ) arff_datacontent.append(arff_line) @@ -522,7 +559,8 @@ def _calculate_local_measure(sklearn_fn, openml_name): ) elif isinstance(task, OpenMLClusteringTask): - for i in range(0, len(test_indices)): + + for i, _ in enumerate(test_indices): arff_line = [test_indices[i], pred_y[i]] # row_id, cluster ID arff_datacontent.append(arff_line) diff --git a/openml/testing.py b/openml/testing.py index 0b4c50972..da07b0ed7 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -258,4 +258,21 @@ def _check_fold_timing_evaluations( from sklearn.preprocessing import Imputer as SimpleImputer -__all__ = ["TestBase", "SimpleImputer"] +class 
CustomImputer(SimpleImputer): + """Duplicate class alias for sklearn's SimpleImputer + + Helps bypass the sklearn extension duplicate operation check + """ + + pass + + +def cont(X): + return X.dtypes != "category" + + +def cat(X): + return X.dtypes == "category" + + +__all__ = ["TestBase", "SimpleImputer", "CustomImputer", "cat", "cont"] diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index 90f69df17..d34dc2ad3 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -13,6 +13,7 @@ from packaging import version import numpy as np +import pandas as pd import scipy.optimize import scipy.stats import sklearn.base @@ -39,7 +40,7 @@ from openml.flows import OpenMLFlow from openml.flows.functions import assert_flows_equal from openml.runs.trace import OpenMLRunTrace -from openml.testing import TestBase, SimpleImputer +from openml.testing import TestBase, SimpleImputer, CustomImputer, cat, cont this_directory = os.path.dirname(os.path.abspath(__file__)) @@ -537,8 +538,7 @@ def test_serialize_column_transformer(self): fixture = ( "sklearn.compose._column_transformer.ColumnTransformer(" "numeric=sklearn.preprocessing.{}.StandardScaler," - "nominal=sklearn.preprocessing._encoders.OneHotEncoder," - "drop=drop)".format(scaler_name) + "nominal=sklearn.preprocessing._encoders.OneHotEncoder,drop=drop)".format(scaler_name) ) fixture_short_name = "sklearn.ColumnTransformer" @@ -564,13 +564,7 @@ def test_serialize_column_transformer(self): "drop": ["drop"], } - serialization, new_model = self._serialization_test_helper( - model, - X=None, - y=None, - subcomponent_parameters=["transformers", "numeric", "nominal"], - dependencies_mock_call_count=(4, 8), - ) + serialization = self.extension.model_to_flow(model) structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture) self.assertEqual(serialization.custom_name, fixture_short_name) @@ -1566,12 +1560,15 @@ def setUp(self): # Test methods for performing runs with this extension module def test_run_model_on_task(self): - class MyPipe(sklearn.pipeline.Pipeline): - pass - task = openml.tasks.get_task(1) - pipe = MyPipe([("imp", SimpleImputer()), ("dummy", sklearn.dummy.DummyClassifier())]) - openml.runs.run_model_on_task(pipe, task) + # using most_frequent imputer since dataset has mixed types and to keep things simple + pipe = sklearn.pipeline.Pipeline( + [ + ("imp", SimpleImputer(strategy="most_frequent")), + ("dummy", sklearn.dummy.DummyClassifier()), + ] + ) + openml.runs.run_model_on_task(pipe, task, dataset_format="array") def test_seed_model(self): # randomized models that are initialized without seeds, can be seeded @@ -1627,7 +1624,7 @@ def test_seed_model_raises(self): with self.assertRaises(ValueError): self.extension.seed_model(model=clf, seed=42) - def test_run_model_on_fold_classification_1(self): + def test_run_model_on_fold_classification_1_array(self): task = openml.tasks.get_task(1) X, y = task.get_X_and_y() @@ -1656,14 +1653,87 @@ def test_run_model_on_fold_classification_1(self): # predictions self.assertIsInstance(y_hat, np.ndarray) self.assertEqual(y_hat.shape, y_test.shape) - self.assertIsInstance(y_hat_proba, np.ndarray) + self.assertIsInstance(y_hat_proba, pd.DataFrame) self.assertEqual(y_hat_proba.shape, (y_test.shape[0], 6)) np.testing.assert_array_almost_equal(np.sum(y_hat_proba, axis=1), 
np.ones(y_test.shape)) # The class '4' (at index 3) is not present in the training data. We check that the # predicted probabilities for that class are zero! - np.testing.assert_array_almost_equal(y_hat_proba[:, 3], np.zeros(y_test.shape)) + np.testing.assert_array_almost_equal( + y_hat_proba.iloc[:, 3].to_numpy(), np.zeros(y_test.shape) + ) for i in (0, 1, 2, 4, 5): - self.assertTrue(np.any(y_hat_proba[:, i] != np.zeros(y_test.shape))) + self.assertTrue(np.any(y_hat_proba.iloc[:, i].to_numpy() != np.zeros(y_test.shape))) + + # check user defined measures + fold_evaluations = collections.defaultdict(lambda: collections.defaultdict(dict)) + for measure in user_defined_measures: + fold_evaluations[measure][0][0] = user_defined_measures[measure] + + # trace. SGD does not produce any + self.assertIsNone(trace) + + self._check_fold_timing_evaluations( + fold_evaluations, + num_repeats=1, + num_folds=1, + task_type=task.task_type_id, + check_scores=False, + ) + + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.21", + reason="SimpleImputer, ColumnTransformer available only after 0.19 and " + "Pipeline till 0.20 doesn't support indexing and 'passthrough'", + ) + def test_run_model_on_fold_classification_1_dataframe(self): + from sklearn.compose import ColumnTransformer + + task = openml.tasks.get_task(1) + + # diff test_run_model_on_fold_classification_1_array() + X, y = task.get_X_and_y(dataset_format="dataframe") + train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0) + X_train = X.iloc[train_indices] + y_train = y.iloc[train_indices] + X_test = X.iloc[test_indices] + y_test = y.iloc[test_indices] + + # Helper functions to return required columns for ColumnTransformer + cat_imp = make_pipeline( + SimpleImputer(strategy="most_frequent"), + OneHotEncoder(handle_unknown="ignore", sparse=False), + ) + cont_imp = make_pipeline(CustomImputer(strategy="mean"), StandardScaler()) + ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) + pipeline = sklearn.pipeline.Pipeline( + steps=[("transform", ct), ("estimator", sklearn.tree.DecisionTreeClassifier())] + ) + # TODO add some mocking here to actually test the innards of this function, too! + res = self.extension._run_model_on_fold( + model=pipeline, + task=task, + fold_no=0, + rep_no=0, + X_train=X_train, + y_train=y_train, + X_test=X_test, + ) + + y_hat, y_hat_proba, user_defined_measures, trace = res + + # predictions + self.assertIsInstance(y_hat, np.ndarray) + self.assertEqual(y_hat.shape, y_test.shape) + self.assertIsInstance(y_hat_proba, pd.DataFrame) + self.assertEqual(y_hat_proba.shape, (y_test.shape[0], 6)) + np.testing.assert_array_almost_equal(np.sum(y_hat_proba, axis=1), np.ones(y_test.shape)) + # The class '4' (at index 3) is not present in the training data. We check that the + # predicted probabilities for that class are zero! 
+ np.testing.assert_array_almost_equal( + y_hat_proba.iloc[:, 3].to_numpy(), np.zeros(y_test.shape) + ) + for i in (0, 1, 2, 4, 5): + self.assertTrue(np.any(y_hat_proba.iloc[:, i].to_numpy() != np.zeros(y_test.shape))) # check user defined measures fold_evaluations = collections.defaultdict(lambda: collections.defaultdict(dict)) @@ -1710,11 +1780,11 @@ def test_run_model_on_fold_classification_2(self): # predictions self.assertIsInstance(y_hat, np.ndarray) self.assertEqual(y_hat.shape, y_test.shape) - self.assertIsInstance(y_hat_proba, np.ndarray) + self.assertIsInstance(y_hat_proba, pd.DataFrame) self.assertEqual(y_hat_proba.shape, (y_test.shape[0], 2)) np.testing.assert_array_almost_equal(np.sum(y_hat_proba, axis=1), np.ones(y_test.shape)) for i in (0, 1): - self.assertTrue(np.any(y_hat_proba[:, i] != np.zeros(y_test.shape))) + self.assertTrue(np.any(y_hat_proba.to_numpy()[:, i] != np.zeros(y_test.shape))) # check user defined measures fold_evaluations = collections.defaultdict(lambda: collections.defaultdict(dict)) @@ -1791,14 +1861,14 @@ def predict_proba(*args, **kwargs): np.testing.assert_array_almost_equal(np.sum(proba_1, axis=1), np.ones(X_test.shape[0])) # Test that there are predictions other than ones and zeros self.assertLess( - np.sum(proba_1 == 0) + np.sum(proba_1 == 1), + np.sum(proba_1.to_numpy() == 0) + np.sum(proba_1.to_numpy() == 1), X_test.shape[0] * len(task.class_labels), ) np.testing.assert_array_almost_equal(np.sum(proba_2, axis=1), np.ones(X_test.shape[0])) # Test that there are only ones and zeros predicted self.assertEqual( - np.sum(proba_2 == 0) + np.sum(proba_2 == 1), + np.sum(proba_2.to_numpy() == 0) + np.sum(proba_2.to_numpy() == 1), X_test.shape[0] * len(task.class_labels), ) @@ -2099,3 +2169,49 @@ def test_sklearn_serialization_with_none_step(self): ) with self.assertRaisesRegex(ValueError, msg): self.extension.model_to_flow(clf) + + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="columntransformer introduction in 0.20.0", + ) + def test_failed_serialization_of_custom_class(self): + """Test to check if any custom class inherited from sklearn expectedly fails serialization + """ + try: + from sklearn.impute import SimpleImputer + except ImportError: + # for lower versions + from sklearn.preprocessing import Imputer as SimpleImputer + + class CustomImputer(SimpleImputer): + pass + + def cont(X): + return X.dtypes != "category" + + def cat(X): + return X.dtypes == "category" + + import sklearn.metrics + import sklearn.tree + from sklearn.pipeline import Pipeline, make_pipeline + from sklearn.compose import ColumnTransformer + from sklearn.preprocessing import OneHotEncoder, StandardScaler + + cat_imp = make_pipeline( + SimpleImputer(strategy="most_frequent"), OneHotEncoder(handle_unknown="ignore") + ) + cont_imp = make_pipeline(CustomImputer(), StandardScaler()) + ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) + clf = Pipeline( + steps=[("preprocess", ct), ("estimator", sklearn.tree.DecisionTreeClassifier())] + ) # build a sklearn classifier + + task = openml.tasks.get_task(253) # data with mixed types from test server + try: + _ = openml.runs.run_model_on_task(clf, task) + except AttributeError as e: + if e.args[0] == "module '__main__' has no attribute '__version__'": + raise AttributeError(e) + else: + raise Exception(e) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index dcc7b0b96..89f01c72e 100644 --- a/tests/test_runs/test_run_functions.py +++ 
b/tests/test_runs/test_run_functions.py @@ -21,7 +21,7 @@ import pandas as pd import openml.extensions.sklearn -from openml.testing import TestBase, SimpleImputer +from openml.testing import TestBase, SimpleImputer, CustomImputer, cat, cont from openml.runs.functions import _run_task_get_arffcontent, run_exists, format_prediction from openml.runs.trace import OpenMLRunTrace from openml.tasks import TaskType @@ -31,13 +31,13 @@ from sklearn.tree import DecisionTreeClassifier from sklearn.dummy import DummyClassifier -from sklearn.preprocessing import StandardScaler +from sklearn.preprocessing import StandardScaler, OneHotEncoder from sklearn.feature_selection import VarianceThreshold from sklearn.linear_model import LogisticRegression, SGDClassifier, LinearRegression from sklearn.ensemble import RandomForestClassifier, BaggingClassifier from sklearn.svm import SVC from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, StratifiedKFold -from sklearn.pipeline import Pipeline +from sklearn.pipeline import Pipeline, make_pipeline class TestRun(TestBase): @@ -348,9 +348,13 @@ def test_run_regression_on_classif_task(self): clf = LinearRegression() task = openml.tasks.get_task(task_id) - with self.assertRaises(AttributeError): + # internally dataframe is loaded and targets are categorical + # which LinearRegression() cannot handle + with self.assertRaisesRegex( + AttributeError, "'LinearRegression' object has no attribute 'classes_'" + ): openml.runs.run_model_on_task( - model=clf, task=task, avoid_duplicate_runs=False, + model=clf, task=task, avoid_duplicate_runs=False, dataset_format="array", ) def test_check_erronous_sklearn_flow_fails(self): @@ -553,18 +557,26 @@ def test_run_and_upload_column_transformer_pipeline(self): def get_ct_cf(nominal_indices, numeric_indices): inner = sklearn.compose.ColumnTransformer( transformers=[ - ("numeric", sklearn.preprocessing.StandardScaler(), nominal_indices), ( - "nominal", - sklearn.preprocessing.OneHotEncoder(handle_unknown="ignore"), + "numeric", + make_pipeline( + SimpleImputer(strategy="mean"), sklearn.preprocessing.StandardScaler() + ), numeric_indices, ), + ( + "nominal", + make_pipeline( + CustomImputer(strategy="most_frequent"), + sklearn.preprocessing.OneHotEncoder(handle_unknown="ignore"), + ), + nominal_indices, + ), ], remainder="passthrough", ) return sklearn.pipeline.Pipeline( steps=[ - ("imputer", sklearn.impute.SimpleImputer(strategy="constant", fill_value=-1)), ("transformer", inner), ("classifier", sklearn.tree.DecisionTreeClassifier()), ] @@ -590,25 +602,36 @@ def get_ct_cf(nominal_indices, numeric_indices): sentinel=sentinel, ) - def test_run_and_upload_decision_tree_pipeline(self): + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="columntransformer introduction in 0.20.0", + ) + def test_run_and_upload_knn_pipeline(self): + + cat_imp = make_pipeline( + SimpleImputer(strategy="most_frequent"), OneHotEncoder(handle_unknown="ignore") + ) + cont_imp = make_pipeline(CustomImputer(), StandardScaler()) + from sklearn.compose import ColumnTransformer + from sklearn.neighbors import KNeighborsClassifier + + ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) pipeline2 = Pipeline( steps=[ - ("Imputer", SimpleImputer(strategy="median")), + ("Imputer", ct), ("VarianceThreshold", VarianceThreshold()), ( "Estimator", RandomizedSearchCV( - DecisionTreeClassifier(), - { - "min_samples_split": [2 ** x for x in range(1, 8)], - "min_samples_leaf": [2 ** x for x in range(0, 7)], - }, + 
KNeighborsClassifier(), + {"n_neighbors": [x for x in range(2, 10)]}, cv=3, n_iter=10, ), ), ] ) + task_id = self.TEST_SERVER_TASK_MISSING_VALS[0] n_missing_vals = self.TEST_SERVER_TASK_MISSING_VALS[1] n_test_obs = self.TEST_SERVER_TASK_MISSING_VALS[2] @@ -732,19 +755,31 @@ def test_learning_curve_task_2(self): ) self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples) + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.21", + reason="Pipelines don't support indexing (used for the assert check)", + ) def test_initialize_cv_from_run(self): - randomsearch = RandomizedSearchCV( - RandomForestClassifier(n_estimators=5), - { - "max_depth": [3, None], - "max_features": [1, 2, 3, 4], - "min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10], - "min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - "bootstrap": [True, False], - "criterion": ["gini", "entropy"], - }, - cv=StratifiedKFold(n_splits=2, shuffle=True), - n_iter=2, + randomsearch = Pipeline( + [ + ("enc", OneHotEncoder(handle_unknown="ignore")), + ( + "rs", + RandomizedSearchCV( + RandomForestClassifier(n_estimators=5), + { + "max_depth": [3, None], + "max_features": [1, 2, 3, 4], + "min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10], + "min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "bootstrap": [True, False], + "criterion": ["gini", "entropy"], + }, + cv=StratifiedKFold(n_splits=2, shuffle=True), + n_iter=2, + ), + ), + ] ) task = openml.tasks.get_task(11) @@ -759,8 +794,8 @@ def test_initialize_cv_from_run(self): modelR = openml.runs.initialize_model_from_run(run_id=run.run_id) modelS = openml.setups.initialize_model(setup_id=run.setup_id) - self.assertEqual(modelS.cv.random_state, 62501) - self.assertEqual(modelR.cv.random_state, 62501) + self.assertEqual(modelS[-1].cv.random_state, 62501) + self.assertEqual(modelR[-1].cv.random_state, 62501) def _test_local_evaluations(self, run): @@ -793,12 +828,23 @@ def _test_local_evaluations(self, run): self.assertGreaterEqual(alt_scores[idx], 0) self.assertLessEqual(alt_scores[idx], 1) + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="SimpleImputer doesn't handle mixed type DataFrame as input", + ) def test_local_run_swapped_parameter_order_model(self): # construct sci-kit learn classifier clf = Pipeline( steps=[ - ("imputer", SimpleImputer(strategy="median")), + ( + "imputer", + make_pipeline( + SimpleImputer(strategy="most_frequent"), + OneHotEncoder(handle_unknown="ignore"), + ), + ), + # random forest doesn't take categoricals ("estimator", RandomForestClassifier()), ] ) @@ -813,13 +859,18 @@ def test_local_run_swapped_parameter_order_model(self): self._test_local_evaluations(run) + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="SimpleImputer doesn't handle mixed type DataFrame as input", + ) def test_local_run_swapped_parameter_order_flow(self): # construct sci-kit learn classifier clf = Pipeline( steps=[ - ("imputer", SimpleImputer(strategy="median")), - ("estimator", RandomForestClassifier()), + ("imputer", SimpleImputer(strategy="most_frequent")), + ("encoder", OneHotEncoder(handle_unknown="ignore")), + ("estimator", RandomForestClassifier(n_estimators=10)), ] ) @@ -834,13 +885,18 @@ def test_local_run_swapped_parameter_order_flow(self): self._test_local_evaluations(run) + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="SimpleImputer doesn't handle mixed type DataFrame as input", + ) def test_local_run_metric_score(self): # construct sci-kit learn classifier clf = 
Pipeline( steps=[ - ("imputer", SimpleImputer(strategy="median")), - ("estimator", RandomForestClassifier()), + ("imputer", SimpleImputer(strategy="most_frequent")), + ("encoder", OneHotEncoder(handle_unknown="ignore")), + ("estimator", RandomForestClassifier(n_estimators=10)), ] ) @@ -863,15 +919,19 @@ def test_online_run_metric_score(self): self._test_local_evaluations(run) + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="SimpleImputer doesn't handle mixed type DataFrame as input", + ) def test_initialize_model_from_run(self): clf = sklearn.pipeline.Pipeline( steps=[ - ("Imputer", SimpleImputer(strategy="median")), + ("Imputer", SimpleImputer(strategy="most_frequent")), ("VarianceThreshold", VarianceThreshold(threshold=0.05)), ("Estimator", GaussianNB()), ] ) - task = openml.tasks.get_task(11) + task = openml.tasks.get_task(1198) run = openml.runs.run_model_on_task(model=clf, task=task, avoid_duplicate_runs=False,) run_ = run.publish() TestBase._mark_entity_for_removal("run", run_.run_id) @@ -887,7 +947,7 @@ def test_initialize_model_from_run(self): openml.flows.assert_flows_equal(flowR, flowL) openml.flows.assert_flows_equal(flowS, flowL) - self.assertEqual(flowS.components["Imputer"].parameters["strategy"], '"median"') + self.assertEqual(flowS.components["Imputer"].parameters["strategy"], '"most_frequent"') self.assertEqual(flowS.components["VarianceThreshold"].parameters["threshold"], "0.05") @pytest.mark.flaky() @@ -939,6 +999,10 @@ def test_get_run_trace(self): run_trace = openml.runs.get_run_trace(run_id) self.assertEqual(len(run_trace.trace_iterations), num_iterations * num_folds) + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="SimpleImputer doesn't handle mixed type DataFrame as input", + ) def test__run_exists(self): # would be better to not sentinel these clfs, # so we do not have to perform the actual runs @@ -1080,6 +1144,10 @@ def test_run_with_illegal_flow_id_1_after_load(self): openml.exceptions.PyOpenMLError, expected_message_regex, loaded_run.publish ) + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="OneHotEncoder cannot handle mixed type DataFrame as input", + ) def test__run_task_get_arffcontent(self): task = openml.tasks.get_task(7) num_instances = 3196 @@ -1088,9 +1156,16 @@ def test__run_task_get_arffcontent(self): flow = unittest.mock.Mock() flow.name = "dummy" - clf = SGDClassifier(loss="log", random_state=1) + clf = make_pipeline( + OneHotEncoder(handle_unknown="ignore"), SGDClassifier(loss="log", random_state=1) + ) res = openml.runs.functions._run_task_get_arffcontent( - flow=flow, extension=self.extension, model=clf, task=task, add_local_measures=True, + flow=flow, + extension=self.extension, + model=clf, + task=task, + add_local_measures=True, + dataset_format="dataframe", ) arff_datacontent, trace, fold_evaluations, _ = res # predictions @@ -1288,24 +1363,81 @@ def test_get_runs_list_by_tag(self): runs = openml.runs.list_runs(tag="curves") self.assertGreaterEqual(len(runs), 1) - def test_run_on_dataset_with_missing_labels(self): + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="columntransformer introduction in 0.20.0", + ) + def test_run_on_dataset_with_missing_labels_dataframe(self): # Check that _run_task_get_arffcontent works when one of the class # labels only declared in the arff file, but is not present in the # actual data - flow = unittest.mock.Mock() flow.name = "dummy" task = openml.tasks.get_task(2) + from sklearn.compose import 
ColumnTransformer + + cat_imp = make_pipeline( + SimpleImputer(strategy="most_frequent"), OneHotEncoder(handle_unknown="ignore") + ) + cont_imp = make_pipeline(CustomImputer(), StandardScaler()) + ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) model = Pipeline( - steps=[ - ("Imputer", SimpleImputer(strategy="median")), - ("Estimator", DecisionTreeClassifier()), - ] + steps=[("preprocess", ct), ("estimator", sklearn.tree.DecisionTreeClassifier())] + ) # build a sklearn classifier + + data_content, _, _, _ = _run_task_get_arffcontent( + flow=flow, + model=model, + task=task, + extension=self.extension, + add_local_measures=True, + dataset_format="dataframe", ) + # 2 folds, 5 repeats; keep in mind that this task comes from the test + # server, the task on the live server is different + self.assertEqual(len(data_content), 4490) + for row in data_content: + # repeat, fold, row_id, 6 confidences, prediction and correct label + self.assertEqual(len(row), 12) + + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="columntransformer introduction in 0.20.0", + ) + def test_run_on_dataset_with_missing_labels_array(self): + # Check that _run_task_get_arffcontent works when one of the class + # labels only declared in the arff file, but is not present in the + # actual data + flow = unittest.mock.Mock() + flow.name = "dummy" + task = openml.tasks.get_task(2) + # task_id=2 on test server has 38 columns with 6 numeric columns + cont_idx = [3, 4, 8, 32, 33, 34] + cat_idx = list(set(np.arange(38)) - set(cont_idx)) + cont = np.array([False] * 38) + cat = np.array([False] * 38) + cont[cont_idx] = True + cat[cat_idx] = True + + from sklearn.compose import ColumnTransformer + + cat_imp = make_pipeline( + SimpleImputer(strategy="most_frequent"), OneHotEncoder(handle_unknown="ignore") + ) + cont_imp = make_pipeline(CustomImputer(), StandardScaler()) + ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) + model = Pipeline( + steps=[("preprocess", ct), ("estimator", sklearn.tree.DecisionTreeClassifier())] + ) # build a sklearn classifier data_content, _, _, _ = _run_task_get_arffcontent( - flow=flow, model=model, task=task, extension=self.extension, add_local_measures=True, + flow=flow, + model=model, + task=task, + extension=self.extension, + add_local_measures=True, + dataset_format="array", # diff test_run_on_dataset_with_missing_labels_dataframe() ) # 2 folds, 5 repeats; keep in mind that this task comes from the test # server, the task on the live server is different diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py index 14e2405f2..fdb2747ec 100644 --- a/tests/test_study/test_study_examples.py +++ b/tests/test_study/test_study_examples.py @@ -1,12 +1,20 @@ # License: BSD 3-Clause -from openml.testing import TestBase, SimpleImputer +from openml.testing import TestBase, SimpleImputer, CustomImputer, cat, cont + +import sklearn +import unittest +from distutils.version import LooseVersion class TestStudyFunctions(TestBase): _multiprocess_can_split_ = True """Test the example code of Bischl et al. (2018)""" + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="columntransformer introduction in 0.20.0", + ) def test_Figure1a(self): """Test listing in Figure 1a on a single task and the old OpenML100 study. 
@@ -29,16 +37,19 @@ def test_Figure1a(self): """ # noqa: E501 import openml import sklearn.metrics - import sklearn.pipeline - import sklearn.preprocessing import sklearn.tree + from sklearn.pipeline import Pipeline, make_pipeline + from sklearn.compose import ColumnTransformer + from sklearn.preprocessing import OneHotEncoder, StandardScaler benchmark_suite = openml.study.get_study("OpenML100", "tasks") # obtain the benchmark suite - clf = sklearn.pipeline.Pipeline( - steps=[ - ("imputer", SimpleImputer()), - ("estimator", sklearn.tree.DecisionTreeClassifier()), - ] + cat_imp = make_pipeline( + SimpleImputer(strategy="most_frequent"), OneHotEncoder(handle_unknown="ignore") + ) + cont_imp = make_pipeline(CustomImputer(), StandardScaler()) + ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) + clf = Pipeline( + steps=[("preprocess", ct), ("estimator", sklearn.tree.DecisionTreeClassifier())] ) # build a sklearn classifier for task_id in benchmark_suite.tasks[:1]: # iterate over all tasks task = openml.tasks.get_task(task_id) # download the OpenML task From 9bc84a94a16d5800da283dba68d609eb5a0c4f48 Mon Sep 17 00:00:00 2001 From: Sahithya Ravi <44670788+sahithyaravi1493@users.noreply.github.com> Date: Fri, 23 Oct 2020 16:57:31 +0200 Subject: [PATCH 45/48] fork api (#944) * fork api * improve docs (+1 squashed commits) Squashed commits: [ec5c0d10] import changes * minor change (+1 squashed commits) Squashed commits: [1822c992] improve docs (+1 squashed commits) Squashed commits: [ec5c0d10] import changes * docs update * clarify example * Update doc/progress.rst * Fix whitespaces for docstring * fix error * Use id 999999 for unknown dataset Co-authored-by: PGijsbers --- doc/api.rst | 2 + doc/progress.rst | 2 +- .../30_extended/create_upload_tutorial.py | 2 +- examples/30_extended/datasets_tutorial.py | 20 ++- openml/datasets/__init__.py | 4 + openml/datasets/functions.py | 144 ++++++++++++------ tests/test_datasets/test_dataset_functions.py | 17 ++- 7 files changed, 132 insertions(+), 59 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 0bc092bd0..8a72e6b69 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -74,6 +74,8 @@ Modules list_datasets list_qualities status_update + edit_dataset + fork_dataset :mod:`openml.evaluations`: Evaluation Functions ----------------------------------------------- diff --git a/doc/progress.rst b/doc/progress.rst index a9f1e2f2a..2aad9e62a 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -8,7 +8,7 @@ Changelog 0.11.0 ~~~~~~ -* ADD #929: Add data edit API +* ADD #929: Add ``edit_dataset`` and ``fork_dataset`` to allow editing and forking of uploaded datasets. * FIX #873: Fixes an issue which resulted in incorrect URLs when printing OpenML objects after switching the server. * FIX #885: Logger no longer registered by default. Added utility functions to easily register diff --git a/examples/30_extended/create_upload_tutorial.py b/examples/30_extended/create_upload_tutorial.py index 0692b9b09..a4e1d9655 100644 --- a/examples/30_extended/create_upload_tutorial.py +++ b/examples/30_extended/create_upload_tutorial.py @@ -100,7 +100,7 @@ # The attribute that represents the row-id column, if present in the # dataset. row_id_attribute=None, - # Attribute or list of attributes that should be excluded in modelling, such as + # Attribute or list of attributes that should be excluded in modelling, such as # identifiers and indexes. E.g. "feat1" or ["feat1","feat2"] ignore_attribute=None, # How to cite the paper. 
diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index b15260fb4..0848a4ece 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -11,7 +11,7 @@ import openml import pandas as pd -from openml.datasets.functions import edit_dataset, get_dataset +from openml.datasets import edit_dataset, fork_dataset, get_dataset ############################################################################ # Exercise 0 @@ -139,11 +139,23 @@ ############################################################################ -# Edit critical fields, allowed only for owners of the dataset: -# default_target_attribute, row_id_attribute, ignore_attribute -# To edit critical fields of a dataset owned by you, configure the API key: +# Editing critical fields (default_target_attribute, row_id_attribute, ignore_attribute) is allowed +# only for the dataset owner. Further, critical fields cannot be edited if the dataset has any +# tasks associated with it. To edit critical fields of a dataset (without tasks) owned by you, +# configure the API key: # openml.config.apikey = 'FILL_IN_OPENML_API_KEY' data_id = edit_dataset(564, default_target_attribute="y") print(f"Edited dataset ID: {data_id}") + +############################################################################ +# Fork dataset +# Used to create a copy of the dataset with you as the owner. +# Use this API only if you are unable to edit the critical fields (default_target_attribute, +# ignore_attribute, row_id_attribute) of a dataset through the edit_dataset API. +# After the dataset is forked, you can edit the new version of the dataset using edit_dataset. + +data_id = fork_dataset(564) +print(f"Forked dataset ID: {data_id}") + openml.config.stop_using_configuration_for_example() diff --git a/openml/datasets/__init__.py b/openml/datasets/__init__.py index f380a1676..abde85c06 100644 --- a/openml/datasets/__init__.py +++ b/openml/datasets/__init__.py @@ -9,6 +9,8 @@ list_datasets, status_update, list_qualities, + edit_dataset, + fork_dataset, ) from .dataset import OpenMLDataset from .data_feature import OpenMLDataFeature @@ -24,4 +26,6 @@ "OpenMLDataFeature", "status_update", "list_qualities", + "edit_dataset", + "fork_dataset", ] diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 550747eac..84943b244 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -813,56 +813,63 @@ def edit_dataset( original_data_url=None, paper_url=None, ) -> int: + """ Edits an OpenMLDataset. + + In addition to providing the dataset id of the dataset to edit (through data_id), + you must specify a value for at least one of the optional function arguments, + i.e. one value for a field to edit. + + This function allows editing of both non-critical and critical fields. + Critical fields are default_target_attribute, ignore_attribute, row_id_attribute. + + - Editing non-critical data fields is allowed for all authenticated users. + - Editing critical fields is allowed only for the owner, provided there are no tasks + associated with this dataset. + + If dataset has tasks or if the user is not the owner, the only way + to edit critical fields is to use fork_dataset followed by edit_dataset. + + Parameters + ---------- + data_id : int + ID of the dataset. + description : str + Description of the dataset. + creator : str + The person who created the dataset. + contributor : str + People who contributed to the current version of the dataset. 
+ collection_date : str + The date the data was originally collected, given by the uploader. + language : str + Language in which the data is represented. + Starts with 1 upper case letter, rest lower case, e.g. 'English'. + default_target_attribute : str + The default target attribute, if it exists. + Can have multiple values, comma separated. + ignore_attribute : str | list + Attributes that should be excluded in modelling, + such as identifiers and indexes. + citation : str + Reference(s) that should be cited when building on this data. + row_id_attribute : str, optional + The attribute that represents the row-id column, if present in the + dataset. If ``data`` is a dataframe and ``row_id_attribute`` is not + specified, the index of the dataframe will be used as the + ``row_id_attribute``. If the name of the index is ``None``, it will + be discarded. + + .. versionadded: 0.8 + Inference of ``row_id_attribute`` from a dataframe. + original_data_url : str, optional + For derived data, the url to the original dataset. + paper_url : str, optional + Link to a paper describing the dataset. + + Returns + ------- + Dataset id """ - Edits an OpenMLDataset. - Specify at least one field to edit, apart from data_id - - For certain fields, a new dataset version is created : attributes, data, - default_target_attribute, ignore_attribute, row_id_attribute. - - - For other fields, the uploader can edit the existing version. - No one except the uploader can edit the existing version. - - Parameters - ---------- - data_id : int - ID of the dataset. - description : str - Description of the dataset. - creator : str - The person who created the dataset. - contributor : str - People who contributed to the current version of the dataset. - collection_date : str - The date the data was originally collected, given by the uploader. - language : str - Language in which the data is represented. - Starts with 1 upper case letter, rest lower case, e.g. 'English'. - default_target_attribute : str - The default target attribute, if it exists. - Can have multiple values, comma separated. - ignore_attribute : str | list - Attributes that should be excluded in modelling, - such as identifiers and indexes. - citation : str - Reference(s) that should be cited when building on this data. - row_id_attribute : str, optional - The attribute that represents the row-id column, if present in the - dataset. If ``data`` is a dataframe and ``row_id_attribute`` is not - specified, the index of the dataframe will be used as the - ``row_id_attribute``. If the name of the index is ``None``, it will - be discarded. - - .. versionadded: 0.8 - Inference of ``row_id_attribute`` from a dataframe. - original_data_url : str, optional - For derived data, the url to the original dataset. - paper_url : str, optional - Link to a paper describing the dataset. - - - Returns - ------- - data_id of the existing edited version or the new version created and published""" if not isinstance(data_id, int): raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id))) @@ -897,6 +904,45 @@ def edit_dataset( return int(data_id) +def fork_dataset(data_id: int) -> int: + """ + Creates a new dataset version, with the authenticated user as the new owner. + The forked dataset can have distinct dataset meta-data, + but the actual data itself is shared with the original version. + + This API is intended for use when a user is unable to edit the critical fields of a dataset + through the edit_dataset API. 
+ (Critical fields are default_target_attribute, ignore_attribute, row_id_attribute.) + + Specifically, this happens when the user is: + 1. Not the owner of the dataset. + 2. User is the owner of the dataset, but the dataset has tasks. + + In these two cases the only way to edit critical fields is: + 1. STEP 1: Fork the dataset using fork_dataset API + 2. STEP 2: Call edit_dataset API on the forked version. + + + Parameters + ---------- + data_id : int + id of the dataset to be forked + + Returns + ------- + Dataset id of the forked dataset + + """ + if not isinstance(data_id, int): + raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id))) + # compose data fork parameters + form_data = {"data_id": data_id} + result_xml = openml._api_calls._perform_api_call("data/fork", "post", data=form_data) + result = xmltodict.parse(result_xml) + data_id = result["oml:data_fork"]["oml:id"] + return int(data_id) + + def _get_dataset_description(did_cache_dir, dataset_id): """Get the dataset description as xml dictionary. diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 5076d06c2..c6e6f78f8 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -26,7 +26,6 @@ from openml.utils import _tag_entity, _create_cache_directory_for_id from openml.datasets.functions import ( create_dataset, - edit_dataset, attributes_arff_from_df, _get_cached_dataset, _get_cached_dataset_features, @@ -40,6 +39,7 @@ _get_online_dataset_format, DATASETS_CACHE_DIR_NAME, ) +from openml.datasets import fork_dataset, edit_dataset class TestOpenMLDataset(TestBase): @@ -1386,10 +1386,10 @@ def test_data_edit_errors(self): OpenMLServerException, "Unknown dataset", edit_dataset, - data_id=100000, + data_id=999999, description="xor operation dataset", ) - # Check server exception when owner/admin edits critical features of dataset with tasks + # Check server exception when owner/admin edits critical fields of dataset with tasks self.assertRaisesRegex( OpenMLServerException, "Critical features default_target_attribute, row_id_attribute and ignore_attribute " @@ -1398,7 +1398,7 @@ def test_data_edit_errors(self): data_id=223, default_target_attribute="y", ) - # Check server exception when a non-owner or non-admin tries to edit critical features + # Check server exception when a non-owner or non-admin tries to edit critical fields self.assertRaisesRegex( OpenMLServerException, "Critical features default_target_attribute, row_id_attribute and ignore_attribute " @@ -1407,3 +1407,12 @@ def test_data_edit_errors(self): data_id=128, default_target_attribute="y", ) + + def test_data_fork(self): + did = 1 + result = fork_dataset(did) + self.assertNotEqual(did, result) + # Check server exception when unknown dataset is provided + self.assertRaisesRegex( + OpenMLServerException, "Unknown dataset", fork_dataset, data_id=999999, + ) From f464a2b753f0c50a483d12c189e9c1e40fe85031 Mon Sep 17 00:00:00 2001 From: Aryan Chouhan <46817791+chouhanaryan@users.noreply.github.com> Date: Sat, 24 Oct 2020 12:38:55 +0530 Subject: [PATCH 46/48] Change default size for list_evaluations (#965) * Change default size for list_evaluations to 10000 * Suggestions from code review --- doc/progress.rst | 1 + examples/40_paper/2018_ida_strang_example.py | 2 +- openml/evaluations/functions.py | 7 ++++--- .../test_evaluation_functions.py | 20 +++++++++++++------ tests/test_study/test_study_functions.py | 4 +++- 5 files changed, 23 
insertions(+), 11 deletions(-) diff --git a/doc/progress.rst b/doc/progress.rst index 2aad9e62a..abab9f057 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -19,6 +19,7 @@ Changelog * MAINT #897: Dropping support for Python 3.5. * ADD #894: Support caching of datasets using feather format as an option. * ADD #945: PEP 561 compliance for distributing Type information +* MAINT #371: ``list_evaluations`` default ``size`` changed from ``None`` to ``10_000``. 0.10.2 ~~~~~~ diff --git a/examples/40_paper/2018_ida_strang_example.py b/examples/40_paper/2018_ida_strang_example.py index 687d973c2..8b225125b 100644 --- a/examples/40_paper/2018_ida_strang_example.py +++ b/examples/40_paper/2018_ida_strang_example.py @@ -47,7 +47,7 @@ # Downloads all evaluation records related to this study evaluations = openml.evaluations.list_evaluations( - measure, flows=flow_ids, study=study_id, output_format="dataframe" + measure, size=None, flows=flow_ids, study=study_id, output_format="dataframe" ) # gives us a table with columns data_id, flow1_value, flow2_value evaluations = evaluations.pivot(index="data_id", columns="flow_id", values="value").dropna() diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py index 4c17f8ce7..b3fdd0aa0 100644 --- a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -16,7 +16,7 @@ def list_evaluations( function: str, offset: Optional[int] = None, - size: Optional[int] = None, + size: Optional[int] = 10000, tasks: Optional[List[Union[str, int]]] = None, setups: Optional[List[Union[str, int]]] = None, flows: Optional[List[Union[str, int]]] = None, @@ -38,8 +38,9 @@ def list_evaluations( the evaluation function. e.g., predictive_accuracy offset : int, optional the number of runs to skip, starting from the first - size : int, optional - the maximum number of runs to show + size : int, default 10000 + The maximum number of runs to show. + If set to ``None``, it returns all the results. 
tasks : list[int,str], optional the list of task IDs diff --git a/tests/test_evaluations/test_evaluation_functions.py b/tests/test_evaluations/test_evaluation_functions.py index 0127309a7..e4de9b03c 100644 --- a/tests/test_evaluations/test_evaluation_functions.py +++ b/tests/test_evaluations/test_evaluation_functions.py @@ -41,7 +41,9 @@ def test_evaluation_list_filter_task(self): task_id = 7312 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", tasks=[task_id]) + evaluations = openml.evaluations.list_evaluations( + "predictive_accuracy", size=110, tasks=[task_id] + ) self.assertGreater(len(evaluations), 100) for run_id in evaluations.keys(): @@ -56,7 +58,7 @@ def test_evaluation_list_filter_uploader_ID_16(self): uploader_id = 16 evaluations = openml.evaluations.list_evaluations( - "predictive_accuracy", uploaders=[uploader_id], output_format="dataframe" + "predictive_accuracy", size=60, uploaders=[uploader_id], output_format="dataframe" ) self.assertEqual(evaluations["uploader"].unique(), [uploader_id]) @@ -66,7 +68,9 @@ def test_evaluation_list_filter_uploader_ID_10(self): openml.config.server = self.production_server setup_id = 10 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", setups=[setup_id]) + evaluations = openml.evaluations.list_evaluations( + "predictive_accuracy", size=60, setups=[setup_id] + ) self.assertGreater(len(evaluations), 50) for run_id in evaluations.keys(): @@ -81,7 +85,9 @@ def test_evaluation_list_filter_flow(self): flow_id = 100 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", flows=[flow_id]) + evaluations = openml.evaluations.list_evaluations( + "predictive_accuracy", size=10, flows=[flow_id] + ) self.assertGreater(len(evaluations), 2) for run_id in evaluations.keys(): @@ -96,7 +102,9 @@ def test_evaluation_list_filter_run(self): run_id = 12 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", runs=[run_id]) + evaluations = openml.evaluations.list_evaluations( + "predictive_accuracy", size=2, runs=[run_id] + ) self.assertEqual(len(evaluations), 1) for run_id in evaluations.keys(): @@ -164,7 +172,7 @@ def test_evaluation_list_sort(self): task_id = 6 # Get all evaluations of the task unsorted_eval = openml.evaluations.list_evaluations( - "predictive_accuracy", offset=0, tasks=[task_id] + "predictive_accuracy", size=None, offset=0, tasks=[task_id] ) # Get top 10 evaluations of the same task sorted_eval = openml.evaluations.list_evaluations( diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py index b3adfc9d6..993771c90 100644 --- a/tests/test_study/test_study_functions.py +++ b/tests/test_study/test_study_functions.py @@ -152,7 +152,9 @@ def test_publish_study(self): self.assertSetEqual(set(run_ids), set(study_downloaded.runs)) # test whether the list evaluation function also handles study data fine - run_ids = openml.evaluations.list_evaluations("predictive_accuracy", study=study.id) + run_ids = openml.evaluations.list_evaluations( + "predictive_accuracy", size=None, study=study.id + ) self.assertSetEqual(set(run_ids), set(study_downloaded.runs)) # attach more runs From 7a3e69faea8e44df873e699a1738736e05efc1ed Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Sat, 24 Oct 2020 14:49:19 +0200 Subject: [PATCH 47/48] prepare release of 0.11.0 (#966) Co-authored-by: PGijsbers --- doc/progress.rst | 30 +++++++++++++++++++++++++++--- openml/__version__.py | 2 +- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git 
a/doc/progress.rst b/doc/progress.rst index abab9f057..1956fcb42 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -8,18 +8,42 @@ Changelog 0.11.0 ~~~~~~ +* ADD #753: Allows uploading custom flows to OpenML via OpenML-Python. +* ADD #777: Allows running a flow on pandas dataframes (in addition to numpy arrays). +* ADD #888: Allow passing a `task_id` to `run_model_on_task`. +* ADD #894: Support caching of datasets using feather format as an option. * ADD #929: Add ``edit_dataset`` and ``fork_dataset`` to allow editing and forking of uploaded datasets. +* ADD #866, #943: Add support for scikit-learn's `passthrough` and `drop` when uploading flows to + OpenML. +* ADD #879: Add support for scikit-learn's MLP hyperparameter `layer_sizes`. +* ADD #894: Support caching of datasets using feather format as an option. +* ADD #945: PEP 561 compliance for distributing Type information. +* DOC #660: Remove nonexistent argument from docstring. +* DOC #901: The API reference now documents the config file and its options. +* DOC #912: API reference now shows `create_task`. +* DOC #954: Remove TODO text from documentation. +* DOC #960: document how to upload multiple ignore attributes. * FIX #873: Fixes an issue which resulted in incorrect URLs when printing OpenML objects after switching the server. * FIX #885: Logger no longer registered by default. Added utility functions to easily register logging to console and file. +* FIX #890: Correct the scaling of data in the SVM example. +* MAINT #371: ``list_evaluations`` default ``size`` changed from ``None`` to ``10_000``. * MAINT #767: Source distribution installation is now unit-tested. +* MAINT #781: Add pre-commit and automated code formatting with black. +* MAINT #804: Rename arguments of list_evaluations to indicate they expect lists of ids. * MAINT #836: OpenML supports only pandas version 1.0.0 or above. * MAINT #865: OpenML no longer bundles test files in the source distribution. +* MAINT #881: Improve the error message for too-long URIs. * MAINT #897: Dropping support for Python 3.5. -* ADD #894: Support caching of datasets using feather format as an option. -* ADD #945: PEP 561 compliance for distributing Type information -* MAINT #371: ``list_evaluations`` default ``size`` changed from ``None`` to ``10_000``. +* MAINT #916: Adding support for Python 3.8. +* MAINT #920: Improve error messages for dataset upload. +* MAINT #921: Improve hangling of the OpenML server URL in the config file. +* MAINT #925: Improve error handling and error message when loading datasets. +* MAINT #928: Restructures the contributing documentation. +* MAINT #936: Adding support for scikit-learn 0.23.X. +* MAINT #945: Make OpenML-Python PEP562 compliant. +* MAINT #951: Converts TaskType class to a TaskType enum. 
0.10.2 ~~~~~~ diff --git a/openml/__version__.py b/openml/__version__.py index 338948217..07c9a950d 100644 --- a/openml/__version__.py +++ b/openml/__version__.py @@ -3,4 +3,4 @@ # License: BSD 3-Clause # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.11.0dev" +__version__ = "0.11.0" From ec34b5c22971a54f174dff021930f985f7988a78 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Sun, 25 Oct 2020 16:40:12 +0100 Subject: [PATCH 48/48] Update conftest.py --- tests/conftest.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 60d555538..461a513fd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,8 +22,6 @@ # License: BSD 3-Clause -# License: BSD 3-Clause - import os import logging from typing import List
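The per-column preprocessing that the updated tests assemble from the ``openml.testing`` helpers (``CustomImputer`` and the ``cat``/``cont`` masks are test fixtures, not public API) can be reproduced with plain scikit-learn when running on dataframes. The following is a rough sketch rather than the canonical recipe: it assumes scikit-learn >= 0.20, derives the categorical mask from the dataset via the usual ``get_data``/``target_name`` accessors, and reuses test-server task 2 from the tests above::

    import openml
    from sklearn.compose import ColumnTransformer
    from sklearn.impute import SimpleImputer
    from sklearn.pipeline import Pipeline, make_pipeline
    from sklearn.preprocessing import OneHotEncoder, StandardScaler
    from sklearn.tree import DecisionTreeClassifier

    openml.config.start_using_configuration_for_example()  # task 2 lives on the test server

    task = openml.tasks.get_task(2)  # classification task with missing values
    dataset = task.get_dataset()
    # Only the categorical indicator is needed here; run_model_on_task fetches the data itself.
    _, _, categorical, _ = dataset.get_data(target=task.target_name, dataset_format="dataframe")
    cat_mask = list(categorical)              # True for nominal columns
    cont_mask = [not c for c in categorical]  # True for numeric columns

    # Impute + one-hot encode nominal columns, impute + scale numeric columns.
    cat_pipe = make_pipeline(
        SimpleImputer(strategy="most_frequent"), OneHotEncoder(handle_unknown="ignore")
    )
    cont_pipe = make_pipeline(SimpleImputer(strategy="mean"), StandardScaler())
    preprocess = ColumnTransformer([("cat", cat_pipe, cat_mask), ("cont", cont_pipe, cont_mask)])

    clf = Pipeline(steps=[("preprocess", preprocess), ("estimator", DecisionTreeClassifier())])
    run = openml.runs.run_model_on_task(model=clf, task=task, avoid_duplicate_runs=False)

    openml.config.stop_using_configuration_for_example()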
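The fork-then-edit workflow added by the ``fork_dataset`` patch can be exercised end to end roughly as sketched below; this assumes the example/test-server configuration with a valid API key, and reuses dataset id 564 and the ``default_target_attribute`` edit from the datasets tutorial::

    import openml
    from openml.datasets import edit_dataset, fork_dataset

    openml.config.start_using_configuration_for_example()
    # openml.config.apikey = 'FILL_IN_OPENML_API_KEY'

    # Forking creates a new dataset version owned by the authenticated user ...
    forked_id = fork_dataset(564)

    # ... whose critical fields can then be edited, which is not possible on the
    # original dataset when it has tasks or belongs to another user.
    edited_id = edit_dataset(forked_id, default_target_attribute="y")
    print(f"Forked dataset: {forked_id}, edited version: {edited_id}")

    openml.config.stop_using_configuration_for_example()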
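Because ``list_evaluations`` now caps results at 10,000 records by default, callers that need an exhaustive listing have to opt out explicitly with ``size=None``. A small sketch of both call styles, reusing the ``predictive_accuracy`` measure and task 7312 from the evaluation tests::

    import openml

    # New default: at most 10,000 evaluation records are returned.
    capped = openml.evaluations.list_evaluations("predictive_accuracy", tasks=[7312])

    # size=None restores the previous unbounded behaviour (can be slow for popular tasks).
    full = openml.evaluations.list_evaluations(
        "predictive_accuracy", size=None, tasks=[7312], output_format="dataframe"
    )
    print(len(capped), len(full))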