diff --git a/examples/getting_started/README.txt b/examples/getting_started/README.txt index 2c2d8c2d2..308f8a3ff 100644 --- a/examples/getting_started/README.txt +++ b/examples/getting_started/README.txt @@ -1,4 +1,5 @@ Getting started --------------- -Overall and gentle introduction to skore. +We recommend starting with these examples that provide an overall and gentle +introduction to skore. diff --git a/examples/getting_started/plot_quick_start.py b/examples/getting_started/plot_quick_start.py index 96735aff1..c0c749a27 100644 --- a/examples/getting_started/plot_quick_start.py +++ b/examples/getting_started/plot_quick_start.py @@ -7,27 +7,9 @@ """ # %% -# From your Python code, create and load a skore :class:`~skore.Project`: - -# %% -import skore - -# sphinx_gallery_start_ignore -import os -import tempfile -from pathlib import Path - -temp_dir = tempfile.TemporaryDirectory() -temp_dir_path = Path(temp_dir.name) -os.chdir(temp_dir_path) -# sphinx_gallery_end_ignore -my_project = skore.Project("my_project") - -# %% -# This will create a skore project directory named ``my_project.skore`` in your -# current working directory. 
- -# %% +# Machine learning evaluation and diagnostics +# =========================================== +# # Evaluate your model using skore's :class:`~skore.CrossValidationReport`: # %% @@ -42,8 +24,8 @@ cv_report = CrossValidationReport(clf, X, y) # %% -# Display the help tree to see all the insights that are available to you (given that -# you are doing binary classification): +# Display the help tree to see all the insights that are available to you (skore +# detected that you are doing binary classification): # %% cv_report.help() @@ -66,7 +48,32 @@ plt.tight_layout() # %% -# Store the results in the skore project for safe-keeping: +# Skore project: storing some items +# ================================= + +# %% +# From your Python code, create and load a skore :class:`~skore.Project`: + +# %% +import skore + +# sphinx_gallery_start_ignore +import os +import tempfile +from pathlib import Path + +temp_dir = tempfile.TemporaryDirectory() +temp_dir_path = Path(temp_dir.name) +os.chdir(temp_dir_path) +# sphinx_gallery_end_ignore +my_project = skore.Project("my_project") + +# %% +# This will create a skore project directory named ``my_project.skore`` in your +# current working directory. + +# %% +# Store some previous results in the skore project for safe-keeping: # %% my_project.put("df_cv_report_metrics", df_cv_report_metrics) @@ -77,10 +84,10 @@ # %% df_get = my_project.get("df_cv_report_metrics") -df_get # sphinx_gallery_start_ignore temp_dir.cleanup() # sphinx_gallery_end_ignore +df_get # %% # .. admonition:: What's next? diff --git a/examples/getting_started/plot_skore_getting_started.py b/examples/getting_started/plot_skore_getting_started.py index f7fcd2e1b..a0819d0ef 100644 --- a/examples/getting_started/plot_skore_getting_started.py +++ b/examples/getting_started/plot_skore_getting_started.py @@ -9,164 +9,26 @@ # %% # This getting started guide illustrates how to use skore and why: # -# #. 
Track your ML/DS results using skore's :class:`~skore.Project` -# (for storage). -# -# #. Machine learning diagnostics: get assistance when developing your ML/DS -# projects to avoid common pitfalls and follow recommended practices. +# #. Get assistance when developing your ML/DS projects to avoid common pitfalls +# and follow recommended practices. # -# * :class:`skore.EstimatorReport`: get an insightful report for your estimator +# * :class:`skore.EstimatorReport`: get an insightful report on your estimator # -# * :class:`skore.CrossValidationReport`: get an insightful report for your +# * :class:`skore.CrossValidationReport`: get an insightful report on your # cross-validation results # # * :func:`skore.train_test_split`: get diagnostics when splitting your data - -# %% -# Tracking: skore project -# ======================= -# -# A key feature of skore is its :class:`~skore.Project` that allows to store -# items of many types. - -# %% -# Setup: creating and loading a skore project -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -# %% -# Let's start by creating a skore project directory named ``my_project.skore`` in our -# current directory. 
- -# %% -import skore - -# sphinx_gallery_start_ignore -import os -import tempfile -from pathlib import Path - -temp_dir = tempfile.TemporaryDirectory() -temp_dir_path = Path(temp_dir.name) -os.chdir(temp_dir_path) -# sphinx_gallery_end_ignore -my_project = skore.Project("my_project") - -# %% -# Now that the project exists, we can write some Python code (in the same -# directory) to add (:func:`~skore.Project.put`) some useful items in it, -# with a key-value convention: - -# %% -my_project.put("my_int", 3) - -# %% -# We can retrieve the value of an item: - -# %% -my_project.get("my_int") - -# %% -# Skore project: storing some items -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# -# As an illustration of the usage of the skore project with a machine learning -# motivation, let us perform a hyperparameter sweep and store relevant information -# in the skore project. - -# %% -# We search for the ``alpha`` hyperparameter of a Ridge regression on the -# Diabetes dataset: - -# %% -import numpy as np -from sklearn.datasets import load_diabetes -from sklearn.linear_model import Ridge -from sklearn.model_selection import GridSearchCV - -X, y = load_diabetes(return_X_y=True) - -gs_cv = GridSearchCV( - Ridge(), - param_grid={"alpha": np.logspace(-3, 5, 50)}, - scoring="neg_root_mean_squared_error", -) -gs_cv.fit(X, y) - -# %% -# Now, we store the hyperparameter's metrics in a dataframe and make a custom -# plot: - -# %% -import pandas as pd - -df = pd.DataFrame(gs_cv.cv_results_) -df.insert(len(df.columns), "rmse", -df["mean_test_score"].values) -df[["param_alpha", "rmse"]].head() - -# %% -import matplotlib.pyplot as plt - -fig = plt.figure(layout="constrained") -plt.plot(df["param_alpha"], df["rmse"]) -plt.xscale("log") -plt.xlabel("Alpha hyperparameter") -plt.ylabel("RMSE") -plt.title("Ridge regression") -plt.show() - -# %% -# -# Finally, we store these relevant items in our skore project, so that we -# can visualize them later: - -# %% -my_project.put("my_gs_cv", gs_cv) 
-my_project.put("my_df", df) -my_project.put("my_fig", fig) - -# %% -# .. seealso:: -# -# For more information about the functionalities and the different types -# of items that we can store in a skore :class:`~skore.Project`, -# see :ref:`example_working_with_projects`. - -# %% -# Tracking the history of items -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -# %% -# Suppose we store several values for a same item called ``my_key_metric``: -# -# .. code-block:: python -# -# my_project.put("my_key_metric", 4) -# -# my_project.put("my_key_metric", 9) # -# my_project.put("my_key_metric", 16) -# -# Skore does not overwrite items with the same name (key): instead, it stores -# their history so that nothing is lost. -# -# These tracking functionalities are very useful to: -# -# * never lose some key machine learning metrics, -# * and observe the evolution over time / runs. - -# %% -# .. seealso:: -# -# For more information about the tracking of items using their history, -# see :ref:`example_tracking_items`. +# #. Track your ML/DS results using skore's :class:`~skore.Project` +# (for storage). # %% -# Machine learning diagnostics and evaluation +# Machine learning evaluation and diagnostics # =========================================== # # Skore re-implements or wraps some key scikit-learn class / functions to automatically -# provide diagnostics and checks when using them, as a way to facilitate good practices -# and avoid common pitfalls. +# provide insights and diagnostics when using them, as a way to facilitate good +# practices and avoid common pitfalls. 
# %% # Model evaluation with skore @@ -188,7 +50,7 @@ X, y = make_classification(n_classes=2, n_samples=100_000, n_informative=4) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) -clf = LogisticRegression() +clf = LogisticRegression(random_state=0) est_report = EstimatorReport( clf, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test @@ -196,7 +58,7 @@ # %% # Now, we can display the help tree to see all the insights that are available to us -# given that we are doing binary classification: +# (skore detected that we are doing binary classification): # %% est_report.help() @@ -254,10 +116,19 @@ # We display the ROC curves for each fold: # %% -roc_plot = cv_report.metrics.plot.roc() -roc_plot +roc_plot_cv = cv_report.metrics.plot.roc() +roc_plot_cv plt.tight_layout() +# %% +# We can retrieve the estimator report of a specific fold to investigate further, +# for example the first fold: + +# %% +est_report_fold = cv_report.estimator_reports_[0] +df_report_metrics_fold = est_report_fold.metrics.report_metrics() +df_report_metrics_fold + # %% # .. seealso:: # @@ -274,6 +145,7 @@ # Let us load a dataset containing some time series data: # %% +import pandas as pd from skrub.datasets import fetch_employee_salaries dataset = fetch_employee_salaries() @@ -286,19 +158,17 @@ # Now, let us apply :func:`skore.train_test_split` on this data: # %% +import skore + X_train, X_test, y_train, y_test = skore.train_test_split( X, y, random_state=0, shuffle=False ) -# sphinx_gallery_start_ignore -temp_dir.cleanup() -# sphinx_gallery_end_ignore # %% # We get a ``TimeBasedColumnWarning`` advising us to use # :class:`sklearn.model_selection.TimeSeriesSplit` instead! # Indeed, we should not shuffle time-ordered data! - # %% # .. seealso:: # @@ -306,6 +176,106 @@ # For more information about the motivation and usage of # :func:`skore.train_test_split`, see :ref:`example_train_test_split`. 
+# %% +# Tracking: skore project +# ======================= +# +# A key feature of skore is its :class:`~skore.Project` that allows storing +# items of many types. + +# %% +# Setup: creating and loading a skore project +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +# %% +# Let's start by creating a skore project directory named ``my_project.skore`` in our +# current directory: + +# %% + +# sphinx_gallery_start_ignore +import os +import tempfile +from pathlib import Path + +temp_dir = tempfile.TemporaryDirectory() +temp_dir_path = Path(temp_dir.name) +os.chdir(temp_dir_path) +# sphinx_gallery_end_ignore +my_project = skore.Project("my_project") + + +# %% +# Skore project: storing and retrieving some items +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Now that the project exists, we can store some useful items in it (in the same +# directory) using :func:`~skore.Project.put`, with a "universal" key-value convention: + +# %% +my_project.put("my_int", 3) +my_project.put("df_cv_report_metrics", df_cv_report_metrics) +my_project.put("roc_plot", roc_plot) + +# %% +# .. note :: +# With the skore :func:`~skore.Project.put`, there is no need to remember the API for +# each type of object: ``df.to_csv(...)``, ``plt.savefig(...)``, ``np.save(...)``, +# etc. + +# %% +# We can retrieve the value of an item: + +# %% +my_project.get("my_int") + +# %% +my_project.get("df_cv_report_metrics") + +# %% +# .. seealso:: +# +# For more information about the functionalities and the different types +# of items that we can store in a skore :class:`~skore.Project`, +# see :ref:`example_working_with_projects`. 
+ +# %% +# Tracking the history of items +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +# %% +# Suppose we store several values for the same item called ``my_key_metric``: + +# %% +my_project.put("my_key_metric", 4) + +my_project.put("my_key_metric", 9) + +my_project.put("my_key_metric", 16) + +# %% +# Skore does not overwrite items with the same name (key): instead, it stores +# their history so that nothing is lost: + +# %% +history = my_project.get("my_key_metric", version="all") +# sphinx_gallery_start_ignore +temp_dir.cleanup() +# sphinx_gallery_end_ignore +history + +# %% +# These tracking functionalities are very useful to: +# +# * never lose some key machine learning metrics, +# * and observe the evolution over time / runs. + +# %% +# .. seealso:: +# +# For more information about the tracking of items using their history, +# see :ref:`example_tracking_items`. + # %% # .. admonition:: Stay tuned! # diff --git a/examples/model_evaluation/README.txt b/examples/model_evaluation/README.txt index 327af82b9..883da6a01 100644 --- a/examples/model_evaluation/README.txt +++ b/examples/model_evaluation/README.txt @@ -2,4 +2,4 @@ Model evaluation ---------------- These examples illustrate how skore can help data scientists to improve their -machine learning modelling thanks to diagnostics. +machine learning modelling thanks to methodological guidance and diagnostics. diff --git a/examples/model_evaluation/plot_estimator_report.py b/examples/model_evaluation/plot_estimator_report.py index b3d86f6b5..d4621a0ba 100644 --- a/examples/model_evaluation/plot_estimator_report.py +++ b/examples/model_evaluation/plot_estimator_report.py @@ -1,9 +1,9 @@ """ .. 
_example_estimator_report: -============================================ -Get insights from any scikit-learn estimator -============================================ +=============================================================== +`EstimatorReport`: Get insights from any scikit-learn estimator +=============================================================== This example shows how the :class:`skore.EstimatorReport` class can be used to quickly get insights from any scikit-learn estimator. diff --git a/examples/model_evaluation/plot_train_test_split.py b/examples/model_evaluation/plot_train_test_split.py index 326ecac16..cb0a812d3 100644 --- a/examples/model_evaluation/plot_train_test_split.py +++ b/examples/model_evaluation/plot_train_test_split.py @@ -1,9 +1,9 @@ """ .. _example_train_test_split: -================ -Train-test split -================ +============================================================ +`train_test_split`: get diagnostics when splitting your data +============================================================ This example illustrates the motivation and the use of skore's :func:`skore.train_test_split` to get assistance when developing ML/DS projects. diff --git a/examples/skore_project/plot_working_with_projects.py b/examples/skore_project/plot_working_with_projects.py index 79ba19caf..6113e6b12 100644 --- a/examples/skore_project/plot_working_with_projects.py +++ b/examples/skore_project/plot_working_with_projects.py @@ -28,6 +28,20 @@ # sphinx_gallery_end_ignore my_project = skore.Project("my_project") +# %% +# There is a very simple and unique API for all objects: +# +# .. code-block:: python +# +# my_project.put("my_key", "my_value") +# +# There is no need to remember ``plt.savefig(...)``, ``df.to_csv(...)``, +# ``np.save(...)``, etc for each type of object. +# +# In the following, we will list all the different types of objects that we can +# :func:`~skore.Project.put` inside a skore :class:`~skore.Project`. 
+ + # %% # Storing integers # ================ @@ -47,8 +61,11 @@ my_project.get("my_int") # %% -# Careful; like in a traditional Python dictionary, the ``put`` method will *overwrite* -# past data if we use a key that already exists! +# More generally, we follow the principle of "what you put is what you get". + +# %% +# Like in a traditional Python dictionary, the ``put`` method will *overwrite* +# past data if we use a key that already exists: # %% my_project.put("my_int", 30_000) @@ -62,7 +79,7 @@ # %% # .. seealso:: # -# Skore does not exactly *overwrite*, but stores the history of items. +# Actually, skore does not exactly *overwrite*, but stores the history of items. # For more information about the tracking of items using their history, # see :ref:`example_tracking_items`. @@ -307,20 +324,6 @@ def my_func(x): my_anim_plotly_fig -# %% -# PIL image: - -# %% -import io - -import PIL - -my_pil_image = PIL.Image.new("RGB", (100, 100), color="red") -with io.BytesIO() as output: - my_pil_image.save(output, format="png") - -my_project.put("my_pil_image", my_pil_image) - # %% # Storing scikit-learn models and pipelines # ========================================= @@ -359,7 +362,17 @@ def my_func(x): my_pipeline.fit(X, y) my_project.put("my_fitted_pipeline", my_pipeline) -my_pipeline # sphinx_gallery_start_ignore temp_dir.cleanup() # sphinx_gallery_end_ignore +my_pipeline + +# %% +# Storing skore objects +# ===================== +# +# .. seealso:: +# +# We can also store skore objects such as :class:`skore.EstimatorReport` and +# :class:`skore.CrossValidationReport`. +# See :ref:`example_use_case_employee_salaries`. 
diff --git a/examples/technical_details/README.txt b/examples/technical_details/README.txt index 4a2613898..d6831a7f3 100644 --- a/examples/technical_details/README.txt +++ b/examples/technical_details/README.txt @@ -1,5 +1,5 @@ Technical details ----------------- -These examples shows some technical details at the core of skore to better understand +These examples show some technical details at the core of skore to better understand some of the mechanics under the hood. diff --git a/examples/use_cases/README.txt b/examples/use_cases/README.txt index 5af2a9c49..d953333ca 100644 --- a/examples/use_cases/README.txt +++ b/examples/use_cases/README.txt @@ -1,8 +1,8 @@ End-to-end data science use cases --------------------------------- -These examples show skore in action on real use cases. We aimed at showing skore's -ability to: +These examples showcase skore in action on real use cases. +We aimed at showing skore's ability to: - be compatible with scikit-learn - reduce boilerplate code for some standard *de facto* data science analysis diff --git a/sphinx/index.rst b/sphinx/index.rst index 1426cfa6c..eb27ba4c0 100644 --- a/sphinx/index.rst +++ b/sphinx/index.rst @@ -38,9 +38,9 @@ Key features - **Evaluate**: Uncover actionable insights through **automated reports** surfacing relevant metrics. Explore faster with our intelligent caching system. - - :class:`skore.EstimatorReport`: get a report for your estimator + - :class:`skore.EstimatorReport`: get a report on your estimator - - :class:`skore.CrossValidationReport`: get a report for your cross-validation + - :class:`skore.CrossValidationReport`: get a report on your cross-validation results .. currentmodule:: skore