diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 754db239..bc7b223a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@ default_language_version:
python: python3
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v4.6.0
+ rev: v5.0.0
hooks:
- id: check-added-large-files
- id: check-case-conflict
@@ -29,7 +29,7 @@ repos:
.*\.ipynb
)$
- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: "v0.5.5"
+ rev: "v0.8.3"
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
diff --git a/examples/DVCLive-Evidently.ipynb b/examples/DVCLive-Evidently.ipynb
index 171bbd58..84329e99 100644
--- a/examples/DVCLive-Evidently.ipynb
+++ b/examples/DVCLive-Evidently.ipynb
@@ -1,1715 +1,1716 @@
{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "provenance": [],
- "toc_visible": true,
- "authorship_tag": "ABX9TyNJAdha/v4n9zLqIfGakg0E"
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "WpfOFaqHcnAt"
+ },
+ "source": [
+ "# Install Evidently and DVC with DVCLive"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 2337,
+ "status": "ok",
+ "timestamp": 1697468096427,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
- "language_info": {
- "name": "python"
- }
+ "id": "BqWpagFPZ45W"
+ },
+ "outputs": [],
+ "source": [
+ "!pip uninstall -q -y sqlalchemy pyarrow ipython-sql pandas-gbq"
+ ]
},
- "cells": [
- {
- "cell_type": "markdown",
- "source": [
- "# Install Evidently and DVC with DVCLive"
- ],
- "metadata": {
- "id": "WpfOFaqHcnAt"
- }
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 33615,
+ "status": "ok",
+ "timestamp": 1697468130037,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
- {
- "cell_type": "code",
- "source": [
- "!pip uninstall -q -y sqlalchemy pyarrow ipython-sql pandas-gbq"
- ],
- "metadata": {
- "id": "BqWpagFPZ45W",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468096427,
- "user_tz": -120,
- "elapsed": 2337,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- }
- },
- "execution_count": 1,
- "outputs": []
+ "id": "DijzqeokW595"
+ },
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "!pip install -q dvc==3.25.0 dvclive==3.0.1 evidently==0.4.5 pandas==1.5.3"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ZyZ2sX8GcvMU"
+ },
+ "source": [
+ "# Load the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "id": "DijzqeokW595",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468130037,
- "user_tz": -120,
- "elapsed": 33615,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- }
- },
- "outputs": [],
- "source": [
- "%%capture\n",
- "!pip install -q dvc==3.25.0 dvclive==3.0.1 evidently==0.4.5 pandas==1.5.3"
- ]
+ "executionInfo": {
+ "elapsed": 1772,
+ "status": "ok",
+ "timestamp": 1697468131788,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
+ "id": "ZUrB0D59XMDD",
+ "outputId": "9f6f5a3c-f856-4d56-a8fb-ec4483ec6127"
+ },
+ "outputs": [
{
- "cell_type": "markdown",
- "source": [
- "# Load the data"
- ],
- "metadata": {
- "id": "ZyZ2sX8GcvMU"
- }
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--2023-10-16 14:55:29-- https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip\n",
+ "Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252\n",
+ "Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: unspecified\n",
+ "Saving to: ‘bike+sharing+dataset.zip’\n",
+ "\n",
+ "bike+sharing+datase [ <=> ] 273.43K 443KB/s in 0.6s \n",
+ "\n",
+ "2023-10-16 14:55:30 (443 KB/s) - ‘bike+sharing+dataset.zip’ saved [279992]\n",
+ "\n",
+ "Archive: bike+sharing+dataset.zip\n",
+ " inflating: Readme.txt \n",
+ " inflating: day.csv \n",
+ " inflating: hour.csv \n"
+ ]
+ }
+ ],
+ "source": [
+ "!mkdir raw_data && \\\n",
+ " cd raw_data && \\\n",
+ " wget https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip && \\\n",
+ " unzip bike+sharing+dataset.zip"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 357,
+ "status": "ok",
+ "timestamp": 1697468132141,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
- {
- "cell_type": "code",
- "source": [
- "!mkdir raw_data && \\\n",
- " cd raw_data && \\\n",
- " wget https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip && \\\n",
- " unzip bike+sharing+dataset.zip"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "ZUrB0D59XMDD",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468131788,
- "user_tz": -120,
- "elapsed": 1772,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- },
- "outputId": "9f6f5a3c-f856-4d56-a8fb-ec4483ec6127"
- },
- "execution_count": 3,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "--2023-10-16 14:55:29-- https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip\n",
- "Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252\n",
- "Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.\n",
- "HTTP request sent, awaiting response... 200 OK\n",
- "Length: unspecified\n",
- "Saving to: ‘bike+sharing+dataset.zip’\n",
- "\n",
- "bike+sharing+datase [ <=> ] 273.43K 443KB/s in 0.6s \n",
- "\n",
- "2023-10-16 14:55:30 (443 KB/s) - ‘bike+sharing+dataset.zip’ saved [279992]\n",
- "\n",
- "Archive: bike+sharing+dataset.zip\n",
- " inflating: Readme.txt \n",
- " inflating: day.csv \n",
- " inflating: hour.csv \n"
- ]
- }
- ]
+ "id": "P3XXcUrQY1EQ"
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
},
- {
- "cell_type": "code",
- "source": [
- "import pandas as pd"
- ],
- "metadata": {
- "id": "P3XXcUrQY1EQ",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468132141,
- "user_tz": -120,
- "elapsed": 357,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- }
- },
- "execution_count": 4,
- "outputs": []
+ "executionInfo": {
+ "elapsed": 9,
+ "status": "ok",
+ "timestamp": 1697468132141,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
+ "id": "MDK0xkdbYCWg",
+ "outputId": "ec8d2605-144d-45ff-b442-70ba858a44a3"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "source": [
- "df = pd.read_csv(\"raw_data/day.csv\", header=0, sep=',', parse_dates=['dteday'])\n",
- "df.head()"
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " instant | \n",
+ " dteday | \n",
+ " season | \n",
+ " yr | \n",
+ " mnth | \n",
+ " holiday | \n",
+ " weekday | \n",
+ " workingday | \n",
+ " weathersit | \n",
+ " temp | \n",
+ " atemp | \n",
+ " hum | \n",
+ " windspeed | \n",
+ " casual | \n",
+ " registered | \n",
+ " cnt | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2011-01-01 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 6 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 0.344167 | \n",
+ " 0.363625 | \n",
+ " 0.805833 | \n",
+ " 0.160446 | \n",
+ " 331 | \n",
+ " 654 | \n",
+ " 985 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2011-01-02 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 0.363478 | \n",
+ " 0.353739 | \n",
+ " 0.696087 | \n",
+ " 0.248539 | \n",
+ " 131 | \n",
+ " 670 | \n",
+ " 801 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 2011-01-03 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.196364 | \n",
+ " 0.189405 | \n",
+ " 0.437273 | \n",
+ " 0.248309 | \n",
+ " 120 | \n",
+ " 1229 | \n",
+ " 1349 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 2011-01-04 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.200000 | \n",
+ " 0.212122 | \n",
+ " 0.590435 | \n",
+ " 0.160296 | \n",
+ " 108 | \n",
+ " 1454 | \n",
+ " 1562 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 2011-01-05 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.226957 | \n",
+ " 0.229270 | \n",
+ " 0.436957 | \n",
+ " 0.186900 | \n",
+ " 82 | \n",
+ " 1518 | \n",
+ " 1600 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 206
- },
- "id": "MDK0xkdbYCWg",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468132141,
- "user_tz": -120,
- "elapsed": 9,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- },
- "outputId": "ec8d2605-144d-45ff-b442-70ba858a44a3"
- },
- "execution_count": 5,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- " instant dteday season yr mnth holiday weekday workingday \\\n",
- "0 1 2011-01-01 1 0 1 0 6 0 \n",
- "1 2 2011-01-02 1 0 1 0 0 0 \n",
- "2 3 2011-01-03 1 0 1 0 1 1 \n",
- "3 4 2011-01-04 1 0 1 0 2 1 \n",
- "4 5 2011-01-05 1 0 1 0 3 1 \n",
- "\n",
- " weathersit temp atemp hum windspeed casual registered \\\n",
- "0 2 0.344167 0.363625 0.805833 0.160446 331 654 \n",
- "1 2 0.363478 0.353739 0.696087 0.248539 131 670 \n",
- "2 1 0.196364 0.189405 0.437273 0.248309 120 1229 \n",
- "3 1 0.200000 0.212122 0.590435 0.160296 108 1454 \n",
- "4 1 0.226957 0.229270 0.436957 0.186900 82 1518 \n",
- "\n",
- " cnt \n",
- "0 985 \n",
- "1 801 \n",
- "2 1349 \n",
- "3 1562 \n",
- "4 1600 "
- ],
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " instant | \n",
- " dteday | \n",
- " season | \n",
- " yr | \n",
- " mnth | \n",
- " holiday | \n",
- " weekday | \n",
- " workingday | \n",
- " weathersit | \n",
- " temp | \n",
- " atemp | \n",
- " hum | \n",
- " windspeed | \n",
- " casual | \n",
- " registered | \n",
- " cnt | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1 | \n",
- " 2011-01-01 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1 | \n",
- " 0 | \n",
- " 6 | \n",
- " 0 | \n",
- " 2 | \n",
- " 0.344167 | \n",
- " 0.363625 | \n",
- " 0.805833 | \n",
- " 0.160446 | \n",
- " 331 | \n",
- " 654 | \n",
- " 985 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2 | \n",
- " 2011-01-02 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 2 | \n",
- " 0.363478 | \n",
- " 0.353739 | \n",
- " 0.696087 | \n",
- " 0.248539 | \n",
- " 131 | \n",
- " 670 | \n",
- " 801 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 3 | \n",
- " 2011-01-03 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 0.196364 | \n",
- " 0.189405 | \n",
- " 0.437273 | \n",
- " 0.248309 | \n",
- " 120 | \n",
- " 1229 | \n",
- " 1349 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 4 | \n",
- " 2011-01-04 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1 | \n",
- " 0 | \n",
- " 2 | \n",
- " 1 | \n",
- " 1 | \n",
- " 0.200000 | \n",
- " 0.212122 | \n",
- " 0.590435 | \n",
- " 0.160296 | \n",
- " 108 | \n",
- " 1454 | \n",
- " 1562 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 5 | \n",
- " 2011-01-05 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1 | \n",
- " 0 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 0.226957 | \n",
- " 0.229270 | \n",
- " 0.436957 | \n",
- " 0.186900 | \n",
- " 82 | \n",
- " 1518 | \n",
- " 1600 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
- ]
- },
- "metadata": {},
- "execution_count": 5
- }
+ "text/plain": [
+ " instant dteday season yr mnth holiday weekday workingday \\\n",
+ "0 1 2011-01-01 1 0 1 0 6 0 \n",
+ "1 2 2011-01-02 1 0 1 0 0 0 \n",
+ "2 3 2011-01-03 1 0 1 0 1 1 \n",
+ "3 4 2011-01-04 1 0 1 0 2 1 \n",
+ "4 5 2011-01-05 1 0 1 0 3 1 \n",
+ "\n",
+ " weathersit temp atemp hum windspeed casual registered \\\n",
+ "0 2 0.344167 0.363625 0.805833 0.160446 331 654 \n",
+ "1 2 0.363478 0.353739 0.696087 0.248539 131 670 \n",
+ "2 1 0.196364 0.189405 0.437273 0.248309 120 1229 \n",
+ "3 1 0.200000 0.212122 0.590435 0.160296 108 1454 \n",
+ "4 1 0.226957 0.229270 0.436957 0.186900 82 1518 \n",
+ "\n",
+ " cnt \n",
+ "0 985 \n",
+ "1 801 \n",
+ "2 1349 \n",
+ "3 1562 \n",
+ "4 1600 "
]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = pd.read_csv(\"raw_data/day.csv\", header=0, sep=\",\", parse_dates=[\"dteday\"])\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4a9DrmjyhhEP"
+ },
+ "source": [
+ "# Define column mapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 5,
+ "status": "ok",
+ "timestamp": 1697468132141,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
- {
- "cell_type": "markdown",
- "source": [
- "# Define column mapping"
- ],
- "metadata": {
- "id": "4a9DrmjyhhEP"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "from evidently.pipeline.column_mapping import ColumnMapping"
- ],
- "metadata": {
- "id": "_bkEZuM8gELe",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468132141,
- "user_tz": -120,
- "elapsed": 5,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- }
- },
- "execution_count": 6,
- "outputs": []
- },
- {
- "cell_type": "code",
- "source": [
- "data_columns = ColumnMapping()\n",
- "data_columns.numerical_features = ['weathersit', 'temp', 'atemp', 'hum', 'windspeed']\n",
- "data_columns.categorical_features = ['holiday', 'workingday']"
- ],
- "metadata": {
- "id": "dLIZqkHAgEuo",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468132141,
- "user_tz": -120,
- "elapsed": 5,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- }
- },
- "execution_count": 7,
- "outputs": []
- },
- {
- "cell_type": "markdown",
- "source": [
- "# Define what to log"
- ],
- "metadata": {
- "id": "yNBKbk51hpyz"
- }
+ "id": "_bkEZuM8gELe"
+ },
+ "outputs": [],
+ "source": [
+ "from evidently.pipeline.column_mapping import ColumnMapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 5,
+ "status": "ok",
+ "timestamp": 1697468132141,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
- {
- "cell_type": "code",
- "source": [
- "from evidently.report import Report\n",
- "from evidently.metric_preset import DataDriftPreset"
- ],
- "metadata": {
- "id": "owblpS3Ahw0o",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468136565,
- "user_tz": -120,
- "elapsed": 4428,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- }
- },
- "execution_count": 8,
- "outputs": []
+ "id": "dLIZqkHAgEuo"
+ },
+ "outputs": [],
+ "source": [
+ "data_columns = ColumnMapping()\n",
+ "data_columns.numerical_features = [\"weathersit\", \"temp\", \"atemp\", \"hum\", \"windspeed\"]\n",
+ "data_columns.categorical_features = [\"holiday\", \"workingday\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yNBKbk51hpyz"
+ },
+ "source": [
+ "# Define what to log"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 4428,
+ "status": "ok",
+ "timestamp": 1697468136565,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
- {
- "cell_type": "code",
- "source": [
- "def eval_drift(reference, production, column_mapping):\n",
- " data_drift_report = Report(metrics=[DataDriftPreset()])\n",
- " data_drift_report.run(\n",
- " reference_data=reference, current_data=production, column_mapping=column_mapping\n",
- " )\n",
- " report = data_drift_report.as_dict()\n",
- "\n",
- " drifts = []\n",
- "\n",
- " for feature in (\n",
- " column_mapping.numerical_features + column_mapping.categorical_features\n",
- " ):\n",
- " drifts.append(\n",
- " (\n",
- " feature,\n",
- " report[\"metrics\"][1][\"result\"][\"drift_by_columns\"][feature][\n",
- " \"drift_score\"\n",
- " ],\n",
- " )\n",
- " )\n",
- "\n",
- " return drifts\n"
- ],
- "metadata": {
- "id": "vRF8PjiYho6z",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468136565,
- "user_tz": -120,
- "elapsed": 3,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- }
- },
- "execution_count": 9,
- "outputs": []
+ "id": "owblpS3Ahw0o"
+ },
+ "outputs": [],
+ "source": [
+ "from evidently.report import Report\n",
+ "from evidently.metric_preset import DataDriftPreset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 3,
+ "status": "ok",
+ "timestamp": 1697468136565,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
- {
- "cell_type": "markdown",
- "source": [
- "# Define the comparison windows"
- ],
- "metadata": {
- "id": "4Yhet51mh6Xz"
- }
+ "id": "vRF8PjiYho6z"
+ },
+ "outputs": [],
+ "source": [
+ "def eval_drift(reference, production, column_mapping):\n",
+ " data_drift_report = Report(metrics=[DataDriftPreset()])\n",
+ " data_drift_report.run(\n",
+ " reference_data=reference, current_data=production, column_mapping=column_mapping\n",
+ " )\n",
+ " report = data_drift_report.as_dict()\n",
+ "\n",
+ " drifts = []\n",
+ "\n",
+ " for feature in (\n",
+ " column_mapping.numerical_features + column_mapping.categorical_features\n",
+ " ):\n",
+ " drifts.append(\n",
+ " (\n",
+ " feature,\n",
+ " report[\"metrics\"][1][\"result\"][\"drift_by_columns\"][feature][\n",
+ " \"drift_score\"\n",
+ " ],\n",
+ " )\n",
+ " )\n",
+ "\n",
+ " return drifts"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4Yhet51mh6Xz"
+ },
+ "source": [
+ "# Define the comparison windows"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 3,
+ "status": "ok",
+ "timestamp": 1697468136565,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
- {
- "cell_type": "code",
- "source": [
- "#set reference dates\n",
- "reference_dates = ('2011-01-01 00:00:00','2011-01-28 23:00:00')\n",
- "\n",
- "#set experiment batches dates\n",
- "experiment_batches = [\n",
- " ('2011-01-01 00:00:00','2011-01-29 23:00:00'),\n",
- " ('2011-01-29 00:00:00','2011-02-07 23:00:00'),\n",
- " ('2011-02-07 00:00:00','2011-02-14 23:00:00'),\n",
- " ('2011-02-15 00:00:00','2011-02-21 23:00:00'),\n",
- "]"
- ],
- "metadata": {
- "id": "nTq8xUbGh3Ux",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468136565,
- "user_tz": -120,
- "elapsed": 3,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- }
- },
- "execution_count": 10,
- "outputs": []
+ "id": "nTq8xUbGh3Ux"
+ },
+ "outputs": [],
+ "source": [
+ "# set reference dates\n",
+ "reference_dates = (\"2011-01-01 00:00:00\", \"2011-01-28 23:00:00\")\n",
+ "\n",
+ "# set experiment batches dates\n",
+ "experiment_batches = [\n",
+ " (\"2011-01-01 00:00:00\", \"2011-01-29 23:00:00\"),\n",
+ " (\"2011-01-29 00:00:00\", \"2011-02-07 23:00:00\"),\n",
+ " (\"2011-02-07 00:00:00\", \"2011-02-14 23:00:00\"),\n",
+ " (\"2011-02-15 00:00:00\", \"2011-02-21 23:00:00\"),\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "8lNq9OdniDss"
+ },
+ "source": [
+ "# Run and log experiments with DVCLive"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 3,
+ "status": "ok",
+ "timestamp": 1697468136565,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
- {
- "cell_type": "markdown",
- "source": [
- "# Run and log experiments with DVCLive"
- ],
- "metadata": {
- "id": "8lNq9OdniDss"
- }
+ "id": "zUt5jrVSRIqD"
+ },
+ "outputs": [],
+ "source": [
+ "!git config --global user.email \"you@example.com\"\n",
+ "!git config --global user.name \"Your Name\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 1231,
+ "status": "ok",
+ "timestamp": 1697468137794,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
- {
- "cell_type": "code",
- "source": [
- "!git config --global user.email \"you@example.com\"\n",
- "!git config --global user.name \"Your Name\""
- ],
- "metadata": {
- "id": "zUt5jrVSRIqD",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468136565,
- "user_tz": -120,
- "elapsed": 3,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- }
- },
- "execution_count": 11,
- "outputs": []
+ "id": "5Hx1jI9PnT3C"
+ },
+ "outputs": [],
+ "source": [
+ "from dvclive import Live"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "jTsrtISaSF7D"
+ },
+ "source": [
+ "There are two ways to use DVC: put all the drift evaluation steps in a single experiment (corresponding to one git commit), or save each step as a separate experiment (one git commit per step)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "RGrEbbla30jr"
+ },
+ "source": [
+ "## In one experiment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
- {
- "cell_type": "code",
- "source": [
- "from dvclive import Live"
- ],
- "metadata": {
- "id": "5Hx1jI9PnT3C",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468137794,
- "user_tz": -120,
- "elapsed": 1231,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- }
- },
- "execution_count": 12,
- "outputs": []
+ "executionInfo": {
+ "elapsed": 2844,
+ "status": "ok",
+ "timestamp": 1697468140631,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
+ "id": "ijUf_HhRobl0",
+ "outputId": "796d7eec-17dc-40b2-a4c9-5bdcf9184c58"
+ },
+ "outputs": [
{
- "cell_type": "markdown",
- "source": [
- "There are two ways to use DVC, put all the drift evaluation steps in one single experiment (corresponding to a git commit), or to save each step as a separate experiment (git commit)"
- ],
- "metadata": {
- "id": "jTsrtISaSF7D"
- }
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "/content\n",
+ "/content/experiments\n",
+ "hint: Using 'master' as the name for the initial branch. This default branch name\n",
+ "hint: is subject to change. To configure the initial branch name to use in all\n",
+ "hint: of your new repositories, which will suppress this warning, call:\n",
+ "hint: \n",
+ "hint: \tgit config --global init.defaultBranch \n",
+ "hint: \n",
+ "hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and\n",
+ "hint: 'development'. The just-created branch can be renamed via this command:\n",
+ "hint: \n",
+ "hint: \tgit branch -m \n",
+ "Initialized empty Git repository in /content/experiments/.git/\n",
+ "fatal: pathspec '.gitignore' did not match any files\n",
+ "On branch master\n",
+ "\n",
+ "Initial commit\n",
+ "\n",
+ "nothing to commit (create/copy files and use \"git add\" to track)\n",
+ "Initialized DVC repository.\n",
+ "\n",
+ "You can now commit the changes to git.\n",
+ "\n",
+ "+---------------------------------------------------------------------+\n",
+ "| |\n",
+ "| DVC has enabled anonymous aggregate usage analytics. |\n",
+ "| Read the analytics documentation (and how to opt-out) here: |\n",
+ "| |\n",
+ "| |\n",
+ "+---------------------------------------------------------------------+\n",
+ "\n",
+ "What's next?\n",
+ "------------\n",
+ "- Check out the documentation: \n",
+ "- Get help and share ideas: \n",
+ "- Star us on GitHub: \n",
+ "[master (root-commit) 9220260] Init DVC\n",
+ " 3 files changed, 6 insertions(+)\n",
+ " create mode 100644 .dvc/.gitignore\n",
+ " create mode 100644 .dvc/config\n",
+ " create mode 100644 .dvcignore\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Setup a git repo with dvc\n",
+ "\n",
+ "%cd /content\n",
+ "!rm -rf experiments && mkdir experiments\n",
+ "%cd experiments\n",
+ "\n",
+ "!git init\n",
+ "!git add .gitignore\n",
+ "!git commit -m \"Init repo\"\n",
+ "!dvc init\n",
+ "!git commit -m \"Init DVC\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
},
- {
- "cell_type": "markdown",
- "source": [
- "## In one experiment"
- ],
- "metadata": {
- "id": "RGrEbbla30jr"
- }
+ "executionInfo": {
+ "elapsed": 16055,
+ "status": "ok",
+ "timestamp": 1697468156663,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
+ "id": "_h-jGJqPiA30",
+ "outputId": "0b949e24-8c53-4765-a8ee-64d002b3801e"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "source": [
- "# Setup a git repo with dvc\n",
- "\n",
- "%cd /content\n",
- "!rm -rf experiments && mkdir experiments\n",
- "%cd experiments\n",
- "\n",
- "!git init\n",
- "!git add .gitignore\n",
- "!git commit -m \"Init repo\"\n",
- "!dvc init\n",
- "!git commit -m \"Init DVC\""
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "ijUf_HhRobl0",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468140631,
- "user_tz": -120,
- "elapsed": 2844,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- },
- "outputId": "796d7eec-17dc-40b2-a4c9-5bdcf9184c58"
- },
- "execution_count": 13,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "/content\n",
- "/content/experiments\n",
- "hint: Using 'master' as the name for the initial branch. This default branch name\n",
- "hint: is subject to change. To configure the initial branch name to use in all\n",
- "hint: of your new repositories, which will suppress this warning, call:\n",
- "hint: \n",
- "hint: \tgit config --global init.defaultBranch \n",
- "hint: \n",
- "hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and\n",
- "hint: 'development'. The just-created branch can be renamed via this command:\n",
- "hint: \n",
- "hint: \tgit branch -m \n",
- "Initialized empty Git repository in /content/experiments/.git/\n",
- "fatal: pathspec '.gitignore' did not match any files\n",
- "On branch master\n",
- "\n",
- "Initial commit\n",
- "\n",
- "nothing to commit (create/copy files and use \"git add\" to track)\n",
- "Initialized DVC repository.\n",
- "\n",
- "You can now commit the changes to git.\n",
- "\n",
- "+---------------------------------------------------------------------+\n",
- "| |\n",
- "| DVC has enabled anonymous aggregate usage analytics. |\n",
- "| Read the analytics documentation (and how to opt-out) here: |\n",
- "| |\n",
- "| |\n",
- "+---------------------------------------------------------------------+\n",
- "\n",
- "What's next?\n",
- "------------\n",
- "- Check out the documentation: \n",
- "- Get help and share ideas: \n",
- "- Star us on GitHub: \n",
- "[master (root-commit) 9220260] Init DVC\n",
- " 3 files changed, 6 insertions(+)\n",
- " create mode 100644 .dvc/.gitignore\n",
- " create mode 100644 .dvc/config\n",
- " create mode 100644 .dvcignore\n"
- ]
- }
+ "data": {
+ "text/markdown": "# DVC Report\n\nparams.yaml\n\n| begin | end |\n|---------------------|---------------------|\n| 2011-02-15 00:00:00 | 2011-02-21 23:00:00 |\n\nmetrics.json\n\n| weathersit | temp | atemp | hum | windspeed | holiday | workingday | step |\n|--------------|--------|---------|-------|-------------|-----------|--------------|--------|\n| 0.231 | 0 | 0 | 0.062 | 0.012 | 0.275 | 0.593 | 3 |\n\n![static/holiday]()\n\n![static/windspeed]()\n\n![static/temp]()\n\n![static/workingday]()\n\n![static/weathersit]()\n\n![static/hum]()\n\n![static/atemp]()\n",
+ "text/plain": [
+ ""
]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "with Live(report=\"notebook\") as live:\n",
+ " for date in experiment_batches:\n",
+ " live.log_param(\"begin\", date[0])\n",
+ " live.log_param(\"end\", date[1])\n",
+ "\n",
+ " metrics = eval_drift(\n",
+ " df.loc[df.dteday.between(reference_dates[0], reference_dates[1])],\n",
+ " df.loc[df.dteday.between(date[0], date[1])],\n",
+ " column_mapping=data_columns,\n",
+ " )\n",
+ "\n",
+ " for feature in metrics:\n",
+ " live.log_metric(feature[0], round(feature[1], 3))\n",
+ "\n",
+ " live.next_step()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Pc3jDX1q-y3c"
+ },
+ "source": [
+ "To explore the results from CLI:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
- {
- "cell_type": "code",
- "source": [
- "with Live(report=\"notebook\") as live:\n",
- " for date in experiment_batches:\n",
- " live.log_param(\"begin\", date[0])\n",
- " live.log_param(\"end\", date[1])\n",
- "\n",
- " metrics = eval_drift(\n",
- " df.loc[df.dteday.between(reference_dates[0], reference_dates[1])],\n",
- " df.loc[df.dteday.between(date[0], date[1])],\n",
- " column_mapping=data_columns,\n",
- " )\n",
- "\n",
- " for feature in metrics:\n",
- " live.log_metric(feature[0], round(feature[1], 3))\n",
- "\n",
- " live.next_step()"
- ],
- "metadata": {
- "id": "_h-jGJqPiA30",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468156663,
- "user_tz": -120,
- "elapsed": 16055,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- },
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000
- },
- "outputId": "0b949e24-8c53-4765-a8ee-64d002b3801e"
- },
- "execution_count": 14,
- "outputs": [
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- ""
- ],
- "text/markdown": "# DVC Report\n\nparams.yaml\n\n| begin | end |\n|---------------------|---------------------|\n| 2011-02-15 00:00:00 | 2011-02-21 23:00:00 |\n\nmetrics.json\n\n| weathersit | temp | atemp | hum | windspeed | holiday | workingday | step |\n|--------------|--------|---------|-------|-------------|-----------|--------------|--------|\n| 0.231 | 0 | 0 | 0.062 | 0.012 | 0.275 | 0.593 | 3 |\n\n![static/holiday]()\n\n![static/windspeed]()\n\n![static/temp]()\n\n![static/workingday]()\n\n![static/weathersit]()\n\n![static/hum]()\n\n![static/atemp]()\n"
- },
- "metadata": {}
- }
- ]
+ "executionInfo": {
+ "elapsed": 1434,
+ "status": "ok",
+ "timestamp": 1697468158085,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
+ "id": "6OAsURiL-Ge2",
+ "outputId": "0fb47be1-f524-41f1-8c74-d4663d721290"
+ },
+ "outputs": [
{
- "cell_type": "markdown",
- "source": [
- "To explore the results from CLI:"
- ],
- "metadata": {
- "id": "Pc3jDX1q-y3c"
- }
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\rReading plot's data from workspace: 0% 0/7 [00:00, ?files/s]\rReading plot's data from workspace: 0% 0/7 [00:00, ?files/s{'info': ''}]\r \rfile:///content/experiments/dvc_plots/index.html\n"
+ ]
+ }
+ ],
+ "source": [
+ "!dvc plots show"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 802
},
- {
- "cell_type": "code",
- "source": [
- "!dvc plots show"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "6OAsURiL-Ge2",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468158085,
- "user_tz": -120,
- "elapsed": 1434,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- },
- "outputId": "0fb47be1-f524-41f1-8c74-d4663d721290"
- },
- "execution_count": 15,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\rReading plot's data from workspace: 0% 0/7 [00:00, ?files/s]\rReading plot's data from workspace: 0% 0/7 [00:00, ?files/s{'info': ''}]\r \rfile:///content/experiments/dvc_plots/index.html\n"
- ]
- }
- ]
+ "executionInfo": {
+ "elapsed": 4,
+ "status": "ok",
+ "timestamp": 1697468158085,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
+ "id": "pwdjxeEG-I49",
+ "outputId": "e01c74f9-45b6-4715-b1bd-2dd20875a421"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "source": [
- "import IPython\n",
- "IPython.display.HTML(filename='dvc_plots/index.html')"
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " DVC Plot\n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ "\n",
+ ""
],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 802
- },
- "id": "pwdjxeEG-I49",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468158085,
- "user_tz": -120,
- "elapsed": 4,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- },
- "outputId": "e01c74f9-45b6-4715-b1bd-2dd20875a421"
- },
- "execution_count": 16,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- ""
- ],
- "text/html": [
- "\n",
- "\n",
- "\n",
- " \n",
- " DVC Plot\n",
- " \n",
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- " \n",
- " \n",
- " \n",
- "
\n",
- " \n",
- "\n",
- " \n",
- " \n",
- "
\n",
- " \n",
- "\n",
- " \n",
- " \n",
- "
\n",
- " \n",
- "\n",
- " \n",
- " \n",
- "
\n",
- " \n",
- "\n",
- " \n",
- " \n",
- "
\n",
- " \n",
- "\n",
- " \n",
- " \n",
- "
\n",
- " \n",
- "\n",
- " \n",
- " \n",
- "
\n",
- " \n",
- "\n",
- ""
- ]
- },
- "metadata": {},
- "execution_count": 16
- }
+ "text/plain": [
+ ""
]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import IPython\n",
+ "\n",
+ "IPython.display.HTML(filename=\"dvc_plots/index.html\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "CCdF_ipAIY7k"
+ },
+ "source": [
+ "## In multiple experiments (one per step)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
- {
- "cell_type": "markdown",
- "source": [
- "## In multiple experiments (one per step)"
- ],
- "metadata": {
- "id": "CCdF_ipAIY7k"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "# Setup a git repo with dvc\n",
- "\n",
- "%cd /content\n",
- "!rm -rf experiments && mkdir experiments\n",
- "%cd experiments\n",
- "\n",
- "!git init\n",
- "!git add .gitignore\n",
- "!git commit -m \"Init repo\"\n",
- "!dvc init\n",
- "!git commit -m \"Init DVC\""
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "0x81BAI--2Gm",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468159295,
- "user_tz": -120,
- "elapsed": 1213,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- },
- "outputId": "7fb22cea-d367-41b0-f27d-a99e9d6081dc"
- },
- "execution_count": 17,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "/content\n",
- "/content/experiments\n",
- "hint: Using 'master' as the name for the initial branch. This default branch name\n",
- "hint: is subject to change. To configure the initial branch name to use in all\n",
- "hint: of your new repositories, which will suppress this warning, call:\n",
- "hint: \n",
- "hint: \tgit config --global init.defaultBranch \n",
- "hint: \n",
- "hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and\n",
- "hint: 'development'. The just-created branch can be renamed via this command:\n",
- "hint: \n",
- "hint: \tgit branch -m \n",
- "Initialized empty Git repository in /content/experiments/.git/\n",
- "fatal: pathspec '.gitignore' did not match any files\n",
- "On branch master\n",
- "\n",
- "Initial commit\n",
- "\n",
- "nothing to commit (create/copy files and use \"git add\" to track)\n",
- "Initialized DVC repository.\n",
- "\n",
- "You can now commit the changes to git.\n",
- "\n",
- "+---------------------------------------------------------------------+\n",
- "| |\n",
- "| DVC has enabled anonymous aggregate usage analytics. |\n",
- "| Read the analytics documentation (and how to opt-out) here: |\n",
- "| |\n",
- "| |\n",
- "+---------------------------------------------------------------------+\n",
- "\n",
- "What's next?\n",
- "------------\n",
- "- Check out the documentation: \n",
- "- Get help and share ideas: \n",
- "- Star us on GitHub: \n",
- "[master (root-commit) 469083d] Init DVC\n",
- " 3 files changed, 6 insertions(+)\n",
- " create mode 100644 .dvc/.gitignore\n",
- " create mode 100644 .dvc/config\n",
- " create mode 100644 .dvcignore\n"
- ]
- }
- ]
+ "executionInfo": {
+ "elapsed": 1213,
+ "status": "ok",
+ "timestamp": 1697468159295,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
+ "id": "0x81BAI--2Gm",
+ "outputId": "7fb22cea-d367-41b0-f27d-a99e9d6081dc"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "source": [
- "from dvclive import Live\n",
- "\n",
- "for step, date in enumerate(experiment_batches):\n",
- " with Live() as live:\n",
- " live.log_param(\"step\", step)\n",
- " live.log_param(\"begin\", date[0])\n",
- " live.log_param(\"end\", date[1])\n",
- "\n",
- " metrics = eval_drift(\n",
- " df.loc[df.dteday.between(reference_dates[0], reference_dates[1])],\n",
- " df.loc[df.dteday.between(date[0], date[1])],\n",
- " column_mapping=data_columns,\n",
- " )\n",
- "\n",
- " for feature in metrics:\n",
- " live.log_metric(feature[0], round(feature[1], 3))\n"
- ],
- "metadata": {
- "id": "VfVLDwfD39qO",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468161649,
- "user_tz": -120,
- "elapsed": 2355,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- }
- },
- "execution_count": 18,
- "outputs": []
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "/content\n",
+ "/content/experiments\n",
+ "hint: Using 'master' as the name for the initial branch. This default branch name\n",
+ "hint: is subject to change. To configure the initial branch name to use in all\n",
+ "hint: of your new repositories, which will suppress this warning, call:\n",
+ "hint: \n",
+ "hint: \tgit config --global init.defaultBranch \n",
+ "hint: \n",
+ "hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and\n",
+ "hint: 'development'. The just-created branch can be renamed via this command:\n",
+ "hint: \n",
+ "hint: \tgit branch -m \n",
+ "Initialized empty Git repository in /content/experiments/.git/\n",
+ "fatal: pathspec '.gitignore' did not match any files\n",
+ "On branch master\n",
+ "\n",
+ "Initial commit\n",
+ "\n",
+ "nothing to commit (create/copy files and use \"git add\" to track)\n",
+ "Initialized DVC repository.\n",
+ "\n",
+ "You can now commit the changes to git.\n",
+ "\n",
+ "+---------------------------------------------------------------------+\n",
+ "| |\n",
+ "| DVC has enabled anonymous aggregate usage analytics. |\n",
+ "| Read the analytics documentation (and how to opt-out) here: |\n",
+ "| |\n",
+ "| |\n",
+ "+---------------------------------------------------------------------+\n",
+ "\n",
+ "What's next?\n",
+ "------------\n",
+ "- Check out the documentation: \n",
+ "- Get help and share ideas: \n",
+ "- Star us on GitHub: \n",
+ "[master (root-commit) 469083d] Init DVC\n",
+ " 3 files changed, 6 insertions(+)\n",
+ " create mode 100644 .dvc/.gitignore\n",
+ " create mode 100644 .dvc/config\n",
+ " create mode 100644 .dvcignore\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Setup a git repo with dvc\n",
+ "\n",
+ "%cd /content\n",
+ "!rm -rf experiments && mkdir experiments\n",
+ "%cd experiments\n",
+ "\n",
+ "!git init\n",
+ "!git add .gitignore\n",
+ "!git commit -m \"Init repo\"\n",
+ "!dvc init\n",
+ "!git commit -m \"Init DVC\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 2355,
+ "status": "ok",
+ "timestamp": 1697468161649,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
- {
- "cell_type": "code",
- "source": [
- "import dvc.api\n",
- "\n",
- "pd.DataFrame(dvc.api.exp_show())"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 238
- },
- "id": "ijcN3PaZ6fM0",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468162078,
- "user_tz": -120,
- "elapsed": 433,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- },
- "outputId": "2d26f834-604f-4e28-8924-f5d97ae92596"
- },
- "execution_count": 19,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- " Experiment rev typ Created parent State Executor \\\n",
- "0 None workspace baseline None None None None \n",
- "1 None master baseline 02:55 PM None None None \n",
- "2 elite-mobs e4d6acd branch_commit 02:56 PM None None None \n",
- "3 buxom-shes 439f6e1 branch_commit 02:56 PM None None None \n",
- "4 hammy-skip b5b80b5 branch_commit 02:55 PM None None None \n",
- "5 girly-sere 2ba9568 branch_base 02:55 PM None None None \n",
- "\n",
- " weathersit temp atemp hum windspeed holiday workingday step \\\n",
- "0 0.231 NaN NaN 0.062 0.012 0.275 0.593 3.0 \n",
- "1 NaN NaN NaN NaN NaN NaN NaN NaN \n",
- "2 0.231 NaN NaN 0.062 0.012 0.275 0.593 3.0 \n",
- "3 0.155 0.399 0.537 0.684 0.611 0.588 0.699 2.0 \n",
- "4 0.985 1.000 1.000 1.000 1.000 0.980 0.851 NaN \n",
- "5 0.779 0.098 0.107 0.030 0.171 0.545 0.653 1.0 \n",
- "\n",
- " begin end \n",
- "0 2011-02-15 00:00:00 2011-02-21 23:00:00 \n",
- "1 None None \n",
- "2 2011-02-15 00:00:00 2011-02-21 23:00:00 \n",
- "3 2011-02-07 00:00:00 2011-02-14 23:00:00 \n",
- "4 2011-01-01 00:00:00 2011-01-29 23:00:00 \n",
- "5 2011-01-29 00:00:00 2011-02-07 23:00:00 "
- ],
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Experiment | \n",
- " rev | \n",
- " typ | \n",
- " Created | \n",
- " parent | \n",
- " State | \n",
- " Executor | \n",
- " weathersit | \n",
- " temp | \n",
- " atemp | \n",
- " hum | \n",
- " windspeed | \n",
- " holiday | \n",
- " workingday | \n",
- " step | \n",
- " begin | \n",
- " end | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " None | \n",
- " workspace | \n",
- " baseline | \n",
- " None | \n",
- " None | \n",
- " None | \n",
- " None | \n",
- " 0.231 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.062 | \n",
- " 0.012 | \n",
- " 0.275 | \n",
- " 0.593 | \n",
- " 3.0 | \n",
- " 2011-02-15 00:00:00 | \n",
- " 2011-02-21 23:00:00 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " None | \n",
- " master | \n",
- " baseline | \n",
- " 02:55 PM | \n",
- " None | \n",
- " None | \n",
- " None | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " None | \n",
- " None | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " elite-mobs | \n",
- " e4d6acd | \n",
- " branch_commit | \n",
- " 02:56 PM | \n",
- " None | \n",
- " None | \n",
- " None | \n",
- " 0.231 | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.062 | \n",
- " 0.012 | \n",
- " 0.275 | \n",
- " 0.593 | \n",
- " 3.0 | \n",
- " 2011-02-15 00:00:00 | \n",
- " 2011-02-21 23:00:00 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " buxom-shes | \n",
- " 439f6e1 | \n",
- " branch_commit | \n",
- " 02:56 PM | \n",
- " None | \n",
- " None | \n",
- " None | \n",
- " 0.155 | \n",
- " 0.399 | \n",
- " 0.537 | \n",
- " 0.684 | \n",
- " 0.611 | \n",
- " 0.588 | \n",
- " 0.699 | \n",
- " 2.0 | \n",
- " 2011-02-07 00:00:00 | \n",
- " 2011-02-14 23:00:00 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " hammy-skip | \n",
- " b5b80b5 | \n",
- " branch_commit | \n",
- " 02:55 PM | \n",
- " None | \n",
- " None | \n",
- " None | \n",
- " 0.985 | \n",
- " 1.000 | \n",
- " 1.000 | \n",
- " 1.000 | \n",
- " 1.000 | \n",
- " 0.980 | \n",
- " 0.851 | \n",
- " NaN | \n",
- " 2011-01-01 00:00:00 | \n",
- " 2011-01-29 23:00:00 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " girly-sere | \n",
- " 2ba9568 | \n",
- " branch_base | \n",
- " 02:55 PM | \n",
- " None | \n",
- " None | \n",
- " None | \n",
- " 0.779 | \n",
- " 0.098 | \n",
- " 0.107 | \n",
- " 0.030 | \n",
- " 0.171 | \n",
- " 0.545 | \n",
- " 0.653 | \n",
- " 1.0 | \n",
- " 2011-01-29 00:00:00 | \n",
- " 2011-02-07 23:00:00 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
- ]
- },
- "metadata": {},
- "execution_count": 19
- }
- ]
+ "id": "VfVLDwfD39qO"
+ },
+ "outputs": [],
+ "source": [
+ "from dvclive import Live\n",
+ "\n",
+ "for step, date in enumerate(experiment_batches):\n",
+ " with Live() as live:\n",
+ " live.log_param(\"step\", step)\n",
+ " live.log_param(\"begin\", date[0])\n",
+ " live.log_param(\"end\", date[1])\n",
+ "\n",
+ " metrics = eval_drift(\n",
+ " df.loc[df.dteday.between(reference_dates[0], reference_dates[1])],\n",
+ " df.loc[df.dteday.between(date[0], date[1])],\n",
+ " column_mapping=data_columns,\n",
+ " )\n",
+ "\n",
+ " for feature in metrics:\n",
+ " live.log_metric(feature[0], round(feature[1], 3))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 238
},
- {
- "cell_type": "markdown",
- "source": [
- "To explore the results from CLI:"
- ],
- "metadata": {
- "id": "TQE5aBWl-sef"
- }
+ "executionInfo": {
+ "elapsed": 433,
+ "status": "ok",
+ "timestamp": 1697468162078,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
},
+ "id": "ijcN3PaZ6fM0",
+ "outputId": "2d26f834-604f-4e28-8924-f5d97ae92596"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "source": [
- "!dvc exp show"
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Experiment | \n",
+ " rev | \n",
+ " typ | \n",
+ " Created | \n",
+ " parent | \n",
+ " State | \n",
+ " Executor | \n",
+ " weathersit | \n",
+ " temp | \n",
+ " atemp | \n",
+ " hum | \n",
+ " windspeed | \n",
+ " holiday | \n",
+ " workingday | \n",
+ " step | \n",
+ " begin | \n",
+ " end | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " None | \n",
+ " workspace | \n",
+ " baseline | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 0.231 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.062 | \n",
+ " 0.012 | \n",
+ " 0.275 | \n",
+ " 0.593 | \n",
+ " 3.0 | \n",
+ " 2011-02-15 00:00:00 | \n",
+ " 2011-02-21 23:00:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " None | \n",
+ " master | \n",
+ " baseline | \n",
+ " 02:55 PM | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " elite-mobs | \n",
+ " e4d6acd | \n",
+ " branch_commit | \n",
+ " 02:56 PM | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 0.231 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.062 | \n",
+ " 0.012 | \n",
+ " 0.275 | \n",
+ " 0.593 | \n",
+ " 3.0 | \n",
+ " 2011-02-15 00:00:00 | \n",
+ " 2011-02-21 23:00:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " buxom-shes | \n",
+ " 439f6e1 | \n",
+ " branch_commit | \n",
+ " 02:56 PM | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 0.155 | \n",
+ " 0.399 | \n",
+ " 0.537 | \n",
+ " 0.684 | \n",
+ " 0.611 | \n",
+ " 0.588 | \n",
+ " 0.699 | \n",
+ " 2.0 | \n",
+ " 2011-02-07 00:00:00 | \n",
+ " 2011-02-14 23:00:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " hammy-skip | \n",
+ " b5b80b5 | \n",
+ " branch_commit | \n",
+ " 02:55 PM | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 0.985 | \n",
+ " 1.000 | \n",
+ " 1.000 | \n",
+ " 1.000 | \n",
+ " 1.000 | \n",
+ " 0.980 | \n",
+ " 0.851 | \n",
+ " NaN | \n",
+ " 2011-01-01 00:00:00 | \n",
+ " 2011-01-29 23:00:00 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " girly-sere | \n",
+ " 2ba9568 | \n",
+ " branch_base | \n",
+ " 02:55 PM | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " 0.779 | \n",
+ " 0.098 | \n",
+ " 0.107 | \n",
+ " 0.030 | \n",
+ " 0.171 | \n",
+ " 0.545 | \n",
+ " 0.653 | \n",
+ " 1.0 | \n",
+ " 2011-01-29 00:00:00 | \n",
+ " 2011-02-07 23:00:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
],
- "metadata": {
- "id": "oZtY-97bQj-Q",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468163295,
- "user_tz": -120,
- "elapsed": 1221,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- },
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "14eb8d4c-c9ce-4bb8-caba-42e46d45bb65"
- },
- "execution_count": 20,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- " ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
- " Experiment Created weathersit temp atemp hum windspeed holiday workingday step begin end \n",
- " ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
- " workspace - 0.231 0 0 0.062 0.012 0.275 0.593 3 2011-02-15 00:00:00 2011-02-21 23:00:00 \n",
- " master 02:55 PM - - - - - - - - - - \n",
- " ├── e4d6acd [elite-mobs] 02:56 PM 0.231 0 0 0.062 0.012 0.275 0.593 3 2011-02-15 00:00:00 2011-02-21 23:00:00 \n",
- " ├── 439f6e1 [buxom-shes] 02:56 PM 0.155 0.399 0.537 0.684 0.611 0.588 0.699 2 2011-02-07 00:00:00 2011-02-14 23:00:00 \n",
- " ├── b5b80b5 [hammy-skip] 02:55 PM 0.985 1 1 1 1 0.98 0.851 0 2011-01-01 00:00:00 2011-01-29 23:00:00 \n",
- " └── 2ba9568 [girly-sere] 02:55 PM 0.779 0.098 0.107 0.03 0.171 0.545 0.653 1 2011-01-29 00:00:00 2011-02-07 23:00:00 \n",
- " ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n"
- ]
- }
+ "text/plain": [
+ " Experiment rev typ Created parent State Executor \\\n",
+ "0 None workspace baseline None None None None \n",
+ "1 None master baseline 02:55 PM None None None \n",
+ "2 elite-mobs e4d6acd branch_commit 02:56 PM None None None \n",
+ "3 buxom-shes 439f6e1 branch_commit 02:56 PM None None None \n",
+ "4 hammy-skip b5b80b5 branch_commit 02:55 PM None None None \n",
+ "5 girly-sere 2ba9568 branch_base 02:55 PM None None None \n",
+ "\n",
+ " weathersit temp atemp hum windspeed holiday workingday step \\\n",
+ "0 0.231 NaN NaN 0.062 0.012 0.275 0.593 3.0 \n",
+ "1 NaN NaN NaN NaN NaN NaN NaN NaN \n",
+ "2 0.231 NaN NaN 0.062 0.012 0.275 0.593 3.0 \n",
+ "3 0.155 0.399 0.537 0.684 0.611 0.588 0.699 2.0 \n",
+ "4 0.985 1.000 1.000 1.000 1.000 0.980 0.851 NaN \n",
+ "5 0.779 0.098 0.107 0.030 0.171 0.545 0.653 1.0 \n",
+ "\n",
+ " begin end \n",
+ "0 2011-02-15 00:00:00 2011-02-21 23:00:00 \n",
+ "1 None None \n",
+ "2 2011-02-15 00:00:00 2011-02-21 23:00:00 \n",
+ "3 2011-02-07 00:00:00 2011-02-14 23:00:00 \n",
+ "4 2011-01-01 00:00:00 2011-01-29 23:00:00 \n",
+ "5 2011-01-29 00:00:00 2011-02-07 23:00:00 "
]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import dvc.api\n",
+ "\n",
+ "pd.DataFrame(dvc.api.exp_show())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "TQE5aBWl-sef"
+ },
+ "source": [
+ "To explore the results from CLI:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
+ "executionInfo": {
+ "elapsed": 1221,
+ "status": "ok",
+ "timestamp": 1697468163295,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
+ },
+ "id": "oZtY-97bQj-Q",
+ "outputId": "14eb8d4c-c9ce-4bb8-caba-42e46d45bb65"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "source": [],
- "metadata": {
- "id": "QoYexufp-qw2",
- "executionInfo": {
- "status": "ok",
- "timestamp": 1697468163757,
- "user_tz": -120,
- "elapsed": 464,
- "user": {
- "displayName": "Francesco Motoko",
- "userId": "00974636158007469548"
- }
- }
- },
- "execution_count": 20,
- "outputs": []
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
+ " Experiment Created weathersit temp atemp hum windspeed holiday workingday step begin end \n",
+ " ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n",
+ " workspace - 0.231 0 0 0.062 0.012 0.275 0.593 3 2011-02-15 00:00:00 2011-02-21 23:00:00 \n",
+ " master 02:55 PM - - - - - - - - - - \n",
+ " ├── e4d6acd [elite-mobs] 02:56 PM 0.231 0 0 0.062 0.012 0.275 0.593 3 2011-02-15 00:00:00 2011-02-21 23:00:00 \n",
+ " ├── 439f6e1 [buxom-shes] 02:56 PM 0.155 0.399 0.537 0.684 0.611 0.588 0.699 2 2011-02-07 00:00:00 2011-02-14 23:00:00 \n",
+ " ├── b5b80b5 [hammy-skip] 02:55 PM 0.985 1 1 1 1 0.98 0.851 0 2011-01-01 00:00:00 2011-01-29 23:00:00 \n",
+ " └── 2ba9568 [girly-sere] 02:55 PM 0.779 0.098 0.107 0.03 0.171 0.545 0.653 1 2011-01-29 00:00:00 2011-02-07 23:00:00 \n",
+ " ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n"
+ ]
}
- ]
+ ],
+ "source": [
+ "!dvc exp show"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 464,
+ "status": "ok",
+ "timestamp": 1697468163757,
+ "user": {
+ "displayName": "Francesco Motoko",
+ "userId": "00974636158007469548"
+ },
+ "user_tz": -120
+ },
+ "id": "QoYexufp-qw2"
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "authorship_tag": "ABX9TyNJAdha/v4n9zLqIfGakg0E",
+ "provenance": [],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
}
diff --git a/examples/DVCLive-Fabric.ipynb b/examples/DVCLive-Fabric.ipynb
index 28bcffa9..d61c4517 100644
--- a/examples/DVCLive-Fabric.ipynb
+++ b/examples/DVCLive-Fabric.ipynb
@@ -1,315 +1,342 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "QKSE19fW_Dnj"
- },
- "source": [
- "# DVCLive and Lightning Fabric"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "q-C_4R_o_QGG"
- },
- "source": [
- "## Install dvclive"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "-XFbvwq7TSwN",
- "outputId": "15d0e3b5-bb4a-4b3e-d37f-21608d1822ed"
- },
- "outputs": [],
- "source": [
- "!pip install \"dvclive[lightning]\""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "I6S6Uru1_Y0x"
- },
- "source": [
- "## Initialize DVC Repository"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "WcbvUl2uTV0y",
- "outputId": "aff9740c-26db-483d-ce30-cfef395f3cbb"
- },
- "outputs": [],
- "source": [
- "!git init -q\n",
- "!git config --local user.email \"you@example.com\"\n",
- "!git config --local user.name \"Your Name\"\n",
- "!dvc init -q\n",
- "!git commit -m \"DVC init\""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "LmY4PLMh_cUk"
- },
- "source": [
- "## Imports"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "85qErT5yTEbN"
- },
- "outputs": [],
- "source": [
- "import argparse\n",
- "from os import path\n",
- "from types import SimpleNamespace\n",
- "\n",
- "import torch\n",
- "import torch.nn as nn\n",
- "import torch.nn.functional as F\n",
- "import torch.optim as optim\n",
- "import torchvision.transforms as T\n",
- "from lightning.fabric import Fabric, seed_everything\n",
- "from lightning.fabric.utilities.rank_zero import rank_zero_only\n",
- "from torch.optim.lr_scheduler import StepLR\n",
- "from torchmetrics.classification import Accuracy\n",
- "from torchvision.datasets import MNIST\n",
- "\n",
- "from dvclive.fabric import DVCLiveLogger\n",
- "\n",
- "DATASETS_PATH = (\"Datasets\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "UrmAHbhr_lgs"
- },
- "source": [
- "## Setup model code\n",
- "\n",
- "Adapted from https://github.com/Lightning-AI/pytorch-lightning/blob/master/examples/fabric/image_classifier/train_fabric.py.\n",
- "\n",
- "Look for the `logger` statements where DVCLiveLogger calls were added."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "UCzTygUnTHM8"
- },
- "outputs": [],
- "source": [
- "class Net(nn.Module):\n",
- " def __init__(self) -> None:\n",
- " super().__init__()\n",
- " self.conv1 = nn.Conv2d(1, 32, 3, 1)\n",
- " self.conv2 = nn.Conv2d(32, 64, 3, 1)\n",
- " self.dropout1 = nn.Dropout(0.25)\n",
- " self.dropout2 = nn.Dropout(0.5)\n",
- " self.fc1 = nn.Linear(9216, 128)\n",
- " self.fc2 = nn.Linear(128, 10)\n",
- "\n",
- " def forward(self, x):\n",
- " x = self.conv1(x)\n",
- " x = F.relu(x)\n",
- " x = self.conv2(x)\n",
- " x = F.relu(x)\n",
- " x = F.max_pool2d(x, 2)\n",
- " x = self.dropout1(x)\n",
- " x = torch.flatten(x, 1)\n",
- " x = self.fc1(x)\n",
- " x = F.relu(x)\n",
- " x = self.dropout2(x)\n",
- " x = self.fc2(x)\n",
- " return F.log_softmax(x, dim=1)\n",
- "\n",
- "\n",
- "def run(hparams):\n",
- " # Create the DVCLive Logger\n",
- " logger = DVCLiveLogger(report=\"notebook\")\n",
- "\n",
- " # Log dict of hyperparameters\n",
- " logger.log_hyperparams(hparams.__dict__)\n",
- "\n",
- " # Create the Lightning Fabric object. The parameters like accelerator, strategy, devices etc. will be proided\n",
- " # by the command line. See all options: `lightning run model --help`\n",
- " fabric = Fabric()\n",
- "\n",
- " seed_everything(hparams.seed) # instead of torch.manual_seed(...)\n",
- "\n",
- " transform = T.Compose([T.ToTensor(), T.Normalize((0.1307,), (0.3081,))])\n",
- "\n",
- " # Let rank 0 download the data first, then everyone will load MNIST\n",
- " with fabric.rank_zero_first(local=False): # set `local=True` if your filesystem is not shared between machines\n",
- " train_dataset = MNIST(DATASETS_PATH, download=fabric.is_global_zero, train=True, transform=transform)\n",
- " test_dataset = MNIST(DATASETS_PATH, download=fabric.is_global_zero, train=False, transform=transform)\n",
- "\n",
- " train_loader = torch.utils.data.DataLoader(\n",
- " train_dataset,\n",
- " batch_size=hparams.batch_size,\n",
- " )\n",
- " test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=hparams.batch_size)\n",
- "\n",
- " # don't forget to call `setup_dataloaders` to prepare for dataloaders for distributed training.\n",
- " train_loader, test_loader = fabric.setup_dataloaders(train_loader, test_loader)\n",
- "\n",
- " model = Net() # remove call to .to(device)\n",
- " optimizer = optim.Adadelta(model.parameters(), lr=hparams.lr)\n",
- "\n",
- " # don't forget to call `setup` to prepare for model / optimizer for distributed training.\n",
- " # the model is moved automatically to the right device.\n",
- " model, optimizer = fabric.setup(model, optimizer)\n",
- "\n",
- " scheduler = StepLR(optimizer, step_size=1, gamma=hparams.gamma)\n",
- "\n",
- " # use torchmetrics instead of manually computing the accuracy\n",
- " test_acc = Accuracy(task=\"multiclass\", num_classes=10).to(fabric.device)\n",
- "\n",
- " # EPOCH LOOP\n",
- " for epoch in range(1, hparams.epochs + 1):\n",
- " # TRAINING LOOP\n",
- " model.train()\n",
- " for batch_idx, (data, target) in enumerate(train_loader):\n",
- " # NOTE: no need to call `.to(device)` on the data, target\n",
- " optimizer.zero_grad()\n",
- " output = model(data)\n",
- " loss = F.nll_loss(output, target)\n",
- " fabric.backward(loss) # instead of loss.backward()\n",
- "\n",
- " optimizer.step()\n",
- " if (batch_idx == 0) or ((batch_idx + 1) % hparams.log_interval == 0):\n",
- " print(\n",
- " \"Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}\".format(\n",
- " epoch,\n",
- " batch_idx * len(data),\n",
- " len(train_loader.dataset),\n",
- " 100.0 * batch_idx / len(train_loader),\n",
- " loss.item(),\n",
- " )\n",
- " )\n",
- "\n",
- " # Log dict of metrics\n",
- " logger.log_metrics({\"loss\": loss.item()})\n",
- "\n",
- " if hparams.dry_run:\n",
- " break\n",
- "\n",
- " scheduler.step()\n",
- "\n",
- " # TESTING LOOP\n",
- " model.eval()\n",
- " test_loss = 0\n",
- " with torch.no_grad():\n",
- " for data, target in test_loader:\n",
- " # NOTE: no need to call `.to(device)` on the data, target\n",
- " output = model(data)\n",
- " test_loss += F.nll_loss(output, target, reduction=\"sum\").item()\n",
- "\n",
- " # WITHOUT TorchMetrics\n",
- " # pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability\n",
- " # correct += pred.eq(target.view_as(pred)).sum().item()\n",
- "\n",
- " # WITH TorchMetrics\n",
- " test_acc(output, target)\n",
- "\n",
- " if hparams.dry_run:\n",
- " break\n",
- "\n",
- " # all_gather is used to aggregated the value across processes\n",
- " test_loss = fabric.all_gather(test_loss).sum() / len(test_loader.dataset)\n",
- "\n",
- " print(f\"\\nTest set: Average loss: {test_loss:.4f}, Accuracy: ({100 * test_acc.compute():.0f}%)\\n\")\n",
- "\n",
- " # log additional metrics\n",
- " logger.log_metrics({\"test_loss\": test_loss, \"test_acc\": 100 * test_acc.compute()})\n",
- "\n",
- " test_acc.reset()\n",
- "\n",
- " if hparams.dry_run:\n",
- " break\n",
- "\n",
- " # When using distributed training, use `fabric.save`\n",
- " # to ensure the current process is allowed to save a checkpoint\n",
- " if hparams.save_model:\n",
- " fabric.save(\"mnist_cnn.pt\", model.state_dict())\n",
- "\n",
- " # `logger.experiment` provides access to the `dvclive.Live` instance where you can use additional logging methods.\n",
- " # Check that `rank_zero_only.rank == 0` to avoid logging in other processes.\n",
- " if rank_zero_only.rank == 0:\n",
- " logger.experiment.log_artifact(\"mnist_cnn.pt\")\n",
- "\n",
- " # Call finalize to save final results as a DVC experiment\n",
- " logger.finalize(\"success\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "o5_v9lRDAM7l"
- },
- "source": [
- "## Train the model"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000
- },
- "id": "BbCXen1PTM4V",
- "outputId": "b79c90eb-74cc-474d-c0dd-21245064bca8"
- },
- "outputs": [],
- "source": [
- "hparams = SimpleNamespace(batch_size=64, epochs=5, lr=1.0, gamma=0.7, dry_run=False, seed=1, log_interval=10, save_model=True)\n",
- "run(hparams)"
- ]
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "QKSE19fW_Dnj"
+ },
+ "source": [
+ "# DVCLive and Lightning Fabric"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "q-C_4R_o_QGG"
+ },
+ "source": [
+ "## Install dvclive"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "DnqCrlbLAopV"
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
+ "id": "-XFbvwq7TSwN",
+ "outputId": "15d0e3b5-bb4a-4b3e-d37f-21608d1822ed"
+ },
+ "outputs": [],
+ "source": [
+ "%pip install \"dvclive[lightning]\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "I6S6Uru1_Y0x"
+ },
+ "source": [
+ "## Initialize DVC Repository"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
"colab": {
- "provenance": []
+ "base_uri": "https://localhost:8080/"
},
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
+ "id": "WcbvUl2uTV0y",
+ "outputId": "aff9740c-26db-483d-ce30-cfef395f3cbb"
+ },
+ "outputs": [],
+ "source": [
+ "!git init -q\n",
+ "!git config --local user.email \"you@example.com\"\n",
+ "!git config --local user.name \"Your Name\"\n",
+ "!dvc init -q\n",
+ "!git commit -m \"DVC init\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "LmY4PLMh_cUk"
+ },
+ "source": [
+ "## Imports"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "85qErT5yTEbN"
+ },
+ "outputs": [],
+ "source": [
+ "from types import SimpleNamespace\n",
+ "\n",
+ "import torch\n",
+ "from torch import nn\n",
+ "import torch.nn.functional as F # noqa: N812\n",
+ "from torch import optim\n",
+ "import torchvision.transforms as T # noqa: N812\n",
+ "from lightning.fabric import Fabric, seed_everything\n",
+ "from lightning.fabric.utilities.rank_zero import rank_zero_only\n",
+ "from torch.optim.lr_scheduler import StepLR\n",
+ "from torchmetrics.classification import Accuracy\n",
+ "from torchvision.datasets import MNIST\n",
+ "\n",
+ "from dvclive.fabric import DVCLiveLogger\n",
+ "\n",
+ "DATASETS_PATH = \"Datasets\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "UrmAHbhr_lgs"
+ },
+ "source": [
+ "## Setup model code\n",
+ "\n",
+ "Adapted from https://github.com/Lightning-AI/pytorch-lightning/blob/master/examples/fabric/image_classifier/train_fabric.py.\n",
+ "\n",
+ "Look for the `logger` statements where DVCLiveLogger calls were added."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "UCzTygUnTHM8"
+ },
+ "outputs": [],
+ "source": [
+ "class Net(nn.Module):\n",
+ " def __init__(self) -> None:\n",
+ " super().__init__()\n",
+ " self.conv1 = nn.Conv2d(1, 32, 3, 1)\n",
+ " self.conv2 = nn.Conv2d(32, 64, 3, 1)\n",
+ " self.dropout1 = nn.Dropout(0.25)\n",
+ " self.dropout2 = nn.Dropout(0.5)\n",
+ " self.fc1 = nn.Linear(9216, 128)\n",
+ " self.fc2 = nn.Linear(128, 10)\n",
+ "\n",
+ " def forward(self, x):\n",
+ " x = self.conv1(x)\n",
+ " x = F.relu(x)\n",
+ " x = self.conv2(x)\n",
+ " x = F.relu(x)\n",
+ " x = F.max_pool2d(x, 2)\n",
+ " x = self.dropout1(x)\n",
+ " x = torch.flatten(x, 1)\n",
+ " x = self.fc1(x)\n",
+ " x = F.relu(x)\n",
+ " x = self.dropout2(x)\n",
+ " x = self.fc2(x)\n",
+ " return F.log_softmax(x, dim=1)\n",
+ "\n",
+ "\n",
+ "def run(hparams):\n",
+ " # Create the DVCLive Logger\n",
+ " logger = DVCLiveLogger(report=\"notebook\")\n",
+ "\n",
+ " # Log dict of hyperparameters\n",
+ " logger.log_hyperparams(hparams.__dict__)\n",
+ "\n",
+ " # Create the Lightning Fabric object. The parameters like accelerator, strategy,\n",
+ " # devices etc. will be provided by the command line. See all options: `lightning\n",
+ " # run model --help`\n",
+ " fabric = Fabric()\n",
+ "\n",
+ " seed_everything(hparams.seed) # instead of torch.manual_seed(...)\n",
+ "\n",
+ " transform = T.Compose([T.ToTensor(), T.Normalize((0.1307,), (0.3081,))])\n",
+ "\n",
+ " # Let rank 0 download the data first, then everyone will load MNIST\n",
+ " with fabric.rank_zero_first(\n",
+ " local=False\n",
+ " ): # set `local=True` if your filesystem is not shared between machines\n",
+ " train_dataset = MNIST(\n",
+ " DATASETS_PATH,\n",
+ " download=fabric.is_global_zero,\n",
+ " train=True,\n",
+ " transform=transform,\n",
+ " )\n",
+ " test_dataset = MNIST(\n",
+ " DATASETS_PATH,\n",
+ " download=fabric.is_global_zero,\n",
+ " train=False,\n",
+ " transform=transform,\n",
+ " )\n",
+ "\n",
+ " train_loader = torch.utils.data.DataLoader(\n",
+ " train_dataset,\n",
+ " batch_size=hparams.batch_size,\n",
+ " )\n",
+ " test_loader = torch.utils.data.DataLoader(\n",
+ " test_dataset, batch_size=hparams.batch_size\n",
+ " )\n",
+ "\n",
+ " # don't forget to call `setup_dataloaders` to prepare the dataloaders for\n",
+ " # distributed training.\n",
+ " train_loader, test_loader = fabric.setup_dataloaders(train_loader, test_loader)\n",
+ "\n",
+ " model = Net() # remove call to .to(device)\n",
+ " optimizer = optim.Adadelta(model.parameters(), lr=hparams.lr)\n",
+ "\n",
+ " # don't forget to call `setup` to prepare the model / optimizer for\n",
+ " # distributed training. The model is moved automatically to the right device.\n",
+ " model, optimizer = fabric.setup(model, optimizer)\n",
+ "\n",
+ " scheduler = StepLR(optimizer, step_size=1, gamma=hparams.gamma)\n",
+ "\n",
+ " # use torchmetrics instead of manually computing the accuracy\n",
+ " test_acc = Accuracy(task=\"multiclass\", num_classes=10).to(fabric.device)\n",
+ "\n",
+ " # EPOCH LOOP\n",
+ " for epoch in range(1, hparams.epochs + 1):\n",
+ " # TRAINING LOOP\n",
+ " model.train()\n",
+ " for batch_idx, (data, target) in enumerate(train_loader):\n",
+ " # NOTE: no need to call `.to(device)` on the data, target\n",
+ " optimizer.zero_grad()\n",
+ " output = model(data)\n",
+ " loss = F.nll_loss(output, target)\n",
+ " fabric.backward(loss) # instead of loss.backward()\n",
+ "\n",
+ " optimizer.step()\n",
+ " if (batch_idx == 0) or ((batch_idx + 1) % hparams.log_interval == 0):\n",
+ " done = (batch_idx * len(data)) / len(train_loader.dataset)\n",
+ " pct = 100.0 * batch_idx / len(train_loader)\n",
+ " print( # noqa: T201\n",
+ " f\"-> Epoch: {epoch} [{done} ({pct:.0f}%)]\\tLoss: {loss.item():.6f}\"\n",
+ " )\n",
+ "\n",
+ " # Log dict of metrics\n",
+ " logger.log_metrics({\"loss\": loss.item()})\n",
+ "\n",
+ " if hparams.dry_run:\n",
+ " break\n",
+ "\n",
+ " scheduler.step()\n",
+ "\n",
+ " # TESTING LOOP\n",
+ " model.eval()\n",
+ " test_loss = 0\n",
+ " with torch.no_grad():\n",
+ " for data, target in test_loader:\n",
+ " # NOTE: no need to call `.to(device)` on the data, target\n",
+ " output = model(data)\n",
+ " test_loss += F.nll_loss(output, target, reduction=\"sum\").item()\n",
+ "\n",
+ " # WITHOUT TorchMetrics\n",
+ " # pred = output.argmax(dim=1, keepdim=True) # max log-probability index\n",
+ " # correct += pred.eq(target.view_as(pred)).sum().item()\n",
+ "\n",
+ " # WITH TorchMetrics\n",
+ " test_acc(output, target)\n",
+ "\n",
+ " if hparams.dry_run:\n",
+ " break\n",
+ "\n",
+ " # all_gather is used to aggregate the value across processes\n",
+ " test_loss = fabric.all_gather(test_loss).sum() / len(test_loader.dataset)\n",
+ " acc = 100 * test_acc.compute()\n",
+ "\n",
+ " print( # noqa: T201\n",
+ " f\"\\nTest set: Average loss: {test_loss:.4f}, Accuracy: ({acc:.0f}%)\\n\"\n",
+ " )\n",
+ "\n",
+ " # log additional metrics\n",
+ " logger.log_metrics(\n",
+ " {\"test_loss\": test_loss, \"test_acc\": 100 * test_acc.compute()}\n",
+ " )\n",
+ "\n",
+ " test_acc.reset()\n",
+ "\n",
+ " if hparams.dry_run:\n",
+ " break\n",
+ "\n",
+ " # When using distributed training, use `fabric.save`\n",
+ " # to ensure the current process is allowed to save a checkpoint\n",
+ " if hparams.save_model:\n",
+ " fabric.save(\"mnist_cnn.pt\", model.state_dict())\n",
+ "\n",
+ " # `logger.experiment` provides access to the `dvclive.Live` instance where you\n",
+ " # can use additional logging methods. Check that `rank_zero_only.rank == 0` to\n",
+ " # avoid logging in other processes.\n",
+ " if rank_zero_only.rank == 0:\n",
+ " logger.experiment.log_artifact(\"mnist_cnn.pt\")\n",
+ "\n",
+ " # Call finalize to save final results as a DVC experiment\n",
+ " logger.finalize(\"success\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "o5_v9lRDAM7l"
+ },
+ "source": [
+ "## Train the model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
},
- "language_info": {
- "name": "python"
- }
+ "id": "BbCXen1PTM4V",
+ "outputId": "b79c90eb-74cc-474d-c0dd-21245064bca8"
+ },
+ "outputs": [],
+ "source": [
+ "hparams = SimpleNamespace(\n",
+ " batch_size=64,\n",
+ " epochs=5,\n",
+ " lr=1.0,\n",
+ " gamma=0.7,\n",
+ " dry_run=False,\n",
+ " seed=1,\n",
+ " log_interval=10,\n",
+ " save_model=True,\n",
+ ")\n",
+ "run(hparams)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "DnqCrlbLAopV"
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
},
- "nbformat": 4,
- "nbformat_minor": 0
+ "language_info": {
+ "name": "python",
+ "version": "3.12.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
}
diff --git a/examples/DVCLive-HuggingFace.ipynb b/examples/DVCLive-HuggingFace.ipynb
index 6f9a81dd..0d1946f2 100644
--- a/examples/DVCLive-HuggingFace.ipynb
+++ b/examples/DVCLive-HuggingFace.ipynb
@@ -106,11 +106,23 @@
"source": [
"tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-cased\")\n",
"\n",
+ "\n",
"def tokenize_function(examples):\n",
" return tokenizer(examples[\"text\"], padding=\"max_length\", truncation=True)\n",
"\n",
- "small_train_dataset = dataset[\"train\"].shuffle(seed=42).select(range(2000)).map(tokenize_function, batched=True)\n",
- "small_eval_dataset = dataset[\"test\"].shuffle(seed=42).select(range(200)).map(tokenize_function, batched=True)"
+ "\n",
+ "small_train_dataset = (\n",
+ " dataset[\"train\"]\n",
+ " .shuffle(seed=42)\n",
+ " .select(range(2000))\n",
+ " .map(tokenize_function, batched=True)\n",
+ ")\n",
+ "small_eval_dataset = (\n",
+ " dataset[\"test\"]\n",
+ " .shuffle(seed=42)\n",
+ " .select(range(200))\n",
+ " .map(tokenize_function, batched=True)\n",
+ ")"
]
},
{
@@ -138,6 +150,7 @@
"\n",
"metric = evaluate.load(\"f1\")\n",
"\n",
+ "\n",
"def compute_metrics(eval_pred):\n",
" logits, labels = eval_pred\n",
" predictions = np.argmax(logits, axis=-1)\n",
@@ -178,7 +191,9 @@
"from transformers.integrations import DVCLiveCallback\n",
"from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer\n",
"\n",
- "model = AutoModelForSequenceClassification.from_pretrained(\"distilbert-base-cased\", num_labels=2)\n",
+ "model = AutoModelForSequenceClassification.from_pretrained(\n",
+ " \"distilbert-base-cased\", num_labels=2\n",
+ ")\n",
"for param in model.base_model.parameters():\n",
" param.requires_grad = False\n",
"\n",
@@ -224,7 +239,6 @@
"outputs": [],
"source": [
"from dvclive import Live\n",
- "from transformers.integrations import DVCLiveCallback\n",
"\n",
"lr = 1e-4\n",
"\n",
@@ -273,13 +287,13 @@
"import dvc.api\n",
"import pandas as pd\n",
"\n",
- "columns = [\"Experiment\", \"epoch\", \"eval.f1\"]\n",
+ "columns = [\"Experiment\", \"epoch\", \"eval.f1\"]\n",
"\n",
"df = pd.DataFrame(dvc.api.exp_show(), columns=columns)\n",
"\n",
"df.dropna(inplace=True)\n",
"df.reset_index(drop=True, inplace=True)\n",
- "df\n"
+ "df"
]
},
{
@@ -302,7 +316,8 @@
"outputs": [],
"source": [
"from IPython.display import HTML\n",
- "HTML(filename='./dvc_plots/index.html')"
+ "\n",
+ "HTML(filename=\"./dvc_plots/index.html\")"
]
}
],
diff --git a/examples/DVCLive-PyTorch-Lightning.ipynb b/examples/DVCLive-PyTorch-Lightning.ipynb
index 1e140e48..03f38d27 100644
--- a/examples/DVCLive-PyTorch-Lightning.ipynb
+++ b/examples/DVCLive-PyTorch-Lightning.ipynb
@@ -1,273 +1,275 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "A812CVYi_B2b"
- },
- "source": [
- ""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "gPh2FiPo_B2e"
- },
- "source": [
- "# DVCLive and PyTorch Lightning"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "m0XW9Ml7_B2e"
- },
- "source": [
- "## Setup"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "QivH1_cU_B2f"
- },
- "outputs": [],
- "source": [
- "!pip install \"dvclive[lightning]\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "pn_5GW1f_B2g"
- },
- "outputs": [],
- "source": [
- "!git init -q\n",
- "!git config --local user.email \"you@example.com\"\n",
- "!git config --local user.name \"Your Name\"\n",
- "!dvc init -q\n",
- "!git commit -m \"DVC init\""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "zC9hk7kibFTX"
- },
- "source": [
- "### Define LightningModule"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "t5PxdljP_B2h"
- },
- "outputs": [],
- "source": [
- "import lightning.pytorch as pl\n",
- "import torch\n",
- "\n",
- "class LitAutoEncoder(pl.LightningModule):\n",
- " def __init__(self, encoder_size=64, lr=1e-3):\n",
- " super().__init__()\n",
- " self.save_hyperparameters()\n",
- " self.encoder = torch.nn.Sequential(\n",
- " torch.nn.Linear(28 * 28, encoder_size),\n",
- " torch.nn.ReLU(),\n",
- " torch.nn.Linear(encoder_size, 3)\n",
- " )\n",
- " self.decoder = torch.nn.Sequential(\n",
- " torch.nn.Linear(3, encoder_size),\n",
- " torch.nn.ReLU(),\n",
- " torch.nn.Linear(encoder_size, 28 * 28)\n",
- " )\n",
- "\n",
- " def training_step(self, batch, batch_idx):\n",
- " x, y = batch\n",
- " x = x.view(x.size(0), -1)\n",
- " z = self.encoder(x)\n",
- " x_hat = self.decoder(z)\n",
- " train_mse = torch.nn.functional.mse_loss(x_hat, x)\n",
- " self.log(\"train_mse\", train_mse)\n",
- " return train_mse\n",
- "\n",
- " def validation_step(self, batch, batch_idx):\n",
- " x, y = batch\n",
- " x = x.view(x.size(0), -1)\n",
- " z = self.encoder(x)\n",
- " x_hat = self.decoder(z)\n",
- " val_mse = torch.nn.functional.mse_loss(x_hat, x)\n",
- " self.log(\"val_mse\", val_mse)\n",
- " return val_mse\n",
- "\n",
- " def configure_optimizers(self):\n",
- " optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr)\n",
- " return optimizer"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "St0ElX9obqRS"
- },
- "source": [
- "### Dataset and loaders"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "T5s53qgr_B2h"
- },
- "outputs": [],
- "source": [
- "from torchvision.datasets import MNIST\n",
- "import torchvision.transforms as transforms\n",
- "\n",
- "transform = transforms.ToTensor()\n",
- "train_set = MNIST(root=\"MNIST\", download=True, train=True, transform=transform)\n",
- "validation_set = MNIST(root=\"MNIST\", download=True, train=False, transform=transform)\n",
- "train_loader = torch.utils.data.DataLoader(train_set)\n",
- "validation_loader = torch.utils.data.DataLoader(validation_set)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "ttiwwreH_B2i"
- },
- "source": [
- "# Tracking experiments with DVCLive"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "sE6qj6BMoDkn"
- },
- "outputs": [],
- "source": [
- "from dvclive.lightning import DVCLiveLogger"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "XDqNY8pL_B2i"
- },
- "outputs": [],
- "source": [
- "for encoder_size in (64, 128):\n",
- " for lr in (1e-3, 0.1):\n",
- " model = LitAutoEncoder(encoder_size=encoder_size, lr=lr)\n",
- " trainer = pl.Trainer(\n",
- " limit_train_batches=200,\n",
- " limit_val_batches=100,\n",
- " max_epochs=5,\n",
- " logger=DVCLiveLogger(log_model=True, report=\"notebook\"),\n",
- " )\n",
- " trainer.fit(model, train_loader, validation_loader)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "7zEi0BXp_B2i"
- },
- "source": [
- "## Comparing results"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "1aHmLHmf_B2i"
- },
- "outputs": [],
- "source": [
- "import dvc.api\n",
- "import pandas as pd\n",
- "\n",
- "columns = [\"Experiment\", \"encoder_size\", \"lr\", \"train.mse\", \"val.mse\"]\n",
- "\n",
- "df = pd.DataFrame(dvc.api.exp_show(), columns=columns)\n",
- "\n",
- "df.dropna(inplace=True)\n",
- "df.reset_index(drop=True, inplace=True)\n",
- "df\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "db42qeHEGqTA"
- },
- "outputs": [],
- "source": [
- "from plotly.express import parallel_coordinates\n",
- "fig = parallel_coordinates(df, columns, color=\"val.mse\")\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "3cfvi0Uk_B2j"
- },
- "outputs": [],
- "source": [
- "!dvc plots diff $(dvc exp list --names-only)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "Zx5n2zbn_B2j"
- },
- "outputs": [],
- "source": [
- "from IPython.display import HTML\n",
- "HTML(filename='./dvc_plots/index.html')"
- ]
- }
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "gpuType": "T4",
- "provenance": [],
- "toc_visible": true
- },
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.16"
- }
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "A812CVYi_B2b"
+ },
+ "source": [
+ ""
+ ]
},
- "nbformat": 4,
- "nbformat_minor": 0
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "gPh2FiPo_B2e"
+ },
+ "source": [
+ "# DVCLive and PyTorch Lightning"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "m0XW9Ml7_B2e"
+ },
+ "source": [
+ "## Setup"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "QivH1_cU_B2f"
+ },
+ "outputs": [],
+ "source": [
+ "%pip install \"dvclive[lightning]\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "pn_5GW1f_B2g"
+ },
+ "outputs": [],
+ "source": [
+ "!git init -q\n",
+ "!git config --local user.email \"you@example.com\"\n",
+ "!git config --local user.name \"Your Name\"\n",
+ "!dvc init -q\n",
+ "!git commit -m \"DVC init\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "zC9hk7kibFTX"
+ },
+ "source": [
+ "### Define LightningModule"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "t5PxdljP_B2h"
+ },
+ "outputs": [],
+ "source": [
+ "import lightning.pytorch as pl\n",
+ "import torch\n",
+ "\n",
+ "\n",
+ "class LitAutoEncoder(pl.LightningModule):\n",
+ " def __init__(self, encoder_size=64, lr=1e-3): # noqa: ARG002\n",
+ " super().__init__()\n",
+ " self.save_hyperparameters()\n",
+ " self.encoder = torch.nn.Sequential(\n",
+ " torch.nn.Linear(28 * 28, encoder_size),\n",
+ " torch.nn.ReLU(),\n",
+ " torch.nn.Linear(encoder_size, 3),\n",
+ " )\n",
+ " self.decoder = torch.nn.Sequential(\n",
+ " torch.nn.Linear(3, encoder_size),\n",
+ " torch.nn.ReLU(),\n",
+ " torch.nn.Linear(encoder_size, 28 * 28),\n",
+ " )\n",
+ "\n",
+ " def training_step(self, batch, batch_idx): # noqa: ARG002\n",
+ " x, y = batch\n",
+ " x = x.view(x.size(0), -1)\n",
+ " z = self.encoder(x)\n",
+ " x_hat = self.decoder(z)\n",
+ " train_mse = torch.nn.functional.mse_loss(x_hat, x)\n",
+ " self.log(\"train_mse\", train_mse)\n",
+ " return train_mse\n",
+ "\n",
+ " def validation_step(self, batch, batch_idx): # noqa: ARG002\n",
+ " x, y = batch\n",
+ " x = x.view(x.size(0), -1)\n",
+ " z = self.encoder(x)\n",
+ " x_hat = self.decoder(z)\n",
+ " val_mse = torch.nn.functional.mse_loss(x_hat, x)\n",
+ " self.log(\"val_mse\", val_mse)\n",
+ " return val_mse\n",
+ "\n",
+ " def configure_optimizers(self):\n",
+ " return torch.optim.Adam(self.parameters(), lr=self.hparams.lr)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "St0ElX9obqRS"
+ },
+ "source": [
+ "### Dataset and loaders"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "T5s53qgr_B2h"
+ },
+ "outputs": [],
+ "source": [
+ "from torchvision.datasets import MNIST\n",
+ "from torchvision import transforms\n",
+ "\n",
+ "transform = transforms.ToTensor()\n",
+ "train_set = MNIST(root=\"MNIST\", download=True, train=True, transform=transform)\n",
+ "validation_set = MNIST(root=\"MNIST\", download=True, train=False, transform=transform)\n",
+ "train_loader = torch.utils.data.DataLoader(train_set)\n",
+ "validation_loader = torch.utils.data.DataLoader(validation_set)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ttiwwreH_B2i"
+ },
+ "source": [
+ "# Tracking experiments with DVCLive"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "sE6qj6BMoDkn"
+ },
+ "outputs": [],
+ "source": [
+ "from dvclive.lightning import DVCLiveLogger"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "XDqNY8pL_B2i"
+ },
+ "outputs": [],
+ "source": [
+ "for encoder_size in (64, 128):\n",
+ " for lr in (1e-3, 0.1):\n",
+ " model = LitAutoEncoder(encoder_size=encoder_size, lr=lr)\n",
+ " trainer = pl.Trainer(\n",
+ " limit_train_batches=200,\n",
+ " limit_val_batches=100,\n",
+ " max_epochs=5,\n",
+ " logger=DVCLiveLogger(log_model=True, report=\"notebook\"),\n",
+ " )\n",
+ " trainer.fit(model, train_loader, validation_loader)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7zEi0BXp_B2i"
+ },
+ "source": [
+ "## Comparing results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "1aHmLHmf_B2i"
+ },
+ "outputs": [],
+ "source": [
+ "import dvc.api\n",
+ "import pandas as pd\n",
+ "\n",
+ "columns = [\"Experiment\", \"encoder_size\", \"lr\", \"train.mse\", \"val.mse\"]\n",
+ "\n",
+ "df = pd.DataFrame(dvc.api.exp_show(), columns=columns)\n",
+ "\n",
+ "df.dropna(inplace=True)\n",
+ "df.reset_index(drop=True, inplace=True)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "db42qeHEGqTA"
+ },
+ "outputs": [],
+ "source": [
+ "from plotly.express import parallel_coordinates\n",
+ "\n",
+ "fig = parallel_coordinates(df, columns, color=\"val.mse\")\n",
+ "fig.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "3cfvi0Uk_B2j"
+ },
+ "outputs": [],
+ "source": [
+ "!dvc plots diff $(dvc exp list --names-only)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Zx5n2zbn_B2j"
+ },
+ "outputs": [],
+ "source": [
+ "from IPython.display import HTML\n",
+ "\n",
+ "HTML(filename=\"./dvc_plots/index.html\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": [],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.16"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
}
diff --git a/examples/DVCLive-Quickstart.ipynb b/examples/DVCLive-Quickstart.ipynb
index 00026152..188226b0 100644
--- a/examples/DVCLive-Quickstart.ipynb
+++ b/examples/DVCLive-Quickstart.ipynb
@@ -28,7 +28,7 @@
"metadata": {},
"outputs": [],
"source": [
- "!pip install dvclive"
+ "%pip install dvclive"
]
},
{
@@ -65,11 +65,10 @@
"metadata": {},
"outputs": [],
"source": [
- "#@title Training helpers. { display-mode: \"form\" }\n",
+ "# @title Training helpers. { display-mode: \"form\" }\n",
"\n",
"import numpy as np\n",
"import torch\n",
- "import torch.nn.functional as F\n",
"import torchvision\n",
"\n",
"from dvclive import Live\n",
@@ -79,15 +78,14 @@
"\n",
"def transform(dataset):\n",
" \"\"\"Get inputs and targets from dataset.\"\"\"\n",
- " x = dataset.data.reshape(len(dataset.data), 1, 28, 28)/255\n",
+ " x = dataset.data.reshape(len(dataset.data), 1, 28, 28) / 255\n",
" y = dataset.targets\n",
" return x.to(device), y.to(device)\n",
"\n",
"\n",
"def train_one_epoch(model, criterion, x, y, lr, weight_decay):\n",
" model.train()\n",
- " optimizer = torch.optim.Adam(\n",
- " model.parameters(), lr=lr, weight_decay=weight_decay)\n",
+ " optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)\n",
" y_pred = model(x)\n",
" loss = criterion(y_pred, y)\n",
" optimizer.zero_grad()\n",
@@ -99,8 +97,7 @@
" \"\"\"Get model prediction scores.\"\"\"\n",
" model.eval()\n",
" with torch.no_grad():\n",
- " y_pred = model(x)\n",
- " return y_pred\n",
+ " return model(x)\n",
"\n",
"\n",
"def get_metrics(y, y_pred, y_pred_label):\n",
@@ -108,7 +105,7 @@
" metrics = {}\n",
" criterion = torch.nn.CrossEntropyLoss()\n",
" metrics[\"loss\"] = criterion(y_pred, y).item()\n",
- " metrics[\"acc\"] = (y_pred_label == y).sum().item()/len(y)\n",
+ " metrics[\"acc\"] = (y_pred_label == y).sum().item() / len(y)\n",
" return metrics\n",
"\n",
"\n",
@@ -123,6 +120,7 @@
"\n",
" return metrics, actual, predicted\n",
"\n",
+ "\n",
"def get_missclassified_image(actual, predicted, dataset):\n",
" confusion = {}\n",
" for n, (a, p) in enumerate(zip(actual, predicted)):\n",
@@ -130,20 +128,23 @@
" confusion[(a, p)] = image\n",
"\n",
" max_i, max_j = 0, 0\n",
- " for (i, j) in confusion:\n",
- " if i > max_i:\n",
- " max_i = i\n",
- " if j > max_j:\n",
- " max_j = j\n",
+ " for i, j in confusion:\n",
+ " max_i = max(i, max_i)\n",
+ " max_j = max(j, max_j)\n",
"\n",
" frame_size = 30\n",
" image_shape = (28, 28)\n",
" incorrect_color = np.array((255, 100, 100), dtype=\"uint8\")\n",
" label_color = np.array((100, 100, 240), dtype=\"uint8\")\n",
"\n",
- " out_matrix = np.ones(shape=((max_i+2) * frame_size, (max_j+2) * frame_size, 3), dtype=\"uint8\") * 240\n",
+ " out_matrix = (\n",
+ " np.ones(\n",
+ " shape=((max_i + 2) * frame_size, (max_j + 2) * frame_size, 3), dtype=\"uint8\"\n",
+ " )\n",
+ " * 240\n",
+ " )\n",
"\n",
- " for i in range(max_i+1):\n",
+ " for i in range(max_i + 1):\n",
" if (i, i) in confusion:\n",
" image = confusion[(i, i)]\n",
" xs = (i + 1) * frame_size + 1\n",
@@ -154,14 +155,14 @@
" out_matrix[xs:xe, ys:ye, c] = (1 - image) * label_color[c]\n",
" out_matrix[ys:ye, xs:xe, c] = (1 - image) * label_color[c]\n",
"\n",
- " for (i, j) in confusion:\n",
+ " for i, j in confusion: # noqa: PLC0206\n",
" image = confusion[(i, j)]\n",
- " assert image.shape == image_shape\n",
+ " assert image.shape == image_shape # noqa: S101\n",
" xs = (i + 1) * frame_size + 1\n",
" xe = (i + 2) * frame_size - 1\n",
" ys = (j + 1) * frame_size + 1\n",
" ye = (j + 2) * frame_size - 1\n",
- " assert (xe-xs, ye-ys) == image_shape\n",
+ " assert (xe - xs, ye - ys) == image_shape # noqa: S101\n",
" if i != j:\n",
" for c in range(3):\n",
" out_matrix[xs:xe, ys:ye, c] = (1 - image) * incorrect_color[c]\n",
@@ -175,7 +176,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#@title Initialize model and dataset. { display-mode: \"form\" }\n",
+ "# @title Initialize model and dataset. { display-mode: \"form\" }\n",
"\n",
"model = torch.nn.Sequential(\n",
" torch.nn.Flatten(),\n",
@@ -212,40 +213,33 @@
"source": [
"# You can modify these parameters to see how they affect the training\n",
"# And run the cell several times\n",
- "params = {\n",
- " \"epochs\": 5,\n",
- " \"lr\": 0.003,\n",
- " \"weight_decay\": 0\n",
- "}\n",
+ "params = {\"epochs\": 5, \"lr\": 0.003, \"weight_decay\": 0}\n",
"\n",
"best_test_acc = 0\n",
"\n",
"with Live(report=\"notebook\") as live:\n",
- "\n",
" live.log_params(params)\n",
"\n",
" for _ in range(params[\"epochs\"]):\n",
- "\n",
" train_one_epoch(\n",
" model, criterion, x_train, y_train, params[\"lr\"], params[\"weight_decay\"]\n",
" )\n",
"\n",
" # Train Evaluation\n",
- " metrics_train, acual_train, predicted_train = evaluate(\n",
- " model, x_train, y_train)\n",
+ " metrics_train, acual_train, predicted_train = evaluate(model, x_train, y_train)\n",
"\n",
" for k, v in metrics_train.items():\n",
" live.log_metric(f\"train/{k}\", v)\n",
"\n",
" live.log_sklearn_plot(\n",
- " \"confusion_matrix\", \n",
- " acual_train, predicted_train, \n",
- " name=\"train/confusion_matrix\"\n",
+ " \"confusion_matrix\",\n",
+ " acual_train,\n",
+ " predicted_train,\n",
+ " name=\"train/confusion_matrix\",\n",
" )\n",
"\n",
" # Test Evaluation\n",
- " metrics_test, actual, predicted = evaluate(\n",
- " model, x_test, y_test)\n",
+ " metrics_test, actual, predicted = evaluate(model, x_test, y_test)\n",
"\n",
" for k, v in metrics_test.items():\n",
" live.log_metric(f\"test/{k}\", v)\n",
@@ -255,8 +249,7 @@
" )\n",
"\n",
" live.log_image(\n",
- " \"misclassified.jpg\", \n",
- " get_missclassified_image(actual, predicted, mnist_test)\n",
+ " \"misclassified.jpg\", get_missclassified_image(actual, predicted, mnist_test)\n",
" )\n",
"\n",
" # Save best model\n",
@@ -291,7 +284,7 @@
"\n",
"df.dropna(inplace=True)\n",
"df.reset_index(drop=True, inplace=True)\n",
- "df\n"
+ "df"
]
},
{
@@ -322,7 +315,8 @@
"outputs": [],
"source": [
"from IPython.display import HTML\n",
- "HTML(filename='./dvc_plots/index.html')"
+ "\n",
+ "HTML(filename=\"./dvc_plots/index.html\")"
]
}
],
diff --git a/examples/DVCLive-YOLO.ipynb b/examples/DVCLive-YOLO.ipynb
index 4c25a4f6..ed69e5ce 100644
--- a/examples/DVCLive-YOLO.ipynb
+++ b/examples/DVCLive-YOLO.ipynb
@@ -29,6 +29,7 @@
"source": [
"%pip install dvclive ultralytics\n",
"import ultralytics\n",
+ "\n",
"ultralytics.checks()"
]
},
@@ -104,6 +105,7 @@
"outputs": [],
"source": [
"from plotly.express import parallel_coordinates\n",
+ "\n",
"fig = parallel_coordinates(df, columns, color=\"metrics.mAP50-95(B)\")\n",
"fig.show()"
]
@@ -114,7 +116,7 @@
"metadata": {},
"outputs": [],
"source": [
- "!dvc plots diff $(dvc exp list --names-only) "
+ "!dvc plots diff $(dvc exp list --names-only)"
]
},
{
@@ -124,7 +126,8 @@
"outputs": [],
"source": [
"from IPython.display import HTML\n",
- "HTML(filename='./dvc_plots/index.html')"
+ "\n",
+ "HTML(filename=\"./dvc_plots/index.html\")"
]
}
],
diff --git a/tests/conftest.py b/tests/conftest.py
index 8b0e0dc7..ce7fc257 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,13 +7,13 @@
from dvclive.utils import rel_path
-@pytest.fixture()
+@pytest.fixture
def tmp_dir(tmp_path, monkeypatch):
monkeypatch.chdir(tmp_path)
return tmp_path
-@pytest.fixture()
+@pytest.fixture
def mocked_dvc_repo(tmp_dir, mocker):
_dvc_repo = mocker.MagicMock()
_dvc_repo.index.stages = []
@@ -28,13 +28,13 @@ def mocked_dvc_repo(tmp_dir, mocker):
return _dvc_repo
-@pytest.fixture()
+@pytest.fixture
def mocked_dvc_subrepo(tmp_dir, mocker, mocked_dvc_repo):
mocked_dvc_repo.root_dir = tmp_dir / "subdir"
return mocked_dvc_repo
-@pytest.fixture()
+@pytest.fixture
def dvc_repo(tmp_dir):
from dvc.repo import Repo
from scmrepo.git import Git
@@ -62,7 +62,7 @@ def _mocked_ci(monkeypatch):
monkeypatch.setenv("CI", "false")
-@pytest.fixture()
+@pytest.fixture
def mocked_studio_post(mocker, monkeypatch):
valid_response = mocker.MagicMock()
valid_response.status_code = 200
diff --git a/tests/frameworks/test_fastai.py b/tests/frameworks/test_fastai.py
index 452e54cd..77bdfa76 100644
--- a/tests/frameworks/test_fastai.py
+++ b/tests/frameworks/test_fastai.py
@@ -21,7 +21,7 @@
pytest.skip("skipping fastai tests", allow_module_level=True)
-@pytest.fixture()
+@pytest.fixture
def data_loader():
from pandas import DataFrame
diff --git a/tests/frameworks/test_huggingface.py b/tests/frameworks/test_huggingface.py
index 42db6d61..7b5f52f1 100644
--- a/tests/frameworks/test_huggingface.py
+++ b/tests/frameworks/test_huggingface.py
@@ -84,18 +84,18 @@ def forward(self, input_x, labels=None, **kwargs):
return (loss, y, y) if self.double_output else (loss, y)
-@pytest.fixture()
+@pytest.fixture
def data():
return RegressionDataset(), RegressionDataset()
-@pytest.fixture()
+@pytest.fixture
def model():
config = RegressionModelConfig()
return RegressionPreTrainedModel(config)
-@pytest.fixture()
+@pytest.fixture
def args():
return TrainingArguments(
"foo",
diff --git a/tests/frameworks/test_keras.py b/tests/frameworks/test_keras.py
index 46239091..ceca0baa 100644
--- a/tests/frameworks/test_keras.py
+++ b/tests/frameworks/test_keras.py
@@ -12,7 +12,7 @@
pytest.skip("skipping keras tests", allow_module_level=True)
-@pytest.fixture()
+@pytest.fixture
def xor_model():
import numpy as np
import tensorflow as tf
diff --git a/tests/frameworks/test_lgbm.py b/tests/frameworks/test_lgbm.py
index 250f355a..749365b2 100644
--- a/tests/frameworks/test_lgbm.py
+++ b/tests/frameworks/test_lgbm.py
@@ -17,12 +17,12 @@
pytest.skip("skipping lightgbm tests", allow_module_level=True)
-@pytest.fixture()
+@pytest.fixture
def model_params():
return {"objective": "multiclass", "n_estimators": 5, "seed": 0}
-@pytest.fixture()
+@pytest.fixture
def iris_data():
iris = datasets.load_iris()
x = pd.DataFrame(iris["data"], columns=iris["feature_names"])
diff --git a/tests/frameworks/test_xgboost.py b/tests/frameworks/test_xgboost.py
index 0b375450..dff0ec28 100644
--- a/tests/frameworks/test_xgboost.py
+++ b/tests/frameworks/test_xgboost.py
@@ -18,12 +18,12 @@
pytest.skip("skipping xgboost tests", allow_module_level=True)
-@pytest.fixture()
+@pytest.fixture
def train_params():
return {"objective": "multi:softmax", "num_class": 3, "seed": 0}
-@pytest.fixture()
+@pytest.fixture
def iris_data():
iris = datasets.load_iris()
x = pd.DataFrame(iris["data"], columns=iris["feature_names"])
@@ -31,7 +31,7 @@ def iris_data():
return xgb.DMatrix(x, y)
-@pytest.fixture()
+@pytest.fixture
def iris_train_eval_data():
iris = datasets.load_iris()
x_train, x_eval, y_train, y_eval = train_test_split(
diff --git a/tests/plots/test_sklearn.py b/tests/plots/test_sklearn.py
index 85b85dd9..26ccb464 100644
--- a/tests/plots/test_sklearn.py
+++ b/tests/plots/test_sklearn.py
@@ -8,7 +8,7 @@
from dvclive.plots.sklearn import SKLearnPlot
-@pytest.fixture()
+@pytest.fixture
def y_true_y_pred_y_score():
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
diff --git a/tests/test_make_report.py b/tests/test_make_report.py
index 4ccbc136..a5a2e7f8 100644
--- a/tests/test_make_report.py
+++ b/tests/test_make_report.py
@@ -104,7 +104,7 @@ def test_make_report(tmp_dir, mode):
last_report = current_report
-@pytest.mark.vscode()
+@pytest.mark.vscode
def test_make_report_open(tmp_dir, mocker, monkeypatch):
mocked_open = mocker.patch("webbrowser.open")
live = Live()
diff --git a/tests/test_monitor_system.py b/tests/test_monitor_system.py
index d704f00b..28d175bb 100644
--- a/tests/test_monitor_system.py
+++ b/tests/test_monitor_system.py
@@ -89,7 +89,7 @@ def mock_pynvml(mocker, num_gpus=2):
mocker.patch(f"{prefix}.nvmlDeviceGetUtilizationRates", return_value=gpu_usage)
-@pytest.fixture()
+@pytest.fixture
def cpu_metrics():
content = {
METRIC_CPU_COUNT: 6,
@@ -112,7 +112,7 @@ def _timeserie_schema(name, value):
return [{name: str(value), "timestamp": str, "step": "0"}]
-@pytest.fixture()
+@pytest.fixture
def cpu_timeseries():
return {
f"{METRIC_CPU_USAGE_PERCENT}.tsv": _timeserie_schema(
@@ -132,7 +132,7 @@ def cpu_timeseries():
}
-@pytest.fixture()
+@pytest.fixture
def gpu_timeseries():
return {
f"{METRIC_GPU_USAGE_PERCENT}/0.tsv": _timeserie_schema("0", 50.0),
diff --git a/tests/test_post_to_studio.py b/tests/test_post_to_studio.py
index a9ac8383..3f585eb0 100644
--- a/tests/test_post_to_studio.py
+++ b/tests/test_post_to_studio.py
@@ -211,7 +211,7 @@ def long_post(*args, **kwargs):
assert metrics_file.read_text() == metrics_content
-@pytest.mark.studio()
+@pytest.mark.studio
def test_post_to_studio_skip_start_and_done_on_env_var(
tmp_dir, mocked_dvc_repo, mocked_studio_post, monkeypatch
):
@@ -230,7 +230,7 @@ def test_post_to_studio_skip_start_and_done_on_env_var(
assert "done" not in call_types
-@pytest.mark.studio()
+@pytest.mark.studio
def test_post_to_studio_dvc_studio_config(
tmp_dir, mocker, mocked_dvc_repo, mocked_studio_post, monkeypatch
):
@@ -252,7 +252,7 @@ def test_post_to_studio_dvc_studio_config(
assert mocked_post.call_args.kwargs["headers"]["Authorization"] == "token token"
-@pytest.mark.studio()
+@pytest.mark.studio
def test_post_to_studio_skip_if_no_token(
tmp_dir,
mocker,
@@ -297,7 +297,7 @@ def test_post_to_studio_shorten_names(tmp_dir, mocked_dvc_repo, mocked_studio_po
)
-@pytest.mark.studio()
+@pytest.mark.studio
def test_post_to_studio_inside_dvc_exp(
tmp_dir, mocker, monkeypatch, mocked_studio_post, mocked_dvc_repo
):
@@ -318,7 +318,7 @@ def test_post_to_studio_inside_dvc_exp(
assert "done" not in call_types
-@pytest.mark.studio()
+@pytest.mark.studio
def test_post_to_studio_inside_subdir(
tmp_dir, dvc_repo, mocker, monkeypatch, mocked_studio_post, mocked_dvc_repo
):
@@ -346,7 +346,7 @@ def test_post_to_studio_inside_subdir(
)
-@pytest.mark.studio()
+@pytest.mark.studio
def test_post_to_studio_inside_subdir_dvc_exp(
tmp_dir, dvc_repo, monkeypatch, mocked_studio_post, mocked_dvc_repo
):
@@ -468,7 +468,7 @@ def test_post_to_studio_if_done_skipped(tmp_dir, mocked_dvc_repo, mocked_studio_
assert "data" in call_types
-@pytest.mark.studio()
+@pytest.mark.studio
def test_post_to_studio_no_repo(tmp_dir, monkeypatch, mocked_studio_post):
monkeypatch.setenv(DVC_STUDIO_TOKEN, "STUDIO_TOKEN")
monkeypatch.setenv(DVC_STUDIO_REPO_URL, "STUDIO_REPO_URL")
@@ -524,7 +524,7 @@ def test_post_to_studio_no_repo(tmp_dir, monkeypatch, mocked_studio_post):
)
-@pytest.mark.studio()
+@pytest.mark.studio
def test_post_to_studio_skip_if_no_repo_url(
tmp_dir,
mocker,
diff --git a/tests/test_vscode.py b/tests/test_vscode.py
index 7f56ab09..93b28565 100644
--- a/tests/test_vscode.py
+++ b/tests/test_vscode.py
@@ -6,7 +6,7 @@
from dvclive import Live, env
-@pytest.mark.vscode()
+@pytest.mark.vscode
@pytest.mark.parametrize("dvc_root", [True, False])
def test_vscode_dvclive_step_completed_signal_file(
tmp_dir, dvc_root, mocker, monkeypatch
@@ -58,7 +58,7 @@ def test_vscode_dvclive_step_completed_signal_file(
assert not os.path.exists(signal_file)
-@pytest.mark.vscode()
+@pytest.mark.vscode
@pytest.mark.parametrize("dvc_root", [True, False])
def test_vscode_dvclive_only_signal_file(tmp_dir, dvc_root, mocker):
signal_file = os.path.join(tmp_dir, ".dvc", "tmp", "exps", "run", "DVCLIVE_ONLY")