Add metrics (#841)

* Add metrics * Linter, typos * Fix plots * Fix typos * Fix headers * Linter * Add files via upload * Fix some tests and linter. * Fix recsys dataset. * Add render * fix_types * Fix DataDefinition in spark. * Skip ssl verification for bikes dataset. * Fix linter. * Try to fix test error. * Try to fix test error. * Ignore ssl error on bike download in test examples. * Skip SSL verify for bike dataset in examples. (cherry picked from commit 4cfcdd5) --------- Co-authored-by: 0lgaF <[email protected]> Co-authored-by: Vyacheslav Morov <[email protected]>
evidentlyai · Nov 13, 2023 · 812b94b · 812b94b
1 parent 4cfcdd5
commit 812b94b
Show file tree

Hide file tree

Showing 36 changed files with 2,611 additions and 20 deletions.
diff --git a/examples/how_to_questions/how_to_run_recsys_metrics.ipynb b/examples/how_to_questions/how_to_run_recsys_metrics.ipynb
@@ -0,0 +1,314 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "8aJYBZFNMyXc",
+        "collapsed": true
+      },
+      "outputs": [],
+      "source": [
+        "try:\n",
+        "    import evidently\n",
+        "except:\n",
+        "    !pip install git+https://github.com/evidentlyai/evidently.git"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "\n",
+        "import io\n",
+        "import os\n",
+        "import zipfile\n",
+        "\n",
+        "import requests"
+      ],
+      "metadata": {
+        "id": "UfuNPLwjO99K"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install implicit"
+      ],
+      "metadata": {
+        "id": "8A_dH0K0082d"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# load data"
+      ],
+      "metadata": {
+        "id": "KjF_x-wfcZOI"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "content = requests.get(\"http://files.grouplens.org/datasets/movielens/ml-100k.zip\").content\n",
+        "\n",
+        "with zipfile.ZipFile(io.BytesIO(content)) as arc:\n",
+        "  train = arc.read(\"ml-100k/ua.base\").decode().split(\"\\n\")\n",
+        "  test = arc.read(\"ml-100k/ua.test\").decode().split(\"\\n\")\n",
+        "  movies = arc.read(\"ml-100k/u.item\").decode(encoding='latin-1').split(\"\\n\")\n",
+        "  users = arc.read(\"ml-100k/u.user\").decode(encoding='latin-1').split(\"\\n\")"
+      ],
+      "metadata": {
+        "id": "f1wLolXpM02U"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "columns = ['user_id', 'movie_id', 'rating', 'timestamp']\n",
+        "\n",
+        "data = [[x for x in e.split('\\t')] for e in train]\n",
+        "train = pd.DataFrame(data, columns=columns).dropna().astype(int)\n",
+        "\n",
+        "data = [[x for x in e.split('\\t')] for e in test]\n",
+        "test = pd.DataFrame(data, columns=columns).dropna().astype(int)\n",
+        "\n",
+        "columns = ['user_id', 'age', 'gender', 'occupation', 'zip_code']\n",
+        "\n",
+        "data = [[x for x in e.split('|')] for e in users]\n",
+        "users = pd.DataFrame(data, columns=columns).dropna().astype({'user_id': int, 'age': int})\n",
+        "\n",
+        "genres = ['unknown', 'action', 'adventure', 'animation', 'children', 'comedy', 'crime', 'documentary', 'drama', 'fantasy', 'noir',\n",
+        "          'horror', 'musical', 'mystery', 'romance', 'sci-fi', 'thriller', 'war', 'western']\n",
+        "columns = ['movie_id', 'title', 'year', '-', 'url'] + genres\n",
+        "data = [[x for x in e.split('|')] for e in movies]\n",
+        "movies = pd.DataFrame(data, columns=columns).dropna().astype({'movie_id': int})\n",
+        "movies.drop(columns=['-', 'url'], inplace=True)\n",
+        "movies[genres] = movies[genres].astype(int)\n",
+        "movies['moive_age'] = (pd.to_datetime(movies.year).max() - pd.to_datetime(movies.year)).dt.days / 365"
+      ],
+      "metadata": {
+        "id": "-V1w4P5LeV4X"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Utils"
+      ],
+      "metadata": {
+        "id": "luiAKltK9ze3"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def transform_predictions(k, user_ids, item_ids):\n",
+        "  return pd.DataFrame(\n",
+        "      data=np.c_[np.repeat(user_ids, k), item_ids.flatten(), [i + 1 for i in range(k)] * len(user_ids)],\n",
+        "      columns=['user_id', 'movie_id', 'rank']\n",
+        "  )\n",
+        "\n",
+        "\n",
+        "def prepare_prediction_df(k, user_ids, item_ids, true):\n",
+        "  preds = transform_predictions(k, user_ids, item_ids)\n",
+        "  preds = preds.merge(true, on=['user_id', 'movie_id'], how='outer')\n",
+        "  preds['rank'] = preds.groupby('user_id')['rank'].transform(lambda x: x.fillna(x.max() + 1))\n",
+        "  return preds\n",
+        "\n",
+        "\n",
+        "def get_embeddings(model, movies_list, users_list, factors):\n",
+        "  item_factors = pd.DataFrame(\n",
+        "      data=np.column_stack((movies_list, model.item_factors)),\n",
+        "      columns=['movie_id'] + [f'item_factor_{i+1}' for i in range(factors)]\n",
+        "  )\n",
+        "  user_factors = pd.DataFrame(\n",
+        "      data=np.column_stack((users_list, model.user_factors)),\n",
+        "      columns=['user_id'] + [f'user_factor_{i+1}' for i in range(factors)]\n",
+        "  )\n",
+        "  return item_factors, user_factors\n",
+        "\n",
+        "\n",
+        "def get_full_df(df, item_factors, user_factors):\n",
+        "  df = df.merge(movies, on=['movie_id'], how='left')\n",
+        "  df = df.merge(users, on=['user_id'], how='left')\n",
+        "  df = df.merge(item_factors, on=['movie_id'], how='left')\n",
+        "  df = df.merge(user_factors, on=['user_id'], how='left')\n",
+        "  return df"
+      ],
+      "metadata": {
+        "id": "MqP6bLDv92hY"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Train model"
+      ],
+      "metadata": {
+        "id": "J4Z6jBMZcwnJ"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Let's get predictions from two models - ALS model and most common item recommender"
+      ],
+      "metadata": {
+        "id": "XXba3z0w_y7p"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from implicit.cpu.als import AlternatingLeastSquares\n",
+        "from scipy.sparse import csr_matrix\n",
+        "pivot_table = train.pivot_table(index=['user_id'], columns=['movie_id'], values=\"rating\").fillna(0)\n",
+        "\n",
+        "als_model = AlternatingLeastSquares(factors=20, iterations=5, random_state=0)\n",
+        "als_model.fit(csr_matrix(pivot_table))"
+      ],
+      "metadata": {
+        "id": "-FQDsHEA3OKw"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "ids, scores = als_model.recommend(test.user_id.unique() - 1, csr_matrix(pivot_table.loc[test.user_id.unique()]), N=30, filter_already_liked_items=True)\n",
+        "als_df = prepare_prediction_df(30, test.user_id.unique(), ids, test)"
+      ],
+      "metadata": {
+        "id": "1kDXlE-FAEij"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "most_popular_top = list(train.movie_id.value_counts()[:30])\n",
+        "rec_array = np.array([most_popular_top] * len(test.user_id.unique()))\n",
+        "most_popular_df = prepare_prediction_df(30, test.user_id.unique(), rec_array, test)"
+      ],
+      "metadata": {
+        "id": "j6z-vy8jArCq"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "item_factors, user_factors = get_embeddings(als_model, pivot_table.columns, pivot_table.index, 20)\n",
+        "als_df = get_full_df(als_df, item_factors, user_factors)\n",
+        "most_popular_df = get_full_df(most_popular_df, item_factors, user_factors)\n",
+        "train = get_full_df(train, item_factors, user_factors)"
+      ],
+      "metadata": {
+        "id": "3vuUgdDmAW5o"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "item_features = [f'item_factor_{i+1}' for i in range(20)]"
+      ],
+      "metadata": {
+        "id": "VlKggz4OCysy"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from evidently.metrics import PrecisionTopKMetric\n",
+        "from evidently.metrics import RecallTopKMetric\n",
+        "from evidently.metrics import FBetaTopKMetric\n",
+        "from evidently.metrics import MAPKMetric\n",
+        "from evidently.metrics import NDCGKMetric\n",
+        "from evidently.metrics import DiversityMetric\n",
+        "from evidently.metrics import ItemBiasMetric\n",
+        "from evidently.metrics import NoveltyMetric\n",
+        "from evidently.metrics import PersonalisationMetric\n",
+        "from evidently.metrics import PopularityBias\n",
+        "from evidently.metrics import SerendipityMetric\n",
+        "from evidently.metrics import UserBiasMetric\n",
+        "from evidently.pipeline.column_mapping import ColumnMapping\n",
+        "from evidently.report import Report"
+      ],
+      "metadata": {
+        "id": "vxU6s88ism0_"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "report = Report(metrics=[\n",
+        "    PrecisionTopKMetric(k=5),\n",
+        "    RecallTopKMetric(k=5),\n",
+        "    FBetaTopKMetric(k=5),\n",
+        "    MAPKMetric(k=5),\n",
+        "    NDCGKMetric(k=5),\n",
+        "    DiversityMetric(k=5, item_features=item_features),\n",
+        "    NoveltyMetric(k=5),\n",
+        "    PersonalisationMetric(k=5),\n",
+        "    SerendipityMetric(k=5, item_features=item_features),\n",
+        "    PopularityBias(k=5),\n",
+        "    ItemBiasMetric(k=5, column_name='moive_age'),\n",
+        "    ItemBiasMetric(k=5, column_name='crime'),\n",
+        "    UserBiasMetric(column_name='age'),\n",
+        "    UserBiasMetric(column_name='gender')\n",
+        "\n",
+        "\n",
+        "])\n",
+        "column_mapping=ColumnMapping(recommendations_type='rank', target='rating', prediction='rank', item_id='title', user_id='user_id')\n",
+        "report.run(\n",
+        "    reference_data=most_popular_df.dropna(subset=['title', 'user_id']).fillna(0),\n",
+        "    current_data=als_df.dropna(subset=['title', 'user_id']).fillna(0),\n",
+        "    column_mapping=column_mapping,\n",
+        "    additional_datasets={'current_train_data': train.dropna(subset=['title', 'user_id'])}\n",
+        "  )\n",
+        "report"
+      ],
+      "metadata": {
+        "id": "7KIQreI6tKEA"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
diff --git a/src/evidently/base_metric.py b/src/evidently/base_metric.py
@@ -112,7 +112,6 @@ class GenericInputData:
     current_data: object
     column_mapping: ColumnMapping
     data_definition: DataDefinition
-
     additional_datasets: Dict[str, Any]
 
 
@@ -124,7 +123,6 @@ class InputData:
     current_additional_features: Optional[pd.DataFrame]
     column_mapping: ColumnMapping
     data_definition: DataDefinition
-
     additional_datasets: Dict[str, Any]
 
     @staticmethod

diff --git a/src/evidently/calculations/recommender_systems.py b/src/evidently/calculations/recommender_systems.py
@@ -72,3 +72,20 @@ def get_curr_and_ref_df(
         )
 
     return curr, ref
+
+
+def get_prediciton_name(data: InputData):
+    if isinstance(data.column_mapping.prediction, str):
+        return data.column_mapping.prediction
+    if "prediction" in data.current_data.columns and (
+        (data.reference_data is not None and "prediction" in data.reference_data.columns)
+        or (data.reference_data is None)
+    ):
+        return "prediction"
+    raise ValueError("Prediction should be presented and must be one column")
+
+
+def get_stats_novelty(train_data: pd.DataFrame, item_id: str, user_id: str):
+    user_interacted = train_data.groupby(item_id)[user_id].nunique()
+    interactions = user_interacted / user_interacted.shape[0]
+    return interactions