From a2178e3d47bc5f63d62adcedaf483546c03dd5b4 Mon Sep 17 00:00:00 2001 From: Marc Folch Date: Wed, 1 Nov 2023 16:22:37 +0100 Subject: [PATCH] [Marc Folch] lab-mysql-window-functions --- main.ipynb | 1287 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 1274 insertions(+), 13 deletions(-) diff --git a/main.ipynb b/main.ipynb index dd1d140..d5e4ee6 100644 --- a/main.ipynb +++ b/main.ipynb @@ -1,5 +1,34 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "73d97991", + "metadata": {}, + "outputs": [], + "source": [ + "import pymysql\n", + "import sqlalchemy as alch\n", + "from getpass import getpass\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e0a3fcb3", + "metadata": {}, + "outputs": [], + "source": [ + "from urllib.parse import quote\n", + "password = getpass()\n", + "\n", + "dbName = \"sakila\"\n", + "\n", + "connectionData=f\"mysql+pymysql://root:{quote(password, safe='')}@localhost/{dbName}\"\n", + "engine = alch.create_engine(connectionData)" + ] + }, { "cell_type": "markdown", "id": "6b1fed37", @@ -19,6 +48,144 @@ "```" ] }, + { + "cell_type": "code", + "execution_count": 25, + "id": "2e61fb3e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titlerental_durationavg_rental_duration
0ACADEMY DINOSAUR65.0909
1ACE GOLDFINGER35.6667
2ADAPTATION HOLES73.4167
3AFFAIR PREJUDICE54.7273
4AFRICAN EGG67.0909
............
953YOUNG LANGUAGE64.5714
954YOUTH KICK45.6667
955ZHIVAGO CORE65.8750
956ZOOLANDER FICTION55.5882
957ZORRO ARK34.5161
\n", + "

958 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " title rental_duration avg_rental_duration\n", + "0 ACADEMY DINOSAUR 6 5.0909\n", + "1 ACE GOLDFINGER 3 5.6667\n", + "2 ADAPTATION HOLES 7 3.4167\n", + "3 AFFAIR PREJUDICE 5 4.7273\n", + "4 AFRICAN EGG 6 7.0909\n", + ".. ... ... ...\n", + "953 YOUNG LANGUAGE 6 4.5714\n", + "954 YOUTH KICK 4 5.6667\n", + "955 ZHIVAGO CORE 6 5.8750\n", + "956 ZOOLANDER FICTION 5 5.5882\n", + "957 ZORRO ARK 3 4.5161\n", + "\n", + "[958 rows x 3 columns]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"\"\"\n", + "SELECT film.title, film.rental_duration, AVG(DATEDIFF(rental.return_date, rental.rental_date)) as avg_rental_duration\n", + "FROM film\n", + "JOIN inventory ON film.film_id = inventory.film_id\n", + "JOIN rental ON inventory.inventory_id = rental.inventory_id\n", + "GROUP BY film.title, film.rental_duration\n", + "\"\"\"\n", + "\n", + "pd.read_sql_query(query, engine)" + ] + }, { "cell_type": "markdown", "id": "aef14486", @@ -63,12 +230,79 @@ "```" ] }, + { + "cell_type": "code", + "execution_count": 28, + "id": "c5933533", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
staff_idavg_payment_amount
014.156568
124.245125
\n", + "
" + ], + "text/plain": [ + " staff_id avg_payment_amount\n", + "0 1 4.156568\n", + "1 2 4.245125" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"\"\"\n", + "SELECT staff_id, AVG(amount) as avg_payment_amount\n", + "FROM payment\n", + "GROUP BY staff_id\n", + "\"\"\"\n", + "\n", + "pd.read_sql_query(query, engine)" + ] + }, { "cell_type": "markdown", "id": "995debd9", "metadata": {}, "source": [ - "#### Expected output\n", + "#### Expected output\n", "\n", "|staff_id|avg_payment_amount|\n", "|--------|------------------|\n", @@ -86,7 +320,7 @@ "id": "b54831eb", "metadata": {}, "source": [ - "### 3. Calculate the total revenue for each customer, showing the running total within each customer's rental history:" + "### 3. Calculate the total revenue for each customer, showing the running total within each customer's rental history:" ] }, { @@ -100,12 +334,172 @@ "```" ] }, + { + "cell_type": "code", + "execution_count": 37, + "id": "5b8a33d7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrental_idrental_dateamountrunning_total
01762005-05-25 11:30:372.992.99
115732005-05-28 10:35:230.993.98
2111852005-06-15 00:54:125.999.97
3114222005-06-15 18:02:530.9910.96
4114762005-06-15 21:08:469.9920.95
..................
16039599145992005-08-21 17:43:424.9966.85
16040599147192005-08-21 21:41:571.9968.84
16041599155902005-08-23 06:09:448.9977.83
16042599157192005-08-23 11:08:462.9980.82
16043599157252005-08-23 11:25:002.9983.81
\n", + "

16044 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " customer_id rental_id rental_date amount running_total\n", + "0 1 76 2005-05-25 11:30:37 2.99 2.99\n", + "1 1 573 2005-05-28 10:35:23 0.99 3.98\n", + "2 1 1185 2005-06-15 00:54:12 5.99 9.97\n", + "3 1 1422 2005-06-15 18:02:53 0.99 10.96\n", + "4 1 1476 2005-06-15 21:08:46 9.99 20.95\n", + "... ... ... ... ... ...\n", + "16039 599 14599 2005-08-21 17:43:42 4.99 66.85\n", + "16040 599 14719 2005-08-21 21:41:57 1.99 68.84\n", + "16041 599 15590 2005-08-23 06:09:44 8.99 77.83\n", + "16042 599 15719 2005-08-23 11:08:46 2.99 80.82\n", + "16043 599 15725 2005-08-23 11:25:00 2.99 83.81\n", + "\n", + "[16044 rows x 5 columns]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"\"\"\n", + "SELECT rental.customer_id, rental.rental_id, rental.rental_date, payment.amount, SUM(payment.amount) over (partition by rental.customer_id ORDER BY rental.rental_date) AS running_total\n", + "FROM rental\n", + "JOIN payment ON rental.rental_id = payment.rental_id\n", + "\"\"\"\n", + "\n", + "pd.read_sql_query(query, engine)" + ] + }, { "cell_type": "markdown", "id": "60474671", "metadata": {}, "source": [ - "#### Expected output\n", + "#### Expected output\n", "\n", "|customer_id|rental_id|rental_date |amount|running_total|\n", "|-----------|---------|-------------------|------|-------------|\n", @@ -126,7 +520,7 @@ "id": "5a8d3627", "metadata": {}, "source": [ - "### 4. Determine the quartile for the rental rates of films:" + "### 4. Determine the quartile for the rental rates of films:" ] }, { @@ -140,12 +534,147 @@ "```" ] }, + { + "cell_type": "code", + "execution_count": 131, + "id": "1306fb28", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titlerental_rateNTILE(4) OVER (ORDER BY rental_rate)
0ACADEMY DINOSAUR0.991
1ALAMO VIDEOTAPE0.991
2ALASKA PHANTOM0.991
3ALICE FANTASIA0.991
4ALONE TRIP0.991
............
995WONDERLAND CHRISTMAS4.994
996WORKING MICROCOSMOS4.994
997WYOMING STORM4.994
998YENTL IDAHO4.994
999ZORRO ARK4.994
\n", + "

1000 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " title rental_rate NTILE(4) OVER (ORDER BY rental_rate)\n", + "0 ACADEMY DINOSAUR 0.99 1\n", + "1 ALAMO VIDEOTAPE 0.99 1\n", + "2 ALASKA PHANTOM 0.99 1\n", + "3 ALICE FANTASIA 0.99 1\n", + "4 ALONE TRIP 0.99 1\n", + ".. ... ... ...\n", + "995 WONDERLAND CHRISTMAS 4.99 4\n", + "996 WORKING MICROCOSMOS 4.99 4\n", + "997 WYOMING STORM 4.99 4\n", + "998 YENTL IDAHO 4.99 4\n", + "999 ZORRO ARK 4.99 4\n", + "\n", + "[1000 rows x 3 columns]" + ] + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"\"\"\n", + "SELECT title, rental_rate, NTILE(4) OVER (ORDER BY rental_rate)\n", + "FROM film\n", + "\"\"\"\n", + "\n", + "pd.read_sql_query(query, engine)" + ] + }, { "cell_type": "markdown", "id": "680369bd", "metadata": {}, "source": [ - "#### Expected output\n", + "#### Expected output\n", "\n", "|title|rental_rate|quartile |\n", "|-----|-----------|-------------------|\n", @@ -166,7 +695,7 @@ "id": "bc952a4d", "metadata": {}, "source": [ - "### 5. Determine the first and last rental date for each customer:" + "### 5. Determine the first and last rental date for each customer:" ] }, { @@ -180,6 +709,142 @@ "```" ] }, + { + "cell_type": "code", + "execution_count": 40, + "id": "c555d8cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idMIN(rental_date)MAX(rental_date)
012005-05-25 11:30:372005-08-22 20:03:46
122005-05-27 00:09:242005-08-23 17:39:35
232005-05-27 17:17:092005-08-23 07:10:14
342005-06-15 09:31:282005-08-23 07:43:00
452005-05-29 07:25:162006-02-14 15:16:03
............
5945952005-05-28 15:27:222005-08-23 21:26:35
5955962005-05-26 21:16:522006-02-14 15:16:03
5965972005-05-25 04:19:282006-02-14 15:16:03
5975982005-06-20 10:10:292005-08-23 00:44:15
5985992005-05-31 01:18:562005-08-23 11:25:00
\n", + "

599 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " customer_id MIN(rental_date) MAX(rental_date)\n", + "0 1 2005-05-25 11:30:37 2005-08-22 20:03:46\n", + "1 2 2005-05-27 00:09:24 2005-08-23 17:39:35\n", + "2 3 2005-05-27 17:17:09 2005-08-23 07:10:14\n", + "3 4 2005-06-15 09:31:28 2005-08-23 07:43:00\n", + "4 5 2005-05-29 07:25:16 2006-02-14 15:16:03\n", + ".. ... ... ...\n", + "594 595 2005-05-28 15:27:22 2005-08-23 21:26:35\n", + "595 596 2005-05-26 21:16:52 2006-02-14 15:16:03\n", + "596 597 2005-05-25 04:19:28 2006-02-14 15:16:03\n", + "597 598 2005-06-20 10:10:29 2005-08-23 00:44:15\n", + "598 599 2005-05-31 01:18:56 2005-08-23 11:25:00\n", + "\n", + "[599 rows x 3 columns]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"\"\"\n", + "SELECT customer_id, MIN(rental_date), MAX(rental_date)\n", + "FROM rental\n", + "GROUP BY customer_id\n", + "\"\"\"\n", + "\n", + "pd.read_sql_query(query, engine)" + ] + }, { "cell_type": "markdown", "id": "ba8b0ba3", @@ -209,7 +874,7 @@ "id": "5a3ea58f", "metadata": {}, "source": [ - "### 6. Calculate the rank of customers based on their rental counts:" + "### 6. Calculate the rank of customers based on their rental counts:" ] }, { @@ -223,12 +888,148 @@ "```" ] }, + { + "cell_type": "code", + "execution_count": 51, + "id": "278561e7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrental_countrental_count_rank
0148461
1526452
2144423
3236423
475415
............
59424815594
5956114596
59611014596
59728114596
59831812599
\n", + "

599 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " customer_id rental_count rental_count_rank\n", + "0 148 46 1\n", + "1 526 45 2\n", + "2 144 42 3\n", + "3 236 42 3\n", + "4 75 41 5\n", + ".. ... ... ...\n", + "594 248 15 594\n", + "595 61 14 596\n", + "596 110 14 596\n", + "597 281 14 596\n", + "598 318 12 599\n", + "\n", + "[599 rows x 3 columns]" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"\"\"\n", + "SELECT customer_id, COUNT(rental_id) AS rental_count, RANK() OVER (ORDER BY COUNT(rental_id) DESC) AS rental_count_rank\n", + "FROM rental\n", + "GROUP BY customer_id\n", + "\"\"\"\n", + "\n", + "pd.read_sql_query(query, engine)" + ] + }, { "cell_type": "markdown", "id": "8014e0d8", "metadata": {}, "source": [ - "#### Expected output\n", + "#### Expected output\n", "\n", "|customer_id|rental_count |rental_count_rank|\n", "|-----------|-----------------------------|-----------------|\n", @@ -246,7 +1047,7 @@ "id": "48d922e3", "metadata": {}, "source": [ - "### 7. Calculate the running total of revenue per day for the 'Family' film category:" + "### 7. Calculate the running total of revenue per day for the 'Family' film category:" ] }, { @@ -260,6 +1061,159 @@ "```" ] }, + { + "cell_type": "code", + "execution_count": 103, + "id": "11446eef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titlerental_dateamountdaily_revenue
0BLANKET BEVERLY2005-05-24 22:53:302.992.99
1KING EVOLUTION2005-05-25 00:22:556.996.99
2APACHE DIVINE2005-05-25 01:59:464.9911.98
3MANCHURIAN CURTAIN2005-05-25 04:47:444.9916.97
4GANDHI KWAI2005-05-25 13:52:430.9917.96
...............
1091HALF OUTFIELD2006-02-14 15:16:032.9929.90
1092HALF OUTFIELD2006-02-14 15:16:030.0029.90
1093JASON TRAP2006-02-14 15:16:032.9929.90
1094MOVIE SHAKESPEARE2006-02-14 15:16:030.0029.90
1095OPPOSITE NECKLACE2006-02-14 15:16:034.9929.90
\n", + "

1096 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " title rental_date amount daily_revenue\n", + "0 BLANKET BEVERLY 2005-05-24 22:53:30 2.99 2.99\n", + "1 KING EVOLUTION 2005-05-25 00:22:55 6.99 6.99\n", + "2 APACHE DIVINE 2005-05-25 01:59:46 4.99 11.98\n", + "3 MANCHURIAN CURTAIN 2005-05-25 04:47:44 4.99 16.97\n", + "4 GANDHI KWAI 2005-05-25 13:52:43 0.99 17.96\n", + "... ... ... ... ...\n", + "1091 HALF OUTFIELD 2006-02-14 15:16:03 2.99 29.90\n", + "1092 HALF OUTFIELD 2006-02-14 15:16:03 0.00 29.90\n", + "1093 JASON TRAP 2006-02-14 15:16:03 2.99 29.90\n", + "1094 MOVIE SHAKESPEARE 2006-02-14 15:16:03 0.00 29.90\n", + "1095 OPPOSITE NECKLACE 2006-02-14 15:16:03 4.99 29.90\n", + "\n", + "[1096 rows x 4 columns]" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"\"\"\n", + "SELECT film.title, rental.rental_date, payment.amount, SUM(payment.amount) OVER (PARTITION BY DATE(rental.rental_date) ORDER BY rental.rental_date ASC) AS daily_revenue\n", + "FROM payment\n", + "JOIN rental ON payment.rental_id = rental.rental_id\n", + "JOIN inventory ON rental.inventory_id = inventory.inventory_id\n", + "JOIN film ON inventory.film_id = film.film_id\n", + "JOIN film_category ON film.film_id = film_category.film_id\n", + "JOIN category ON film_category.category_id = category.category_id\n", + "WHERE category.name = \"Family\"\n", + "\"\"\"\n", + "\n", + "pd.read_sql_query(query, engine)" + ] + }, { "cell_type": "markdown", "id": "d90b129e", @@ -283,7 +1237,7 @@ "id": "3b5c9a9b", "metadata": {}, "source": [ - "### 8. Assign a unique ID to each payment within each customer's payment history:" + "### 8. Assign a unique ID to each payment within each customer's payment history:" ] }, { @@ -297,6 +1251,141 @@ "```" ] }, + { + "cell_type": "code", + "execution_count": 118, + "id": "742dfb58", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idpayment_iddaily_revenue
0111
1122
2133
3144
4155
............
160445991604515
160455991604616
160465991604717
160475991604818
160485991604919
\n", + "

16049 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " customer_id payment_id daily_revenue\n", + "0 1 1 1\n", + "1 1 2 2\n", + "2 1 3 3\n", + "3 1 4 4\n", + "4 1 5 5\n", + "... ... ... ...\n", + "16044 599 16045 15\n", + "16045 599 16046 16\n", + "16046 599 16047 17\n", + "16047 599 16048 18\n", + "16048 599 16049 19\n", + "\n", + "[16049 rows x 3 columns]" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"\"\"\n", + "SELECT customer_id, payment_id, COUNT(payment_id) OVER (PARTITION BY customer_id ORDER BY payment_id) AS daily_revenue\n", + "FROM payment\n", + "\"\"\"\n", + "\n", + "pd.read_sql_query(query, engine)" + ] + }, { "cell_type": "markdown", "id": "705cc7df", @@ -355,7 +1444,7 @@ "id": "a92a965e", "metadata": {}, "source": [ - "### 9. Calculate the difference in days between each rental and the previous rental for each customer:" + "### 9. Calculate the difference in days between each rental and the previous rental for each customer:" ] }, { @@ -369,12 +1458,184 @@ "```" ] }, + { + "cell_type": "code", + "execution_count": 124, + "id": "58ec029c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrental_idrental_datepreviousdifference
01762005-05-25 11:30:37NaTNaN
115732005-05-28 10:35:232005-05-25 11:30:373.0
2111852005-06-15 00:54:122005-05-28 10:35:2318.0
3114222005-06-15 18:02:532005-06-15 00:54:120.0
4114762005-06-15 21:08:462005-06-15 18:02:530.0
..................
16039599145992005-08-21 17:43:422005-08-21 05:07:080.0
16040599147192005-08-21 21:41:572005-08-21 17:43:420.0
16041599155902005-08-23 06:09:442005-08-21 21:41:572.0
16042599157192005-08-23 11:08:462005-08-23 06:09:440.0
16043599157252005-08-23 11:25:002005-08-23 11:08:460.0
\n", + "

16044 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " customer_id rental_id rental_date previous \\\n", + "0 1 76 2005-05-25 11:30:37 NaT \n", + "1 1 573 2005-05-28 10:35:23 2005-05-25 11:30:37 \n", + "2 1 1185 2005-06-15 00:54:12 2005-05-28 10:35:23 \n", + "3 1 1422 2005-06-15 18:02:53 2005-06-15 00:54:12 \n", + "4 1 1476 2005-06-15 21:08:46 2005-06-15 18:02:53 \n", + "... ... ... ... ... \n", + "16039 599 14599 2005-08-21 17:43:42 2005-08-21 05:07:08 \n", + "16040 599 14719 2005-08-21 21:41:57 2005-08-21 17:43:42 \n", + "16041 599 15590 2005-08-23 06:09:44 2005-08-21 21:41:57 \n", + "16042 599 15719 2005-08-23 11:08:46 2005-08-23 06:09:44 \n", + "16043 599 15725 2005-08-23 11:25:00 2005-08-23 11:08:46 \n", + "\n", + " difference \n", + "0 NaN \n", + "1 3.0 \n", + "2 18.0 \n", + "3 0.0 \n", + "4 0.0 \n", + "... ... \n", + "16039 0.0 \n", + "16040 0.0 \n", + "16041 2.0 \n", + "16042 0.0 \n", + "16043 0.0 \n", + "\n", + "[16044 rows x 5 columns]" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"\"\"\n", + "SELECT customer_id, rental_id, rental_date, LAG(rental_date) OVER (ORDER BY customer_id, rental_id) as previous, DATEDIFF(rental_date, (LAG(rental_date) OVER (ORDER BY customer_id, rental_id))) as difference\n", + "FROM rental\n", + "\"\"\"\n", + "\n", + "pd.read_sql_query(query, engine)" + ] + }, { "cell_type": "markdown", "id": "f23f4cf8", "metadata": {}, "source": [ - "#### Expected output\n", + "#### Expected output\n", "\n", "\n", "|customer_id|rental_id|rental_date |previous_rental_date|days_between_rentals|\n", @@ -410,7 +1671,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.11.5" }, "nbTranslate": { "displayLangs": [