diff --git a/supporting-blog-content/Boston-Celtics-Demo/celtics-esql-demo.ipynb b/supporting-blog-content/Boston-Celtics-Demo/celtics-esql-demo.ipynb new file mode 100644 index 00000000..0c19d8ae --- /dev/null +++ b/supporting-blog-content/Boston-Celtics-Demo/celtics-esql-demo.ipynb @@ -0,0 +1,656 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d8f76480-b620-4cfa-bcaa-236e667ae45b", + "metadata": {}, + "source": [ + "The following example is adapted from our Search Labs post on using [ES|QL with the Python language client](https://www.elastic.co/search-labs/blog/esql-pandas-dataframes-python). You can load the data using the method described in this [blog post](https://www.elastic.co/search-labs/blog/analyzing-data-using-python-elasticsearch-and-kibana); the queries below read it from an index named `esql`." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "800448a5-57e9-4392-bdbe-09b889a87848", + "metadata": {}, + "outputs": [], + "source": [ + "# Import required libraries\n", + "from io import StringIO # Used to convert strings to file-like objects\n", + "from getpass import getpass # Securely request password or sensitive input\n", + "from elasticsearch import (\n", + " Elasticsearch,\n", + ") # Elasticsearch client from the official library\n", + "import pandas as pd # Pandas library for data manipulation" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2dd890b9-828a-49ee-bbdf-c3de1a5576d9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Host: ········\n", + "API Key: ········\n" + ] + } + ], + "source": [ + "# Create an Elasticsearch client instance\n", + "client = Elasticsearch(\n", + " getpass(\"Host: \"), # Securely get the host for the Elasticsearch cluster\n", + " api_key=getpass(\"API Key: \"), # Securely get the API key for authentication\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ed7a849a-b23e-4a18-9377-04a790df60e3", + "metadata": {}, + "outputs": [], + "source": [ + 
"# Perform a query to retrieve the first 500 entries\n", + "response = client.esql.query(\n", + " query=\"FROM esql | LIMIT 500\",\n", + " format=\"csv\", # Specify the output format as CSV\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4e152565-84d8-4632-86ff-9c8244aeef71", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ASTBLKDREBFG3AFG3MFG3_PCTFGAFGMFG_PCTFTA...SEASON_ID.keywordSTLTEAM_ABBREVIATIONTEAM_ABBREVIATION.keywordTEAM_IDTEAM_NAMETEAM_NAME.keywordTOVWLWL.keyword
02133537140.37886360.41918...420235BOSBOS1610612738Boston CelticsBoston Celtics10WW
12263337110.29782390.47621...420234BOSBOS1610612738Boston CelticsBoston Celtics5WW
22183332120.37580370.46321...420239BOSBOS1610612738Boston CelticsBoston Celtics12LL
32763449220.44982390.47616...420236BOSBOS1610612738Boston CelticsBoston Celtics10WW
429153832160.50089510.57321...2202310BOSBOS1610612738Boston CelticsBoston Celtics14WW
..................................................................
802264045150.33390430.47827...220234BOSBOS1610612738Boston CelticsBoston Celtics11WW
812724635200.57195540.56828...220235BOSBOS1610612738Boston CelticsBoston Celtics11WW
823163653190.358102510.5007...2202311BOSBOS1610612738Boston CelticsBoston Celtics17WW
832063939160.41095450.47419...220237BOSBOS1610612738Boston CelticsBoston Celtics15WW
8418113939120.30877370.48126...220236BOSBOS1610612738Boston CelticsBoston Celtics13WW
\n", + "

85 rows × 34 columns

\n", + "
" + ], + "text/plain": [ + " AST BLK DREB FG3A FG3M FG3_PCT FGA FGM FG_PCT FTA ... \\\n", + "0 21 3 35 37 14 0.378 86 36 0.419 18 ... \n", + "1 22 6 33 37 11 0.297 82 39 0.476 21 ... \n", + "2 21 8 33 32 12 0.375 80 37 0.463 21 ... \n", + "3 27 6 34 49 22 0.449 82 39 0.476 16 ... \n", + "4 29 15 38 32 16 0.500 89 51 0.573 21 ... \n", + ".. ... ... ... ... ... ... ... ... ... ... ... \n", + "80 22 6 40 45 15 0.333 90 43 0.478 27 ... \n", + "81 27 2 46 35 20 0.571 95 54 0.568 28 ... \n", + "82 31 6 36 53 19 0.358 102 51 0.500 7 ... \n", + "83 20 6 39 39 16 0.410 95 45 0.474 19 ... \n", + "84 18 11 39 39 12 0.308 77 37 0.481 26 ... \n", + "\n", + " SEASON_ID.keyword STL TEAM_ABBREVIATION TEAM_ABBREVIATION.keyword \\\n", + "0 42023 5 BOS BOS \n", + "1 42023 4 BOS BOS \n", + "2 42023 9 BOS BOS \n", + "3 42023 6 BOS BOS \n", + "4 22023 10 BOS BOS \n", + ".. ... ... ... ... \n", + "80 22023 4 BOS BOS \n", + "81 22023 5 BOS BOS \n", + "82 22023 11 BOS BOS \n", + "83 22023 7 BOS BOS \n", + "84 22023 6 BOS BOS \n", + "\n", + " TEAM_ID TEAM_NAME TEAM_NAME.keyword TOV WL WL.keyword \n", + "0 1610612738 Boston Celtics Boston Celtics 10 W W \n", + "1 1610612738 Boston Celtics Boston Celtics 5 W W \n", + "2 1610612738 Boston Celtics Boston Celtics 12 L L \n", + "3 1610612738 Boston Celtics Boston Celtics 10 W W \n", + "4 1610612738 Boston Celtics Boston Celtics 14 W W \n", + ".. ... ... ... ... .. ... 
\n", + "80 1610612738 Boston Celtics Boston Celtics 11 W W \n", + "81 1610612738 Boston Celtics Boston Celtics 11 W W \n", + "82 1610612738 Boston Celtics Boston Celtics 17 W W \n", + "83 1610612738 Boston Celtics Boston Celtics 15 W W \n", + "84 1610612738 Boston Celtics Boston Celtics 13 W W \n", + "\n", + "[85 rows x 34 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Convert the CSV response into a pandas DataFrame\n", + "df = pd.read_csv(StringIO(response.body))\n", + "df # Display the DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ae173c10-94f5-4608-ba63-9bd37ca84378", + "metadata": {}, + "outputs": [], + "source": [ + "# Query to fetch game dates and win/loss information, limiting to 10 entries\n", + "wl_date = client.esql.query(\n", + " query=\"FROM esql | KEEP GAME_DATE, WL | LIMIT 10\",\n", + " format=\"csv\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a0cfd315-8d25-44aa-bc70-19efa32bf5b3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GAME_DATEWL
02024-04-29T00:00:00.000ZW
12024-04-27T00:00:00.000ZW
22024-04-24T00:00:00.000ZL
32024-04-21T00:00:00.000ZW
42024-04-14T00:00:00.000ZW
52024-04-12T00:00:00.000ZW
62024-04-11T00:00:00.000ZL
72024-04-07T00:00:00.000ZW
82024-04-05T00:00:00.000ZW
92024-04-03T00:00:00.000ZW
\n", + "
" + ], + "text/plain": [ + " GAME_DATE WL\n", + "0 2024-04-29T00:00:00.000Z W\n", + "1 2024-04-27T00:00:00.000Z W\n", + "2 2024-04-24T00:00:00.000Z L\n", + "3 2024-04-21T00:00:00.000Z W\n", + "4 2024-04-14T00:00:00.000Z W\n", + "5 2024-04-12T00:00:00.000Z W\n", + "6 2024-04-11T00:00:00.000Z L\n", + "7 2024-04-07T00:00:00.000Z W\n", + "8 2024-04-05T00:00:00.000Z W\n", + "9 2024-04-03T00:00:00.000Z W" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Convert the query response into a DataFrame\n", + "wl_date_df = pd.read_csv(StringIO(wl_date.body))\n", + "wl_date_df # Display the DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a422a04a-a314-4a45-8c09-73aadfb958bc", + "metadata": {}, + "outputs": [], + "source": [ + "# Query to calculate average field goal percentage and average three-point percentage\n", + "stats = client.esql.query(\n", + " query=\"FROM esql | STATS AVG(FG_PCT), AVG(FG3_PCT) | LIMIT 1\",\n", + " format=\"csv\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1468ab81-5317-4049-b9e9-5ee30519f194", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AVG(FG_PCT)AVG(FG3_PCT)
00.4873410.387706
\n", + "
" + ], + "text/plain": [ + " AVG(FG_PCT) AVG(FG3_PCT)\n", + "0 0.487341 0.387706" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Convert the stats query response into a DataFrame\n", + "stats_df = pd.read_csv(StringIO(stats.body))\n", + "stats_df # Display the DataFrame" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}