From 7c7ae75f1b384d1379b92699de46a96717292f6b Mon Sep 17 00:00:00 2001 From: openkmj Date: Thu, 9 Jan 2025 17:09:51 +0900 Subject: [PATCH 1/4] week1 --- .gitignore | 11 + missions/W1/M1.ipynb | 542 +++++ missions/W1/M2.ipynb | 2156 +++++++++++++++++ missions/W1/M3/README.md | 250 ++ missions/W1/M3/etl_project_gdp.py | 62 + missions/W1/M3/etl_project_gdp_from_csv.py | 119 + missions/W1/M3/etl_project_gdp_parallel.py | 236 ++ missions/W1/M3/etl_project_gdp_with_sql.py | 83 + missions/W1/M3/modules/exporter.py | 50 + missions/W1/M3/modules/importer.py | 180 ++ missions/W1/M3/modules/logger.py | 38 + .../M3/utils/create_country_region_table.py | 48 + missions/W1/M3/utils/create_large_data_csv.py | 72 + missions/W1/create.sql | 1035 ++++++++ missions/W1/mtcars.csv | 66 +- 15 files changed, 4915 insertions(+), 33 deletions(-) create mode 100644 missions/W1/M1.ipynb create mode 100644 missions/W1/M2.ipynb create mode 100644 missions/W1/M3/README.md create mode 100644 missions/W1/M3/etl_project_gdp.py create mode 100644 missions/W1/M3/etl_project_gdp_from_csv.py create mode 100644 missions/W1/M3/etl_project_gdp_parallel.py create mode 100644 missions/W1/M3/etl_project_gdp_with_sql.py create mode 100644 missions/W1/M3/modules/exporter.py create mode 100644 missions/W1/M3/modules/importer.py create mode 100644 missions/W1/M3/modules/logger.py create mode 100644 missions/W1/M3/utils/create_country_region_table.py create mode 100644 missions/W1/M3/utils/create_large_data_csv.py create mode 100644 missions/W1/create.sql diff --git a/.gitignore b/.gitignore index 68bc17f..3baab37 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,14 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#.idea/ + +*.db +large_data*.csv +.DS_Store +.venv +.ipynb_checkpoints +large_data*.csv +__pycache__ +*.db +*.json +*.log \ No newline at end of file diff --git a/missions/W1/M1.ipynb b/missions/W1/M1.ipynb new file mode 100644 index 0000000..3a24589 --- /dev/null +++ b/missions/W1/M1.ipynb @@ -0,0 +1,542 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "a1297356-f73a-484b-b466-ae99843f51f1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "head:\n", + " Unnamed: 0 mpg cyl disp hp drat wt qsec vs am gear \\\n", + "0 Mazda RX4 21.0 6 160.0 110 3.9 2.620 16.46 0 1 4 \n", + "1 Mazda RX4 Wag 21.0 6 160.0 110 3.9 2.875 17.02 0 1 4 \n", + "\n", + " carb \n", + "0 4 \n", + "1 4 \n", + "tail:\n", + " Unnamed: 0 mpg cyl disp hp drat wt qsec vs am gear carb\n", + "30 Maserati Bora 15.0 8 301.0 335 3.54 3.57 14.6 0 1 5 8\n", + "31 Volvo 142E 21.4 4 121.0 109 4.11 2.78 18.6 1 1 4 2\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "\n", + "df = pd.read_csv('./mtcars.csv')\n", + "\n", + "# print head: 상단 n개 출력\n", + "print('head:\\n', df.head(2))\n", + "\n", + "# print tail: 하단 n개 출력\n", + "print('tail:\\n', df.tail(2))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bdf05680", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape:\n", + " (32, 12)\n", + "row count: 32\n" + ] + } + ], + "source": [ + "\n", + "# print shape:\n", + "print('shape:\\n', df.shape)\n", + "print('row count:', df.shape[0])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "58fcc7b9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "columns:\n", + " Index(['Unnamed: 0', 'mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs',\n", + " 'am', 'gear', 'carb'],\n", + " dtype='object')\n", + "Unnamed: 0\n", + "columns:\n", + " 
Index(['name', 'mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am',\n", + " 'gear', 'carb'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "print('columns:\\n', df.columns)\n", + "\n", + "# set first column nas as 'name'\n", + "print(df.columns[0])\n", + "df.rename(columns={df.columns[0]: 'name'}, inplace=True)\n", + "\n", + "print('columns:\\n', df.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "7c3c4cdc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 32 entries, 0 to 31\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 name 32 non-null object \n", + " 1 mpg 32 non-null float64\n", + " 2 cyl 32 non-null int64 \n", + " 3 disp 32 non-null float64\n", + " 4 hp 32 non-null int64 \n", + " 5 drat 32 non-null float64\n", + " 6 wt 32 non-null float64\n", + " 7 qsec 32 non-null float64\n", + " 8 vs 32 non-null int64 \n", + " 9 am 32 non-null int64 \n", + " 10 gear 32 non-null int64 \n", + " 11 carb 32 non-null int64 \n", + "dtypes: float64(5), int64(6), object(1)\n", + "memory usage: 3.1+ KB\n", + "dtypes:\n", + " name object\n", + "mpg float64\n", + "cyl int64\n", + "disp float64\n", + "hp int64\n", + "drat float64\n", + "wt float64\n", + "qsec float64\n", + "vs int64\n", + "am int64\n", + "gear int64\n", + "carb int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "\n", + "# print info:\n", + "df.info()\n", + "\n", + "print('dtypes:\\n', df.dtypes)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e1912a1a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "describe:\n", + " mpg cyl disp hp drat wt \\\n", + "count 32.000000 32.000000 32.000000 32.000000 32.000000 32.000000 \n", + "mean 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 \n", + "std 6.026948 1.785922 123.938694 68.562868 
0.534679 0.978457 \n", + "min 10.400000 4.000000 71.100000 52.000000 2.760000 1.513000 \n", + "25% 15.425000 4.000000 120.825000 96.500000 3.080000 2.581250 \n", + "50% 19.200000 6.000000 196.300000 123.000000 3.695000 3.325000 \n", + "75% 22.800000 8.000000 326.000000 180.000000 3.920000 3.610000 \n", + "max 33.900000 8.000000 472.000000 335.000000 4.930000 5.424000 \n", + "\n", + " qsec vs am gear carb \n", + "count 32.000000 32.000000 32.000000 32.000000 32.0000 \n", + "mean 17.848750 0.437500 0.406250 3.687500 2.8125 \n", + "std 1.786943 0.504016 0.498991 0.737804 1.6152 \n", + "min 14.500000 0.000000 0.000000 3.000000 1.0000 \n", + "25% 16.892500 0.000000 0.000000 3.000000 2.0000 \n", + "50% 17.710000 0.000000 0.000000 4.000000 2.0000 \n", + "75% 18.900000 1.000000 1.000000 4.000000 4.0000 \n", + "max 22.900000 1.000000 1.000000 5.000000 8.0000 \n" + ] + } + ], + "source": [ + "\n", + "# print describe:\n", + "print('describe:\\n', df.describe())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "976cd107", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "gear unique values: 3 [4 3 5]\n", + "transmission unique values: 2 [1 0]\n" + ] + } + ], + "source": [ + "print('gear unique values:', df['gear'].nunique(), df['gear'].unique())\n", + "\n", + "print('transmission unique values:', df['am'].nunique(), df['am'].unique())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "86e8f8b6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "am 0 1\n", + "gear \n", + "3 15 0\n", + "4 4 8\n", + "5 0 5\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# with pandas cross tab\n", + "result = pd.crosstab(df['gear'], df['am'])\n", + "print(result)\n", + "\n", + "x = [f\"{gear}, {am}\" for gear in result.index for am in result.columns]\n", + "y = result.values.flatten()\n", + "\n", + "plt.bar(x,y)\n", + "\n", + "plt.title('# of Cars by Gear x Transmission')\n", + "plt.xlabel('Gear, Auto Transmission')\n", + "plt.ylabel('# of Cars')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "302ffcd4", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# without crosstab\n", + "groupby = df.groupby(['gear', 'am']).size().reset_index(name='count')\n", + "\n", + "x = [f\"{gear}, {am}\" for gear, am, _ in groupby.values]\n", + "y = [count for _, _, count in groupby.values]\n", + "\n", + "\n", + "plt.bar(x,y)\n", + "\n", + "# make graph with above data\n", + "plt.title('# of Cars by Gear x Transmission')\n", + "plt.xlabel('Gear, Auto Transmission')\n", + "plt.ylabel('# of Cars')\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e9ca6925", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(4, 3, figsize=(8, 8))\n", + "\n", + "for i, column in enumerate(df.columns[1:]):\n", + " ax = axs[i//3, i%3]\n", + " ax.hist(df[column])\n", + " ax.set_title(column)\n", + " ax.grid(True)\n", + "\n", + "fig.delaxes(axs[3, 2])\n", + "\n", + "\n", + "plt.suptitle('Histograms of Variables')\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ce93de2e", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAHHCAYAAABZbpmkAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAASdRJREFUeJzt3XlclPX+///nAAKKMoSK4I5oKm5lpaKSLSauZVppx/VkZoqZ5rH0nDxmnTRbvp3qlO1qmmXlydJcPuS+UJjmirmFYQliEpuGKHP9/vDHHEdAWWaBy8f9dpvbzXlf77nm9WaYeHZd7+t9WQzDMAQAAGBSXp4uAAAAwJUIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIO0AlYrFY9Mwzz9ifz58/XxaLRceOHXPae7hinwDgSYQdwIWOHj2qMWPGqEmTJvL391dgYKC6dOmi1157TX/++aenyzO9Z555RhaLRb///nuR2xs3bqy+ffu6uaryu+2222SxWOyP4OBg3XLLLfrwww9ls9k8XR5Q4fh4ugDArL755hvdf//98vPz0/Dhw9W6dWvl5eVpy5YtmjJlivbv36933323XO8xbNgwDR48WH5+fk6qGpVF/fr1NXv2bEnSqVOn9NFHH2nUqFE6dOiQXnjhBQ9XB1QshB3ABZKSkjR48GA1atRI69atU1hYmH1bbGysjhw5om+++abc7+Pt7S1vb+9y78eVzpw5o4CAAE+X4XI2m015eXny9/d3y/tZrVYNHTrU/nzMmDFq3ry5/vOf/+i5555TlSpV3FJHWVwrvxOoODiNBbjAiy++qJycHH3wwQcOQadA06ZN9fjjj0uSunXrpnbt2hW5n+bNmysmJqbY9ylqfk3BqZktW7aoQ4cO8vf3V5MmTfTRRx8Vev3+/ft1xx13qGrVqqpfv77+9a9/FXsaZNWqVYqOjlZAQIBq1KihPn36aP/+/Q59Ro4cqerVq+vo0aPq3bu3atSooSFDhhS5vy+++EIWi0UbN24stO2dd96RxWLRvn37JEmpqan661//qvr168vPz09hYWG65557XDKv6MyZM5o8ebIaNGggPz8/NW/eXC+//LIMw3DoZ7FYNH78eH388cdq1aqV/Pz8tHr1aknSp59+qptuukk1atRQYGCg2rRpo9dee83h9RkZGZo4caL9fZo2bao5c+aU+TRUtWrV1KlTJ505c0anTp2SJP3888+6//77FRwcbN9+acg2DEO1atXSE088YW+z2WwKCgqSt7e3MjIy7O1z5syRj4+PcnJy7G0//fST7rvvPgUHB8vf318333yzvv76a4e6Cn5HN27cqHHjxikkJET169cv0xiBsuLIDuACy5cvV5MmTdS5c
+er9h02bJhGjx6tffv2qXXr1vb27du369ChQ3r66adL/f5HjhzRfffdp1GjRmnEiBH68MMPNXLkSN10001q1aqVpIsB4vbbb9eFCxc0depUBQQE6N1331XVqlUL7W/hwoUaMWKEYmJiNGfOHJ09e1Zz585V165d9eOPP6px48b2vhcuXFBMTIy6du2ql19+WdWqVSuyxj59+qh69er67LPP1K1bN4dtS5YsUatWrew/j4EDB2r//v167LHH1LhxY6WlpSkuLk7JyckO712c9PT0ItsvDxaGYejuu+/W+vXrNWrUKN1www1as2aNpkyZot9++02vvvqqQ/9169bps88+0/jx41WrVi01btxYcXFxevDBB3XnnXdqzpw5kqQDBw5o69at9oB79uxZdevWTb/99pvGjBmjhg0batu2bZo2bZpSUlL073//+6pjKsrPP/8sb29vBQUF6eTJk+rcubPOnj2rCRMmqGbNmlqwYIHuvvtuffHFF7r33ntlsVjUpUsXbdq0yb6PPXv2KDMzU15eXtq6dav69OkjSdq8ebNuvPFGVa9eXdLFoNylSxfVq1fP/vvz2WefqX///lq6dKnuvfdeh9rGjRun2rVr65///KfOnDlTpvEBZWYAcKrMzExDknHPPfeUqH9GRobh7+9vPPXUUw7tEyZMMAICAoycnBx7myRjxowZ9ufz5s0zJBlJSUn2tkaNGhmSjE2bNtnb0tLSDD8/P2Py5Mn2tokTJxqSjO+//96hn9Vqddhndna2ERQUZIwePdqhvtTUVMNqtTq0jxgxwpBkTJ06tURjf/DBB42QkBDjwoUL9raUlBTDy8vLePbZZw3DMIw//vjDkGS89NJLJdrnpWbMmGFIuuKjT58+9v7Lli0zJBn/+te/HPZz3333GRaLxThy5Ii9TZLh5eVl7N+/36Hv448/bgQGBjqM6XLPPfecERAQYBw6dMihferUqYa3t7eRnJx8xXF169bNaNGihXHq1Cnj1KlTxoEDB4wJEyYYkox+/foZhvG/z3fz5s3212VnZxvh4eFG48aNjfz8fMMwDOOll14yvL29jaysLMMwDOP11183GjVqZHTo0MH+O5mfn28EBQUZkyZNsu/rzjvvNNq0aWPk5uba22w2m9G5c2ejWbNm9raC39GuXbte8WcCuBKnsQAny8rKkiTVqFGjRP2tVqvuueceffLJJ/ZTJfn5+VqyZIn69+9fprkNkZGRio6Otj+vXbu2mjdvrp9//tnetnLlSnXq1EkdOnRw6Hf5aae4uDhlZGTowQcf1O+//25/eHt7q2PHjlq/fn2h9x87dmyJ6hw0aJDS0tK0YcMGe9sXX3whm82mQYMGSZKqVq0qX19fbdiwQX/88UeJ9nu5pUuXKi4urtCjTp06Dv1Wrlwpb29vTZgwwaF98uTJMgxDq1atcmjv1q2bIiMjHdqCgoJ05swZxcXFFVvP559/rujoaF133XUOP9Pu3bsrPz/f4UhLcX766SfVrl1btWvXVsuWLfXGG2+oT58++vDDD+1j6dChg7p27Wp/TfXq1fXII4/o2LFjSkxMlCRFR0crPz9f27Ztk3TxCE50dLSio6O1efNmSdK+ffuUkZFh/51KT0/XunXr9MADDyg7O9te/+nTpxUTE6PDhw/rt99+c6h39OjRFX5+GcyL01iAkwUGBkqSsrOzS/ya4cOHa8mSJdq8ebNuvfVWffvttzp58qSGDRtWphoaNmxYqO26665zCAu//PKLOnbsWKhf8+bNHZ4fPnxYknTHHXcU+V4F4y3g4+NT4jkZPXv2lNVq1ZIlS3TnnXdKungK64YbbtD1118vSfLz89OcOXM0efJk1alTR506dVLfvn01fPhwhYaGluh9br31VtWqVatQ++WTiX/55RfVrVu3UFBt2bKlffulwsPDC+1z3Lhx+uyzz9SrVy/Vq1dPPXr00AMPPKCePXva+xw+fFh79uxR7dq1i6w3LS3tqmNq3Lix3
nvvPVksFvn7+6tZs2YKCQlxGEtRn++lY2ndurXat2+vatWqafPmzYqJidHmzZs1c+ZMhYaG6o033lBubq499BQEpyNHjsgwDE2fPl3Tp08vdgz16tWzPy/qZwW4C2EHcLLAwEDVrVvXPrm2JGJiYlSnTh0tWrRIt956qxYtWqTQ0FB17969TDUU93/QxmWTbEuiYF7LwoULiwwXPj6O/xnx8/OTl1fJDhr7+fmpf//++vLLL/XWW2/p5MmT2rp1q2bNmuXQb+LEierXr5+WLVumNWvWaPr06Zo9e7bWrVunG2+8sdRjcpai5jeFhIRo165dWrNmjVatWqVVq1Zp3rx5Gj58uBYsWCDp4s/0rrvu0pNPPlnkfguC3pUEBASU+ffjUlWqVFHHjh21adMmHTlyRKmpqYqOjladOnV0/vx5ff/999q8ebNatGhhD2cFvxN/+9vfip1A37RpU4fnRf2sAHch7AAu0LdvX7377ruKj49XVFTUVft7e3vrL3/5i+bPn685c+Zo2bJlLj/s36hRI/tRm0sdPHjQ4XlERISki3/EnfHH9XKDBg3SggULtHbtWh04cECGYdhPYV1ex+TJkzV58mQdPnxYN9xwg1555RUtWrTIabU0atRI3377rbKzsx2O7vz000/27SXh6+urfv36qV+/frLZbBo3bpzeeecdTZ8+XU2bNlVERIRycnJc8vMs0KhRo0KfpVT0WKKjozVnzhx9++23qlWrllq0aCGLxaJWrVpp8+bN2rx5s8Pii02aNJF0MSi5cgyAszBnB3CBJ598UgEBAXr44Yd18uTJQtuPHj1a6FLkYcOG6Y8//tCYMWOUk5PjsIaKK/Tu3VvfffedEhIS7G2nTp3Sxx9/7NAvJiZGgYGBmjVrls6fP19oPwWXOZdV9+7dFRwcrCVLlmjJkiXq0KGDwymPs2fPKjc31+E1ERERqlGjhs6dO1eu975c7969lZ+fr//85z8O7a+++qosFot69ep11X2cPn3a4bmXl5fatm0rSfZ6H3jgAcXHx2vNmjWFXp+RkaELFy6UdQh2vXv3VkJCguLj4+1tZ86c0bvvvqvGjRs7zDWKjo7WuXPn9O9//1tdu3aVxWKxty9cuFAnTpxwmAMWEhKi2267Te+8845SUlIKvXd5fycAZ+PIDuACERERWrx4sQYNGqSWLVs6rKC8bds2ff755xo5cqTDa2688Ua1bt1an3/+uVq2bKn27du7tMYnn3xSCxcuVM+ePfX444/bLz1v1KiR9uzZY+8XGBiouXPnatiwYWrfvr0GDx6s2rVrKzk5Wd988426dOlSKByURpUqVTRgwAB9+umnOnPmjF5++WWH7YcOHdKdd96pBx54QJGRkfLx8dGXX36pkydPavDgwWV+36L069dPt99+u/7xj3/o2LFjateunf7v//5PX331lSZOnGg/ynUlDz/8sNLT03XHHXeofv36+uWXX/TGG2/ohhtusM+XmTJlir7++mv17dvXviTAmTNntHfvXn3xxRc6duxYkXOMSmPq1Kn65JNP1KtXL02YMEHBwcFasGCBkpKStHTpUodTjVFRUfLx8dHBgwf1yCOP2NtvvfVWzZ07V5Icwo4kvfnmm+ratavatGmj0aNHq0mTJjp58qTi4+P166+/avfu3eWqH3Aqj14LBpjcoUOHjNGjRxuNGzc2fH19jRo1ahhdunQx3njjDYdLdgu8+OKLhiRj1qxZRe5PJbz0/NLLqQt069bN6Natm0Pbnj17jG7duhn+/v5GvXr1jOeee8744IMPCu3TMAxj/fr1RkxMjGG1Wg1/f38jIiLCGDlypPHDDz/Y+4wYMcIICAi4+g/mMnFxcYYkw2KxGMePH3fY9vvvvxuxsbFGixYtjICAAMNqtRodO3Y0Pvvss6vut+DS81OnThW5vaifVXZ2tjFp0iSjbt26RpUqVYxmzZoZL730kmGz2Rz6STJiY2ML7fOLL74wevToYYSEhBi+v
r5Gw4YNjTFjxhgpKSmF3mfatGlG06ZNDV9fX6NWrVpG586djZdfftnIy8u74ri6detmtGrV6qrjP3r0qHHfffcZQUFBhr+/v9GhQwdjxYoVRfa95ZZbCi1F8OuvvxqSjAYNGhS7/+HDhxuhoaFGlSpVjHr16hl9+/Y1vvjiC3ufgt/R7du3X7VewFUshlGGGYsAXOK1117TpEmTdOzYsSKvqAIAlB5hB6ggDMNQu3btVLNmzSLXrgEAlA1zdgAPO3PmjL7++mutX79ee/fu1VdffeXpkgDAVDiyA3jYsWPHFB4erqCgII0bN07PP/+8p0sCAFMh7AAAAFNjnR0AAGBqhB0AAGBqTFDWxfu8nDhxQjVq1LCvHAoAACo2wzCUnZ2tunXrXvGefIQdSSdOnFCDBg08XQYAACiD48ePq379+sVuJ+xI9hv+HT9+XIGBgR6uBgAAlERWVpYaNGjgcOPeohB2JPupq8DAQMIOAACVzNWmoDBBGQAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBorKAMAAJfItxlKSEpXWnauQmr4q0N4sLy93H/DbcIOAABwutX7UjRzeaJSMnPtbWFWf83oF6mercPcWgunsQAAgFOt3peisYt2OgQdSUrNzNXYRTu1el+KW+sh7AAAAKfJtxmauTxRRhHbCtpmLk9Uvq2oHq5B2AEAAE6TkJRe6IjOpQxJKZm5SkhKd1tNhB0AAOA0adnFB52y9HMGwg4AAHCakBr+Tu3nDIQdAADgNB3CgxVm9VdxF5hbdPGqrA7hwW6ribADAACcxtvLohn9IiWpUOApeD6jX6Rb19sh7AAAAKfq2TpMc4e2V6jV8VRVqNVfc4e2d/s6OywqCAAAnK5n6zDdFRnKCsoAAMC8vL0sioqo6ekyOI0FAADMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMzaNhZ+7cuWrbtq0CAwMVGBioqKgorVq1yr49NzdXsbGxqlmzpqpXr66BAwfq5MmTDvtITk5Wnz59VK1aNYWEhGjKlCm6cOGCu4cCAAAqKI+Gnfr16+uFF17Qjh079MMPP+iOO+7QPffco/3790uSJk2apOXLl+vzzz/Xxo0bdeLECQ0YMMD++vz8fPXp00d5eXnatm2bFixYoPnz5+uf//ynp4YEAAAqGIthGIani7hUcHCwXnrpJd13332qXbu2Fi9erPvuu0+S9NNPP6lly5aKj49Xp06dtGrVKvXt21cnTpxQnTp1JElvv/22nnrqKZ06dUq+vr4les+srCxZrVZlZmYqMDDQZWMDAADOU9K/3xVmzk5+fr4+/fRTnTlzRlFRUdqxY4fOnz+v7t272/u0aNFCDRs2VHx8vCQpPj5ebdq0sQcdSYqJiVFWVpb96FBRzp07p6ysLIcHAAAwJ4+Hnb1796p69ery8/PTo48+qi+//FKRkZFKTU2Vr6+vgoKCHPrXqVNHqampkqTU1FSHoFOwvWBbcWbPni2r1Wp/NGjQwLmDAgAAFYbHw07z5s21a9cuff/99xo7dqxGjBihxMREl77ntGnTlJmZaX8cP37cpe8HAAA8x8fTBfj6+qpp06aSpJtuuknbt2/Xa6+9pkGDBikvL08ZGRkOR3dOnjyp0NBQSVJoaKgSEhIc9ldwtVZBn6L4+fnJz8/PySMBAAAVkceP7FzOZrPp3Llzuummm1SlShWtXbvWvu3gwYNKTk5WVFSUJCkqKkp79+5VWlqavU9cXJwCAwMVGRnp9toBA
EDF49EjO9OmTVOvXr3UsGFDZWdna/HixdqwYYPWrFkjq9WqUaNG6YknnlBwcLACAwP12GOPKSoqSp06dZIk9ejRQ5GRkRo2bJhefPFFpaam6umnn1ZsbCxHbgAAgCQPh520tDQNHz5cKSkpslqtatu2rdasWaO77rpLkvTqq6/Ky8tLAwcO1Llz5xQTE6O33nrL/npvb2+tWLFCY8eOVVRUlAICAjRixAg9++yznhoSAACoYCrcOjuewDo7AABUPpVunR0AAABXIOwAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABTI+wAAABT8/F0AQAAwJzybYYSktKVlp2rkBr+6hAeLG8vi9vrIOwAAACnW70vRTOXJyolM9feFmb114x+kerZOsyttXAaCwAAONXqfSkau2inQ9CRpNTMXI1dtFOr96W4tR7CDgAAcJp8m6GZyxNlFLGtoG3m8kTl24rq4RqEHQAA4DQJSemFjuhcypCUkpmrhKR0t9VE2AEAAE6Tll180ClLP2cg7AAAAKcJqeHv1H7OQNgBAABO0yE8WGFWfxV3gblFF6/K6hAe7LaaCDsAAMBpvL0smtEvUpIKBZ6C5zP6Rbp1vR3CDgAAcKqercM0d2h7hVodT1WFWv01d2h7t6+zw6KCAADA6Xq2DtNdkaGsoAwAAMzL28uiqIiani6D01gAAMDcCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUCDsAAMDUPBp2Zs+erVtuuUU1atRQSEiI+vfvr4MHDzr0ue2222SxWBwejz76qEOf5ORk9enTR9WqVVNISIimTJmiCxcuuHMoAACggvLoOjsbN25UbGysbrnlFl24cEF///vf1aNHDyUmJiogIMDeb/To0Xr22Wftz6tVq2b/d35+vvr06aPQ0FBt27ZNKSkpGj58uKpUqaJZs2a5dTwAAKDisRiGYXi6iAKnTp1SSEiINm7cqFtvvVXSxSM7N9xwg/79738X+ZpVq1apb9++OnHihOrUqSNJevvtt/XUU0/p1KlT8vX1ver7ZmVlyWq1KjMzU4GBgU4bDwAAcJ2S/v2uUHN2MjMzJUnBwY53Qv34449Vq1YttW7dWtOmTdPZs2ft2+Lj49WmTRt70JGkmJgYZWVlaf/+/UW+z7lz55SVleXwAAAA5lRhbhdhs9k0ceJEdenSRa1bt7a3/+Uvf1GjRo1Ut25d7dmzR0899ZQOHjyo//73v5Kk1NRUh6Ajyf48NTW1yPeaPXu2Zs6c6aKRAACAiqTChJ3Y2Fjt27dPW7ZscWh/5JFH7P9u06aNwsLCdOedd+ro0aOKiIgo03tNmzZNTzzxhP15VlaWGjRoULbCAQBAkfIu2LQw/ph+ST+rRsHVNCyqsXx93H9SqUKEnfHjx2vFihXatGmT6tevf8W+HTt2lCQdOXJEERERCg0NVUJCgkOfkydPSpJCQ0OL3Iefn5/8/PycUDkAACjK7JWJem9zkmyXzAx+fuUBjY4O17TekW6txaNzdgzD0Pjx4/Xll19q3bp1Cg8Pv+prdu3aJUkKCwuTJEVFRWnv3r1KS0uz94mLi1NgYKAiI937wwQAABeDzjubHIOOJNkM6Z1NSZq9MtGt9Xg07MTGxmrRokVavHixatSoodTUVKWmpurPP/+UJB09elTPPfecduzYoWPHjunrr7/W8OHDdeutt6pt27aSpB49eigyMlLDhg3T7t27tWbNGj399NOKjY3l6A0AAG6Wd8Gm9zYnXbHPe5uTlHfB5qaKPBx25s6dq8zMTN12220KCwuzP5YsWSJJ8vX11
bfffqsePXqoRYsWmjx5sgYOHKjly5fb9+Ht7a0VK1bI29tbUVFRGjp0qIYPH+6wLg8AAHCPhfHHCh3RuZzNuNjPXTw6Z+dqS/w0aNBAGzduvOp+GjVqpJUrVzqrLAAAUEa/pJ+9eqdS9HOGCrXODgAAqNwaBVe7eqdS9HMGwg4AAHCaYVGN5WW5ch8vy8V+7kLYAQAATuPr46XR0Ve+unp0dLhb19upEOvsAAAA8yhYR+fydXa8LPLIOjsV6kagnsKNQAEAcD5Xr6Bc0r/fHNkBAAAu4evjpVHRTTxdBnN2AACAuRF2AACAqRF2AACAqRF2AACAqRF2AACAqRF2AACAqXHpOQAAcIl8m6GEpHSlZecqpIa/OoQHy/tq95JwAcIOAABwutX7UjRzeaJSMnPtbWFWf83oF6mercPcWgunsQAAgFOt3peisYt2OgQdSUrNzNXYRTu1el+KW+sh7AAAAKfJtxmauTxRRd2LqqBt5vJE5dvcd7cqwg4AAHCahKT0Qkd0LmVISsnMVUJSuttqKlXYOX/+vB566CElJSW5qh4AAFCJpWUXH3TK0s8ZShV2qlSpoqVLl7qqFgAAUMmF1PB3aj9nKPVprP79+2vZsmUuKAUAAFR2HcKDFWb1V3EXmFt08aqsDuHBbqup1JeeN2vWTM8++6y2bt2qm266SQEBAQ7bJ0yY4LTiAABA5eLtZdGMfpEau2inLJLDROWCADSjX6Rb19uxGIZRqunQ4eHhxe/MYtHPP/9c7qLcLSsrS1arVZmZmQoMDPR0OQAAVHruWGenpH+/Sx12zIiwAwCA87l6BeWS/v0u8wrKeXl5SkpKUkREhHx8WIgZAAA48vayKCqipqfLKP0E5bNnz2rUqFGqVq2aWrVqpeTkZEnSY489phdeeMHpBQIAAJRHqcPOtGnTtHv3bm3YsEH+/v+7bKx79+5asmSJU4sDAAAor1Kff1q2bJmWLFmiTp06yWL533m3Vq1a6ejRo04tDgAAoLxKfWTn1KlTCgkJKdR+5swZh/ADAABQEZQ67Nx888365ptv7M8LAs7777+vqKgo51UGAADgBKU+jTVr1iz16tVLiYmJunDhgl577TUlJiZq27Zt2rhxoytqBAAAlZCrLz0vqVKHna5du2rXrl164YUX1KZNG/3f//2f2rdvr/j4eLVp08YVNQIAgErGHYsKlhSLCopFBQEAcKbV+1I0dtFOXR4wCo7pzB3a3imBp6R/v0s9Z2f48OGaN29epbwtBAAAcK18m6GZyxMLBR3pf/fJmrk8Ufk29x1rKXXY8fX11ezZs9W0aVM1aNBAQ4cO1fvvv6/Dhw+7oj4AAFCJJCSlO5y6upwhKSUzVwlJ6W6rqdRh5/3339ehQ4d0/Phxvfjii6pevbpeeeUVtWjRQvXr13dFjQAAoJJIyy4+6JSlnzOUOuwUuO6661SzZk1dd911CgoKko+Pj2rXru3M2gAAQCUTUsP/6p1K0c8ZSh12/v73v6tz586qWbOmpk6dqtzcXE2dOlWpqan68ccfXVEjAACoJDqEByvM6q/iLjC36OJVWR3Cg91WU6mvxvLy8lLt2rU1adIkDRgwQNdff72ranMbrsYCAMB5Cq7GkuQwUbnSXI31448/6h//+IcSEhLUpUsX1atXT3/5y1/07rvv6tChQ+UqGgAAVH49W4dp7tD2CrU6nqoKtfo7LeiURrnX2dm9e7deffVVffzxx7LZbMrPz3dWbW7DkR0AAJzP1Ssol/Tvd6lXUDYMQz/++KM2bNigDRs2aMuWLcrKylLbtm3VrVu3chUNAADMw9vLoqiImp4uo/RhJzg4WDk5OWrXrp26deum0aNHKzo6WkFBQS4oDwAAVFaV9t5YixYtUnR0NKd7AABAsUxzb6xff/1Vkir9YoLM2QEAwHkq/b2xbDabnn32WVmtVjVq1EiNGjVSUFCQnnvuOdlstnIVDQAAKreKeG+sUp/G+sc//qEPPvhAL7zwg
rp06SJJ2rJli5555hnl5ubq+eefd3qRAACgcijNvbHcNXm51GFnwYIFev/993X33Xfb29q2bat69epp3LhxhB0AAK5hprg3Vnp6ulq0aFGovUWLFkpPd98dTAEAQMVjintjtWvXTv/5z38Ktf/nP/9Ru3btSrWv2bNn65ZbblGNGjUUEhKi/v376+DBgw59cnNzFRsbq5o1a6p69eoaOHCgTp486dAnOTlZffr0UbVq1RQSEqIpU6bowoULpR0aAAAop4p4b6xSn8Z68cUX1adPH3377beKioqSJMXHx+v48eNauXJlqfa1ceNGxcbG6pZbbtGFCxf097//XT169FBiYqICAgIkSZMmTdI333yjzz//XFarVePHj9eAAQO0detWSVJ+fr769Omj0NBQbdu2TSkpKRo+fLiqVKmiWbNmlXZ4AACgHLy9LJrRL1JjF+2URUXfG2tGv0i3rrdTpkvPT5w4obfeeksHDhyQJLVs2VLjxo1T3bp1y1XMqVOnFBISoo0bN+rWW29VZmamateurcWLF+u+++6TJP30009q2bKl4uPj1alTJ61atUp9+/bViRMnVKdOHUnS22+/raeeekqnTp2Sr6/vVd+XS88BAHAud6yz45LbRRw7dkxxcXHKy8vT4MGD1bp163IXeqnMzExJF1dplqQdO3bo/Pnz6t69u71PixYt1LBhQ3vYiY+PV5s2bexBR5JiYmI0duxY7d+/XzfeeGOh9zl37pzOnTtnf56VleXUcQAAcK3r2TpMd0WGVq4VlNevX6++ffvqzz//vPhCHx99+OGHGjp0qFMKsdlsmjhxorp06WIPUampqfL19S10K4o6deooNTXV3ufSoFOwvWBbUWbPnq2ZM2c6pW4AAFC0inJvrBJPUJ4+fbruuusu/fbbbzp9+rRGjx6tJ5980mmFxMbGat++ffr000+dts/iTJs2TZmZmfbH8ePHXf6eAADAM0ocdvbt26dZs2YpLCxM1113nV566SWlpaXp9OnT5S5i/PjxWrFihdavX+9w64nQ0FDl5eUpIyPDof/JkycVGhpq73P51VkFzwv6XM7Pz0+BgYEODwAAYE4lDjtZWVmqVauW/Xm1atVUtWpV+zybsjAMQ+PHj9eXX36pdevWKTw83GH7TTfdpCpVqmjt2rX2toMHDyo5Odl+JVhUVJT27t2rtLQ0e5+4uDgFBgYqMjKyzLUBAABzKNUE5TVr1shqtdqf22w2rV27Vvv27bO3Xbqy8tXExsZq8eLF+uqrr1SjRg37HBur1aqqVavKarVq1KhReuKJJxQcHKzAwEA99thjioqKUqdOnSRJPXr0UGRkpIYNG6YXX3xRqampevrppxUbGys/P7/SDA8AAJhQiS899/K6+kEgi8Wi/Pz8kr+5pegZ2fPmzdPIkSMlXVxUcPLkyfrkk0907tw5xcTE6K233nI4RfXLL79o7Nix2rBhgwICAjRixAi98MIL8vEpWZbj0nMAACqfkv79LtM6O2ZD2AEAoPIp6d/vUt8uAgAAoDIh7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMrU9jJyMjQ+++/r2nTpik9PV2StHPnTv32229OLQ4AAKC8SrWooCTt2bNH3bt3l9Vq1bFjxzR69GgFBwfrv//9r5KTk/XRRx+5ok4AAIAyKfWRnSeeeEIjR47U4cOH5e/vb2/v3bu3Nm3a5NTiAAAAyqvUYWf79u0aM2ZMofZ69erZb/cAAABQUZQ67Pj5+SkrK6tQ+6FDh1S7dm2nFAUAAOAspQ47d999t5599lmdP39e0sX7WyUnJ+upp57SwIEDnV4gAABAeZQ67LzyyivKyclRSEiI/vzzT3Xr1k1NmzZVjRo19Pzzz7uiRgAAgDIr9dVYVqtVcXFx2rp1q3bv3q2cnBy1b99e3bt3d0V9AACgkvozL1+zVibq2Omzalyzmv7eO1JVfb3dXodT7nqekZGhoKAgJ5TjGdz1HAAA5xr90XbFJ
aYVar8rMkTvDb/FKe/hsruez5kzR0uWLLE/f+CBB1SzZk3Vq1dPu3fvLlu1AADANIoLOpIUl5im0R9td2s9pQ47b7/9tho0aCBJiouLU1xcnFatWqVevXppypQpTi8QAABUHn/m5RcbdArEJabpz7x8N1VUhjk7qamp9rCzYsUKPfDAA+rRo4caN26sjh07Or1AAABQecxamVjifs/1b+Piai4q9ZGd6667TsePH5ckrV692j4x2TAM5ee7L6UBAICK59jps07t5wylPrIzYMAA/eUvf1GzZs10+vRp9erVS5L0448/qmnTpk4vEADKKt9mKCEpXWnZuQqp4a8O4cHy9rJ4uizA1BrXrKbNh0vWz11KHXZeffVVhYeHKzk5WS+++KKqV68uSUpJSdG4ceOcXiAAlMXqfSmauTxRKZm59rYwq79m9ItUz9ZhHqwMMLe/947Uwu+SS9TPXUoVds6fP68xY8Zo+vTpCg8Pd9g2adIkpxYGAGW1el+Kxi7aqcvX1UjNzNXYRTs1d2h7Ag/gIlV9vdWoZlX9cvrPYvs0qlnVrevtlGrOTpUqVbR06VJX1QIA5ZZvMzRzeWKhoCPJ3jZzeaLybeVeYgxAEfIu2HQ8vfigI0nH0/9U3gWbmyoqwwTl/v37a9myZS4oBQDKLyEp3eHU1eUMSSmZuUpISndfUcA1ZGH8MV3t/yVsxsV+7lLqOTvNmjXTs88+q61bt+qmm25SQECAw/YJEyY4rTgAKK207OKDTln6ASidX9JLdpVVSfs5Q6nDzgcffKCgoCDt2LFDO3bscNhmsVgIOwA8KqSGv1P7ASidRsElu8qqpP2codRhJykpyRV1AIBTdAgPVpjVX6mZuUXO27FICrVevAwdgPMNi2qs51ceuOKpLC/LxX7uUuo5O5cyDENOuI8oADiNt5dFM/pdvKT18hV1Cp7P6BfJejuAi/j6eGl0dPgV+4yODpevT7kiSKmU6Z0++ugjtWnTRlWrVlXVqlXVtm1bLVy40Nm1AUCZ9GwdprlD2yvU6niqKtTqz2XngBtM6x2pMbeG6/L/p/CySGNuDdc0N66xI0kWo5SHZv7f//t/mj59usaPH68uXbpIkrZs2aI333xT//rXvyrlejslvUU8gMqFFZQBz8q7YNPC+GP6Jf2sGgVX07Coxk49olPSv9+lDjvh4eGaOXOmhg8f7tC+YMECPfPMM5VyTg9hBwCAyqekf79LHa9SUlLUuXPnQu2dO3dWSkpKaXcHAADgUqUOO02bNtVnn31WqH3JkiVq1qyZU4oCAABwllJfej5z5kwNGjRImzZtss/Z2bp1q9auXVtkCAIAAPCkUh/ZGThwoL7//nvVqlVLy5Yt07Jly1SrVi0lJCTo3nvvdUWNAAAAZVbqCcpmxARlAAAqn5L+/S7xaaysrKwS9SMsAACAiqTEYScoKEgWS/HrUxiGIYvFovz8fKcUBgAA4AwlDjvr16+3/9swDPXu3Vvvv/++6tWr55LCAAAAnKHEYadbt24Oz729vdWpUyc1adLE6UUBAAA4i/vuwgUAAOABhB0AAGBq5Qo7V5qwDAAAUBGUeM7OgAEDHJ7n5ubq0UcfVUBAgEP7f//7X+dUBgAA4AQlDjtWq9Xh+dChQ51eDAAAgLOVOOzMmzfPlXUAAAC4BBOUAQCAqRF2AACAqRF2AACAqXk07GzatEn9+vVT3bp1ZbFYtGzZMoftI0eOlMVicXj07NnToU96erqGDBmiwMBABQUFadSoUcrJyXHjKAAAQFHyLtj0weaf9c+v9umDzT8r74LNI3WUeIKyK5w5c0bt2rXTQw89VOjS9gI9e/Z0mBzt5+fnsH3IkCFKSUlRXFyczp8/r7/+9a965JFHtHjxYpfWDgAAijd7ZaLe25wkm/G/tudXHtDo6HBN6x3p1lo8GnZ69eqlXr16XbGPn5+fQkNDi9x24MABrV69Wtu3b9fNN98sSXrjjTfUu3dvvfzyy6pbt67Ta
wYAAFc2e2Wi3tmUVKjdZsje7s7AU+Hn7GzYsEEhISFq3ry5xo4dq9OnT9u3xcfHKygoyB50JKl79+7y8vLS999/74lyAQC4puVdsOm9zYWDzqXe25zk1lNaFTrs9OzZUx999JHWrl2rOXPmaOPGjerVq5fy8/MlSampqQoJCXF4jY+Pj4KDg5Wamlrsfs+dO6esrCyHBwAAKL+F8cccTl0VxWZc7OcuHj2NdTWDBw+2/7tNmzZq27atIiIitGHDBt15551l3u/s2bM1c+ZMZ5QIAAAu8Uv6Waf2c4YKfWTnck2aNFGtWrV05MgRSVJoaKjS0tIc+ly4cEHp6enFzvORpGnTpikzM9P+OH78uEvrBgDgWtEouJpT+zlDpQo7v/76q06fPq2wsDBJUlRUlDIyMrRjxw57n3Xr1slms6ljx47F7sfPz0+BgYEODwAAUH7DohrLy3LlPl6Wi/3cxaNhJycnR7t27dKuXbskSUlJSdq1a5eSk5OVk5OjKVOm6LvvvtOxY8e0du1a3XPPPWratKliYmIkSS1btlTPnj01evRoJSQkaOvWrRo/frwGDx7MlVgAAHiAr4+XRkeHX7HP6Ohw+fq4L4JYDMO4yjQi19mwYYNuv/32Qu0jRozQ3Llz1b9/f/3444/KyMhQ3bp11aNHDz333HOqU6eOvW96errGjx+v5cuXy8vLSwMHDtTrr7+u6tWrl7iOrKwsWa1WZWZmcpQHAAAnKGqdHS+LnLrOTkn/fns07FQUhB0AAJwv74JNC+OP6Zf0s2oUXE3Doho79YhOSf9+V+irsQAAQOXl6+OlUdFNPF1G5ZqgDAAAUFqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGo+ni4AAFwl74JNC+OP6Zf0s2oUXE3DohrL14f/xwOuNYQdAKY0e2Wi3tucJJvxv7bnVx7Q6OhwTesd6bnCALgdYQeA6cxemah3NiUVarcZsrcTeIBrB8dzAZhK3gWb3ttcOOhc6r3NScq7YHNTRQA8jbADwFQWxh9zOHVVFJtxsR+AawNhB4Cp/JJ+1qn9AFR+hB0AptIouJpT+wGo/Ag7AExlWFRjeVmu3MfLcrEfgGsDYQeAqfj6eGl0dPgV+4yODme9HeAawqXnAEyn4LLyy9fZ8bKIdXaAa5DFMIyrXLdgfllZWbJarcrMzFRgYKCnywHgJKygDJhbSf9+c2THRfJthhKS0pWWnauQGv7qEB4s76tNJADgVL4+XhoV3cTTZQDwMMKOC6zel6KZyxOVkplrbwuz+mtGv0j1bB3mwcoAALj2cDzXyVbvS9HYRTsdgo4kpWbmauyinVq9L8VDlQEAcG0i7DhRvs3QzOWJKmoSVEHbzOWJyr/a8q4AAMBpOI3lRAlJ6YWO6FzKkJSSmauEpHRFRdR0X2HANerPvHzNWpmoY6fPqnHNavp770hV9fX2dFkA3Iyw40Rp2cUHnbL0A1B2oz/arrjENPvzzYelhd8l667IEL03/BYPVgbA3TiN5UQhNfyd2g9A2VwedC4Vl5im0R9td3NFADyJsONEHcKDFVStyhX7BFWrog7hwW6qCLj2/JmXX2zQKRCXmKY/8/LdVBEATyPsuBkr7QCuNWtlolP7Aaj8CDtOlJCUroyz56/Y54+z55WQlO6mioBrz7HTZ53aD0DlR9hxIiYoA57XuGY1p/YDUPkRdpyICcqA5/29hDf5LGk/AJUfYceJOoQHK8zqX+y8HIsu3jaCCcqA61T19Vbb+le+oW/b+oGstwNcQwg7TuTtZdGMfhf/b/HywFPwfEa/SG4ICrhQvs3Qqey8K/Y5lZ3HSubANYSw42Q9W4dp7tD2CrU6nqoKtfpr7tD23AgUcLGrrWQu/W8lc
wDXBlZQdoGercN0V2SoEpLSlZadq5AaF09dcUQHcD0uFABwOcKOi3h7Wbj/FeABXCgA4HIePY21adMm9evXT3Xr1pXFYtGyZcscthuGoX/+858KCwtT1apV1b17dx0+fNihT3p6uoYMGaLAwEAFBQVp1KhRysnJceMoAFQkXCgA4HIeDTtnzpxRu3bt9Oabbxa5/cUXX9Trr7+ut99+W99//70CAgIUExOj3Nz/HX4eMmSI9u/fr7i4OK1YsUKbNm3SI4884q4hAKhguFAAwOUshmFUiEsSLBaLvvzyS/Xv31/SxaM6devW1eTJk/W3v/1NkpSZmak6depo/vz5Gjx4sA4cOKDIyEht375dN998syRp9erV6t27t3799VfVrVu3RO+dlZUlq9WqzMxMBQZe+ZLVksq3GczZATxo9b4UzVye6DBZOczqrxn9IrlQADCJkv79rrBzdpKSkpSamqru3bvb26xWqzp27Kj4+HgNHjxY8fHxCgoKsgcdSerevbu8vLz0/fff69577y1y3+fOndO5c+fsz7OyspxaO/+RBTyPCwUAFKiwl56npqZKkurUqePQXqdOHfu21NRUhYSEOGz38fFRcHCwvU9RZs+eLavVan80aNDAaXWv3peisYt2Frr0NTUzV2MX7dTqfSlOey8AV1ZwocA9N9RTVERNgg5wjaqwYceVpk2bpszMTPvj+PHjTtlvvs3QzOWJKuq8YEHbzOWJLGYGAIAbVdiwExoaKkk6efKkQ/vJkyft20JDQ5WWluaw/cKFC0pPT7f3KYqfn58CAwMdHs5wtcXMDLGYGQAA7lZhw054eLhCQ0O1du1ae1tWVpa+//57RUVFSZKioqKUkZGhHTt22PusW7dONptNHTt2dHvNLGYGAEDF49EJyjk5OTpy5Ij9eVJSknbt2qXg4GA1bNhQEydO1L/+9S81a9ZM4eHhmj59uurWrWu/Yqtly5bq2bOnRo8erbffflvnz5/X+PHjNXjw4BJfieVMtar7ObUfAAAoP4+GnR9++EG33367/fkTTzwhSRoxYoTmz5+vJ598UmfOnNEjjzyijIwMde3aVatXr5a///9WPv344481fvx43XnnnfLy8tLAgQP1+uuvu30sklTkZJ3y9AMAAOVWYdbZ8SRnrbPz5Y+/adKSXVft9+qgG3TvjfXK/D4AAKDkf78r7Jydyig959zVO5WiHwAAKD/CjhMFB/g6tR8AACg/wo4TcbdlAAAqHsKOM5V0cVYWcQUAwG0IO070ewnn4pS0HwAAKD/CjhNxGgsAgIqHsONEHcKDFWb1L/YslUUX737eITzYnWUBAHBNI+w4kbeXRTP6RUoqPC2n4PmMfpHceRkAADci7DhZz9Zhmju0vUKtjqeqQq3+mju0vXq2DvNQZQAAXJs8ersIs+rZOkx3RYYqISldadm5Cqlx8dQVR3QAAHA/wo6LeHtZFBVR09NlAABwzeM0FgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDUfTxdgVnkXbFoYf0y/pJ9Vo+BqGhbVWL4+ZEsAANyNsOMCs1cm6r3NSbIZ/2t7fuUBjY4O17TekZ4rDACAaxBhx8lmr0zUO5uSCrXbDNnbCTwAALgP51WcKO+CTe9uLhx0LvXu5iTlXbC5qSIAAEDYcaIF247JMK7cxzAu9gMAAO5B2HGi7cdOO7UfAAAoP8KOE1XzLdkUqJL2AwAA5UfYcaKB7es7tR8AACg/wo4TdWxS06n9AABA+RF2nGjHL384tR8AACg/wo4TpWXnOrUfAAAoP8KOE4XU8HdqPwAAUH6EHSfqEB6sMKu/LMVst0gKs/qrQ3iwO8sCAOCaRthxIm8vi2b0u3griMsDT8HzGf0i5e1VXBwCAADORthxsp6tw
zR3aHuFWh1PVYVa/TV3aHv1bB3mocoAALg2sbqdC/RsHaa7IkOVkJSutOxchdS4eOqKIzoAALgfYcdFvL0siopgPR0AADyNsOMi+TaDIzsAAFQAhB0XWL0vRTOXJyol83/r6YRZ/TWjXyRzdgAAcDMmKDvZ6n0pGrtop0PQkaTUzFyNXbRTq/eleKgyAACuTRU67DzzzDOyWCwOjxYtWti35+bmKjY2VjVr1lT16tU1cOBAnTx50mP15tsMzVyeKKOIbQVtM5cnKt9WVA8AAOAKFTrsSFKrVq2UkpJif2zZssW+bdKkSVq+fLk+//xzbdy4USdOnNCAAQM8VmtCUnqhIzqXMiSlZOYqISndfUUBAHCNq/Bzdnx8fBQaGlqoPTMzUx988IEWL16sO+64Q5I0b948tWzZUt999506derk7lK5NxYAABVQhT+yc/jwYdWtW1dNmjTRkCFDlJycLEnasWOHzp8/r+7du9v7tmjRQg0bNlR8fPwV93nu3DllZWU5PJyBe2MBAFDxVOiw07FjR82fP1+rV6/W3LlzlZSUpOjoaGVnZys1NVW+vr4KCgpyeE2dOnWUmpp6xf3Onj1bVqvV/mjQoIFT6uXeWAAAVDwVOuz06tVL999/v9q2bauYmBitXLlSGRkZ+uyzz8q132nTpikzM9P+OH78uFPq5d5YAABUPBU67FwuKChI119/vY4cOaLQ0FDl5eUpIyPDoc/JkyeLnONzKT8/PwUGBjo8nIV7YwEAULFU+AnKl8rJydHRo0c1bNgw3XTTTapSpYrWrl2rgQMHSpIOHjyo5ORkRUVFebRO7o0FAEDFUaHDzt/+9jf169dPjRo10okTJzRjxgx5e3vrwQcflNVq1ahRo/TEE08oODhYgYGBeuyxxxQVFeWRK7Eux72xAACoGCp02Pn111/14IMP6vTp06pdu7a6du2q7777TrVr15Ykvfrqq/Ly8tLAgQN17tw5xcTE6K233vJw1QAAoCKxGIZxzS/nm5WVJavVqszMTKfO3wEAAK5T0r/flWqCMgAAQGkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKlV6EUF3aVgqaGsrCwPVwIAAEqq4O/21ZYMJOxIys7OliQ1aNDAw5UAAIDSys7OltVqLXY7KyhLstlsOnHihGrUqCGLxXk368zKylKDBg10/Phx067MbPYxMr7Kz+xjZHyVn9nH6MrxGYah7Oxs1a1bV15exc/M4ciOJC8vL9WvX99l+w8MDDTlL/ClzD5Gxlf5mX2MjK/yM/sYXTW+Kx3RKcAEZQAAYGqEHQAAYGqEHRfy8/PTjBkz5Ofn5+lSXMbsY2R8lZ/Zx8j4Kj+zj7EijI8JygAAwNQ4sgMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsOMkL7zwgiwWiyZOnHjFfp9//rlatGghf39/tWnTRitXrnRPgeVUkvHNnz9fFovF4eHv7+++IkvpmWeeKVRvixYtrviayvT5lXZ8le3zK/Dbb79p6NChqlmzpqpWrao2bdrohx9+uOJrNmzYoPbt28vPz09NmzbV/Pnz3VNsGZR2fBs2bCj0OVosFqWmprqx6pJr3LhxkfXGxsYW+5rK9D0s7fgq2/cwPz9f06dPV3h4uKpWraqIiAg999xzV71Xlbu/g6yg7ATbt2/XO++8o7Zt216x37Zt2/Tggw9q9uzZ6tu3rxYvXqz+/ftr586dat26tZuqLb2Sjk+6uELmwYMH7c+defsNV2jVqpW+/fZb+3Mfn+K/EpXx8yvN+KTK9/n98ccf6tKli26//XatWrVKtWvX1uHDh3XdddcV+5qkpCT16dNHjz76qD7++GOtXbtWDz/8sMLCwhQTE+PG6q+uLOMrcPDgQYfVakNCQlxZaplt375d+fn59uf79u3TXXfdpfvvv7/I/pXte1ja8UmV63s4Z84czZ07VwsWLFCrVq30ww8/6K9//ausVqsmTJhQ5
Gs88h00UC7Z2dlGs2bNjLi4OKNbt27G448/XmzfBx54wOjTp49DW8eOHY0xY8a4uMqyK8345s2bZ1itVrfVVl4zZsww2rVrV+L+le3zK+34KtvnZxiG8dRTTxldu3Yt1WuefPJJo1WrVg5tgwYNMmJiYpxZmlOUZXzr1683JBl//PGHa4pysccff9yIiIgwbDZbkdsr2/fwclcbX2X7Hvbp08d46KGHHNoGDBhgDBkypNjXeOI7yGmscoqNjVWfPn3UvXv3q/aNj48v1C8mJkbx8fGuKq/cSjM+ScrJyVGjRo3UoEED3XPPPdq/f7+LKyyfw4cPq27dumrSpImGDBmi5OTkYvtWxs+vNOOTKt/n9/XXX+vmm2/W/fffr5CQEN1444167733rviayvQ5lmV8BW644QaFhYXprrvu0tatW11cqXPk5eVp0aJFeuihh4o9mlGZPr/LlWR8UuX6Hnbu3Flr167VoUOHJEm7d+/Wli1b1KtXr2Jf44nPkLBTDp9++ql27typ2bNnl6h/amqq6tSp49BWp06dCnsuvbTja968uT788EN99dVXWrRokWw2mzp37qxff/3VxZWWTceOHTV//nytXr1ac+fOVVJSkqKjo5WdnV1k/8r2+ZV2fJXt85Okn3/+WXPnzlWzZs20Zs0ajR07VhMmTNCCBQuKfU1xn2NWVpb+/PNPV5dcKmUZX1hYmN5++20tXbpUS5cuVYMGDXTbbbdp586dbqy8bJYtW6aMjAyNHDmy2D6V7Xt4qZKMr7J9D6dOnarBgwerRYsWqlKlim688UZNnDhRQ4YMKfY1HvkOuuyYkcklJycbISEhxu7du+1tVzvNU6VKFWPx4sUObW+++aYREhLiqjLLrCzju1xeXp4RERFhPP300y6o0Pn++OMPIzAw0Hj//feL3F6ZPr+iXG18l6sMn1+VKlWMqKgoh7bHHnvM6NSpU7GvadasmTFr1iyHtm+++caQZJw9e9YldZZVWcZXlFtvvdUYOnSoM0tziR49ehh9+/a9Yp/K/D0syfguV9G/h5988olRv35945NPPjH27NljfPTRR0ZwcLAxf/78Yl/jie8gR3bKaMeOHUpLS1P79u3l4+MjHx8fbdy4Ua+//rp8fHwcJqQVCA0N1cmTJx3aTp48qdDQUHeVXWJlGd/lClL+kSNH3FBx+QUFBen6668vtt7K9PkV5Wrju1xl+PzCwsIUGRnp0NayZcsrnq4r7nMMDAxU1apVXVJnWZVlfEXp0KFDhf4cJemXX37Rt99+q4cffviK/Srr97Ck47tcRf8eTpkyxX50p02bNho2bJgmTZp0xTMCnvgOEnbK6M4779TevXu1a9cu++Pmm2/WkCFDtGvXLnl7exd6TVRUlNauXevQFhcXp6ioKHeVXWJlGd/l8vPztXfvXoWFhbmh4vLLycnR0aNHi623Mn1+Rbna+C5XGT6/Ll26OFy1IkmHDh1So0aNin1NZfocyzK+ouzatatCf46SNG/ePIWEhKhPnz5X7FeZPr9LlXR8l6vo38OzZ8/Ky8sxSnh7e8tmsxX7Go98hi45XnSNuvw0z7Bhw4ypU6fan2/dutXw8fExXn75ZePAgQPGjBkzjCpVqhh79+71QLWld7XxzZw501izZo1x9OhRY8eOHcbgwYMNf39/Y//+/R6o9uomT55sbNiwwUhKSjK2bt1qdO/e3ahVq5aRlpZmGEbl//xKO77K9vkZhmEkJCQYPj4+xvPPP28cPnzY+Pjjj41q1aoZixYtsveZOnWqMWzYMPvzn3/+2ahWrZoxZcoU48CBA8abb75peHt7G6tXr/bEEK6oLON79dVXjWXLlhmHDx829u7dazz++OOGl5eX8e2333piCCWSn59vNGzY0HjqqacKbavs30PDKN34Ktv3cMSIEUa9evWMFStWGElJScZ///tfo1atWsaTTz5p71MRvoOEHSe6PAx069bNGDFihEOfzz77zLj++usNX19fo
1WrVsY333zj3iLL4WrjmzhxotGwYUPD19fXqFOnjtG7d29j586d7i+0hAYNGmSEhYUZvr6+Rr169YxBgwYZR44csW+v7J9facdX2T6/AsuXLzdat25t+Pn5GS1atDDeffddh+0jRowwunXr5tC2fv1644YbbjB8fX2NJk2aGPPmzXNfwaVU2vHNmTPHiIiIMPz9/Y3g4GDjtttuM9atW+fmqktnzZo1hiTj4MGDhbZV9u+hYZRufJXte5iVlWU8/vjjRsOGDQ1/f3+jSZMmxj/+8Q/j3Llz9j4V4TtoMYyrLHMIAABQiTFnBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphB0Clddttt2nixIn2540bN9a///3vcu1zw4YNslgsysjIKNd+AFQchB0AHpOamqrHHntMTZo0kZ+fnxo0aKB+/foVum9OSW3fvl2PPPKIk6sEUNn5eLoAANemY8eOqUuXLgoKCtJLL72kNm3a6Pz581qzZo1iY2P1008/lXqftWvXdkGlpZeXlydfX19PlwHg/8eRHQAeMW7cOFksFiUkJGjgwIG6/vrr1apVKz3xxBP67rvv9NBDD6lv374Orzl//rxCQkL0wQcfFLnPy09jWSwWvf/++7r33ntVrVo1NWvWTF9//bXDa1auXKnrr79eVatW1e23365jx44V2u+WLVsUHR2tqlWrqkGDBpowYYLOnDnj8L7PPfechg8frsDAQI4uARUMYQeA26Wnp2v16tWKjY1VQEBAoe1BQUF6+OGHtXr1aqWkpNjbV6xYobNnz2rQoEElfq+ZM2fqgQce0J49e9S7d28NGTJE6enpkqTjx49rwIAB6tevn3bt2qWHH35YU6dOdXj90aNH1bNnTw0cOFB79uzRkiVLtGXLFo0fP96h38svv6x27drpxx9/1PTp00vz4wDgYoQdAG535MgRGYahFi1aFNunc+fOat68uRYuXGhvmzdvnu6//35Vr169xO81cuRIPfjgg2ratKlmzZqlnJwcJSQkSJLmzp2riIgIvfLKK2revLmGDBmikSNHOrx+9uzZGjJkiCZOnKhmzZqpc+fOev311/XRRx8pNzfX3u+OO+7Q5MmTFRERoYiIiBLXB8D1CDsA3M4wjBL1e/jhhzVv3jxJ0smTJ7Vq1So99NBDpXqvtm3b2v8dEBCgwMBApaWlSZIOHDigjh07OvSPiopyeL57927Nnz9f1atXtz9iYmJks9mUlJRk73fzzTeXqi4A7sMEZQBu16xZM1kslqtOQh4+fLimTp2q+Ph4bdu2TeHh4YqOji7Ve1WpUsXhucVikc1mK/Hrc3JyNGbMGE2YMKHQtoYNG9r/XdTpOAAVA2EHgNsFBwcrJiZGb775piZMmFAoKGRkZCgoKEg1a9ZU//79NW/ePMXHx+uvf/2rU+to2bJloQnL3333ncPz9u3bKzExUU2bNnXqewNwH05jAfCIN998U/n5+erQoYOWLl2qw4cP68CBA3r99dcdTiU9/PDDWrBggQ4cOKARI0Y4tYZHH31Uhw8f1pQpU3Tw4EEtXrxY8+fPd+jz1FNPadu2bRo/frx27dqlw4cP66uvvio0QRlAxUXYAeARTZo00c6dO3X77bdr8uTJat26te666y6tXbtWc+fOtffr3r27wsLCFBMTo7p16zq1hoYNG2rp0qVatmyZ2rVrp7fffluzZs1y6NO2bVtt3LhRhw4dUnR0tG688Ub985//dHotAFzHYpR0piAAeEBOTo7q1aunefPmacCAAZ4uB0AlxJwdABWSzWbT77//rldeeUVBQUG6++67PV0SgEqKsAOgQkpOTlZ4eLjq16+v+fPny8eH/1wBKBtOYwEAAFNjgjIAADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADC1/w+99nWR45ctHwAAAABJRU5ErkJggg==", + 
"text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "plt.scatter(df['cyl'], df['hp'])\n", + "plt.title('Cylinder vs Horse Power')\n", + "plt.xlabel('Cylinder')\n", + "plt.ylabel('Horse Power')\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c9ac6ed4", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "plt.scatter(df['mpg'], df['hp'])\n", + "plt.title('MPG vs Horse Power')\n", + "plt.xlabel('MPG')\n", + "plt.ylabel('Horse Power')\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "c1b3c30b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " mpg cyl disp hp drat wt qsec \\\n", + "mpg 1.000000 -0.852162 -0.847551 -0.776168 0.681172 -0.867659 0.418684 \n", + "cyl -0.852162 1.000000 0.902033 0.832447 -0.699938 0.782496 -0.591242 \n", + "disp -0.847551 0.902033 1.000000 0.790949 -0.710214 0.887980 -0.433698 \n", + "hp -0.776168 0.832447 0.790949 1.000000 -0.448759 0.658748 -0.708223 \n", + "drat 0.681172 -0.699938 -0.710214 -0.448759 1.000000 -0.712441 0.091205 \n", + "wt -0.867659 0.782496 0.887980 0.658748 -0.712441 1.000000 -0.174716 \n", + "qsec 0.418684 -0.591242 -0.433698 -0.708223 0.091205 -0.174716 1.000000 \n", + "vs 0.664039 -0.810812 -0.710416 -0.723097 0.440278 -0.554916 0.744535 \n", + "am 0.599832 -0.522607 -0.591227 -0.243204 0.712711 -0.692495 -0.229861 \n", + "gear 0.480285 -0.492687 -0.555569 -0.125704 0.699610 -0.583287 -0.212682 \n", + "carb -0.550925 0.526988 0.394977 0.749812 -0.090790 0.427606 -0.656249 \n", + "\n", + " vs am gear carb \n", + "mpg 0.664039 0.599832 0.480285 -0.550925 \n", + "cyl -0.810812 -0.522607 -0.492687 0.526988 \n", + "disp -0.710416 -0.591227 -0.555569 0.394977 \n", + "hp -0.723097 -0.243204 -0.125704 0.749812 \n", + "drat 0.440278 0.712711 0.699610 -0.090790 \n", + "wt -0.554916 -0.692495 -0.583287 0.427606 \n", + "qsec 0.744535 -0.229861 -0.212682 -0.656249 \n", + "vs 1.000000 0.168345 0.206023 -0.569607 \n", + "am 0.168345 1.000000 0.794059 0.057534 \n", + "gear 0.206023 0.794059 1.000000 0.274073 \n", + "carb -0.569607 0.057534 0.274073 1.000000 \n" + ] + } + ], + "source": [ + "# 각 변수들 간의 상관 관계를 알아 보기 위해 상관계수를 구하는 표를 출력하세요.\n", + 
"print(df.drop(columns=['name']).corr())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "22172777", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.8676593765172281\n" + ] + } + ], + "source": [ + "plt.scatter(df['wt'], df['mpg'])\n", + "plt.title('Weight vs MPG')\n", + "plt.xlabel('Weight')\n", + "plt.ylabel('MPG')\n", + "plt.show()\n", + "\n", + "print(df['wt'].corr(df['mpg']))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "3e4168b2", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjMAAAHHCAYAAABKudlQAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAOTNJREFUeJzt3X98VNWd//H3JJAEIRmMEpJIgADyI6IUECwBwg/lh3WxYLuALS6o1SWCKyruarsWotJIq7hdF8GHfShW1hVbfii4RlQSMBBABZSIImSjUEgKBZyEH4mYOd8/+GbqJBPyazIzZ/J6Ph55+MidO5PPuTdy37nn3HMcxhgjAAAAS0UEuwAAAIDmIMwAAACrEWYAAIDVCDMAAMBqhBkAAGA1wgwAALAaYQYAAFiNMAMAAKxGmAEAAFYjzABolFmzZql79+5Nfm+HDh38WxCAVo8wA4SB119/XQ6HQ2vXrq312oABA+RwOJSbm1vrta5duyo9PT0QJTbK2bNntXDhQuXl5QW7FAAWIMwAYWDEiBGSpPz8fK/tZWVlKiwsVJs2bbR161av1w4fPqzDhw973ttQL7zwgvbv39+8gutx9uxZZWVlEWYANEibYBcAoPmSk5OVmppaK8wUFBTIGKN//Md/rPVa9feNDTNt27ZtXrEA4GfcmQHCxIgRI7R7926dO3fOs23r1q266qqrdOONN2r79u1yu91erzkcDg0fPtyzbeXKlRo8eLDatWun+Ph4TZ8+XYcPH/b6Ob7GzJw4cUK33Xab4uLi1LFjR82cOVOffPKJHA6HVqxYUavWI0eOaPLkyerQoYM6deqk+fPnq6qqSpL01VdfqVOnTpKkrKwsORwOORwOLVy40Ge7P/roIzkcDr388su1XnvnnXfkcDi0YcMGSVJ5ebnmzZun7t27Kzo6WgkJCRo3bpx27dpV94H9//Lz8zVkyBDFxMSoZ8+eev7557Vw4UI5HA6v/d59912NGDFCHTt2VIcOHdSnTx/98pe/9NqnsrJSCxYsUK9evRQdHa2UlBT967/+qyorK2v93JUrV2ro0KG65JJLdOmllyojI0MbN26st16gNeHODBAmRowYoVdeeUU7duzQ6NGjJV0ILOnp6UpPT5fL5VJhYaGuueYaz2t9+/bVZZddJklatGiRHn30UU2dOlW/+MUvdPz4cT377LPKyMjQ7t271bFjR58/1+12a9KkSdq5c6cyMzPVt29fvfHGG5o5c6bP/auqqjRhwgRdd911euqpp/Tee+/p6aefVs+ePZWZmalOnTpp2bJlyszM1JQpU3TLLbdIkqfumq699lr16NFDr7/+eq2fuWrVKl166aWaMGGCJGn27Nn685//rLlz5yotLU0nTpxQfn6+Pv/8cw0aNKjOY7t3716NHz9enTp10sKFC/Xdd99pwYIF6ty5s9d+n332mf7hH/5B11xzjR577DFFR0fr4MGDXl18brdbN99
8s/Lz83X33XerX79+2rt3r5555hl9+eWXWrdunWffrKwsLVy4UOnp6XrssccUFRWlHTt2aNOmTRo/fnyd9QKtjgEQFj777DMjyTz++OPGGGPOnz9v2rdvb15++WVjjDGdO3c2S5cuNcYYU1ZWZiIjI81dd91ljDHmq6++MpGRkWbRokVen7l3717Tpk0br+0zZ8403bp183y/evVqI8n8x3/8h2dbVVWVGTt2rJFkXnrpJa/3SjKPPfaY188ZOHCgGTx4sOf748ePG0lmwYIFDWr7I488Ytq2bWtOnjzp2VZZWWk6duxo7rjjDs82p9Np5syZ06DP/L7JkyebmJgY8/XXX3u27du3z0RGRprv/zP6zDPPGEnm+PHjdX7WK6+8YiIiIswHH3zgtX358uVGktm6dasxxpgDBw6YiIgIM2XKFFNVVeW1r9vtbnQbgHBGNxMQJvr166fLLrvMMxbmk08+0ZkzZzxPK6Wnp3vuEBQUFKiqqsozXmbNmjVyu92aOnWq/va3v3m+EhMTdeWVV/p8EqpaTk6O2rZtq7vuusuzLSIiQnPmzKnzPbNnz/b6fuTIkfq///u/pjVc0rRp03T+/HmtWbPGs23jxo365ptvNG3aNM+2jh07aseOHTp69GiDP7uqqkrvvPOOJk+erK5du3q29+vXz3PH5/ufL0lvvPGGV5fe9/3pT39Sv3791LdvX69jPXbsWEnyHOt169bJ7Xbr17/+tSIivP+prtm1BbR2hBkgTDgcDqWnp3vGxmzdulUJCQnq1auXJO8wU/3f6jBz4MABGWN05ZVXqlOnTl5fn3/+uY4dO1bnz/3666+VlJSkSy65xGt79c+tKSYmxjMmptqll16qU6dONa3huvD4ed++fbVq1SrPtlWrVunyyy/3hARJ+u1vf6vCwkKlpKRo6NChWrhwYb0h6vjx4zp37pyuvPLKWq/16dPH6/tp06Zp+PDh+sUvfqHOnTtr+vTpev31172CzYEDB/TZZ5/VOs69e/eWJM+xLioqUkREhNLS0hp/QIBWhjEzQBgZMWKE1q9fr71793rGy1RLT0/XQw89pCNHjig/P1/Jycnq0aOHpAvjOBwOh95++21FRkbW+lx/TnTn6/P9Ydq0aVq0aJH+9re/KTY2Vm+++aZuvfVWtWnz93/mpk6dqpEjR2rt2rXauHGjfve732nx4sVas2aNbrzxxmbX0K5dO23ZskW5ubl66623lJOTo1WrVmns2LHauHGjIiMj5Xa7dfXVV2vJkiU+PyMlJaXZdQCtDWEGCCPfn29m69atmjdvnue1wYMHKzo6Wnl5edqxY4d+9KMfeV7r2bOnjDFKTU313CFoqG7duik3N1dnz571ujtz8ODBJrejKd0o06ZNU1ZWllavXq3OnTurrKxM06dPr7VfUlKS7rnnHt1zzz06duyYBg0apEWLFtUZZjp16qR27drpwIEDtV7zNd9ORESErr/+el1//fVasmSJfvOb3+hXv/qVcnNzdcMNN6hnz5765JNPdP3111+0nT179pTb7da+ffv0gx/8oOEHAmiF6GYCwsi1116rmJgY/fd//7eOHDnidWcmOjpagwYN0tKlS3XmzBmv+WVuueUWRUZGKisrS8YYr880xujEiRN1/swJEybo/PnzeuGFFzzb3G63li5d2uR2VIeib775psHv6devn66++mqtWrVKq1atUlJSkjIyMjyvV1VVyeVyeb0nISFBycnJPh+JrhYZGakJEyZo3bp1OnTokGf7559/rnfeecdr35MnT9Z6f3UQqf4ZU6dO1ZEjR7yOV7Vz587pzJkzkqTJkycrIiJCjz32WK3xNzXPEdDacWcGCCNRUVEaMmSIPvjgA0VHR2vw4MFer6enp+vpp5+W5D1ZXs+ePfXEE0/okUce0VdffaXJkycrNjZWxcXFWrt2re6++27Nnz/f58+cPHmyhg4dqgcffFAHDx5U37599eabb3ou7E25y9KuXTulpaV
p1apV6t27t+Lj49W/f3/179//ou+bNm2afv3rXysmJkZ33nmn18DZ8vJydenSRT/96U81YMAAdejQQe+9954+/PBDzzGpS1ZWlnJycjRy5Ejdc889+u677/Tss8/qqquu0qeffurZ77HHHtOWLVt00003qVu3bjp27Jiee+45denSxXO8b7vtNr3++uuaPXu2cnNzNXz4cFVVVemLL77Q66+/rnfeeUfXXnutevXqpV/96ld6/PHHNXLkSN1yyy2Kjo7Whx9+qOTkZGVnZzf6uAJhK6jPUgHwu0ceecRIMunp6bVeW7NmjZFkYmNjzXfffVfr9dWrV5sRI0aY9u3bm/bt25u+ffuaOXPmmP3793v2qflotjEXHqX+2c9+ZmJjY43T6TSzZs0yW7duNZLMa6+95vXe9u3b1/q5CxYsMDX/Odq2bZsZPHiwiYqKavBj2gcOHDCSjCSTn5/v9VplZaV56KGHzIABA0xsbKxp3769GTBggHnuuefq/VxjjNm8ebOnnh49epjly5fXqvv99983P/7xj01ycrKJiooyycnJ5tZbbzVffvml12d9++23ZvHixeaqq64y0dHR5tJLLzWDBw82WVlZxuVyee374osvmoEDB3r2GzVqlHn33XcbVDPQWjiM4X4lAP9bt26dpkyZovz8fK9ZhsPJwoULfXbNAQgsxswAaLbvL6EgXRif8uyzzyouLu6iM+sCgD8wZgZAs9177706d+6chg0bpsrKSq1Zs0bbtm3Tb37zG7Vr1y7Y5QEIc4QZAM02duxYPf3009qwYYMqKirUq1cvPfvss5o7d26wSwPQCjBmBgAAWI0xMwAAwGqEGQAAYLWwHzPjdrt19OhRxcbGstIsAACWMMaovLxcycnJtVaOrynsw8zRo0dZuA0AAEsdPnxYXbp0ueg+YR9mYmNjJV04GHFxcUGuBgAANERZWZlSUlI81/GLCfswU921FBcXR5gBAMAyDRkiwgBgAABgNcIMAACwGmEGAABYjTADAACsRpgBAABWI8wAAACrEWYAAIDVCDMAAMBqhBkAAGC1sJ8BGEDzVbmNdhaf1LHyCiXExmhoarwiI1i4FUBoIMwAuKicwhJlrd+nEleFZ1uSM0YLJqVpYv+kIFYGABfQzQSgTjmFJcpcucsryEhSqatCmSt3KaewJEiVAcDfEWYA+FTlNspav0/Gx2vV27LW71OV29ceABA4hBkAPu0sPlnrjsz3GUklrgrtLD4ZuKIAwAfCDACfjpXXHWSash8AtBTCDACfEmJj/LofALQUwgwAn4amxivJGaO6HsB26MJTTUNT4wNZFgDUQpgB4FNkhEMLJqVJUq1AU/39gklpzDcDIOgIMwDqNLF/kpbNGKREp3dXUqIzRstmDGKeGQAhgUnzAFzUxP5JGpeWyAzAAEIWYQZAvSIjHBrW87JglwEAPtHNBAAArEaYAQAAViPMAAAAqxFmAACA1QgzAADAaoQZAABgNcIMAACwGmEGAABYjTADAACsRpgBAABWI8wAAACrEWYAAIDVCDMAAMBqhBkAAGA1wgwAALAaYQYAAFiNMAMAAKxGmAEAAFYjzAAAAKsRZgAAgNUIMwAAwGqEGQAAYDXCDAAAsBphBgAAWI0wAwAArBbUMJOdna0hQ4YoNjZWCQkJmjx5svbv3+95/eTJk7r33nvVp08ftWvXTl27dtW//Mu/yOVyBbFqAAAQSoIaZjZv3qw5c+Zo+/btevfdd3X+/HmNHz9eZ86ckSQdPXpUR48e1VNPPaXCwkKtWLFCOTk5uvPOO4NZNgAACCEOY4wJdhHVjh8/roSEBG3evFkZGRk+9/nTn/6kGTNm6MyZM2rTpk29n1lWVian0ymXy6W4uDh/lwwAAFpAY67f9aeBAKruPoqPj7/oPnFxcXUGmcrKSlVWVnq+Lysr82+RAAAgpITMAGC326158+Zp+PDh6t+/v899/va3v+nxxx/X3XffXefnZGdny+l0er5
SUlJaqmQAABACQqabKTMzU2+//bby8/PVpUuXWq+XlZVp3Lhxio+P15tvvqm2bdv6/Bxfd2ZSUlLoZgIAwCLWdTPNnTtXGzZs0JYtW3wGmfLyck2cOFGxsbFau3ZtnUFGkqKjoxUdHd2S5QIAgBAS1G4mY4zmzp2rtWvXatOmTUpNTa21T1lZmcaPH6+oqCi9+eabiomJCUKlAAAgVAX1zsycOXP06quv6o033lBsbKxKS0slSU6nU+3atfMEmbNnz2rlypUqKyvzDOjt1KmTIiMjg1k+AAAIAUEdM+NwOHxuf+mllzRr1izl5eVpzJgxPvcpLi5W9+7d6/0ZPJoNAIB9rBkzU1+OGj16dL37AACA1i1kHs0GAABoCsIMAACwGmEGAABYjTADAACsRpgBAABWI8wAAACrEWYAAIDVCDMAAMBqhBkAAGA1wgwAALAaYQYAAFiNMAMAAKxGmAEAAFYjzAAAAKsRZgAAgNUIMwAAwGqEGQAAYDXCDAAAsBphBgAAWI0wAwAArEaYAQAAViPMAAAAqxFmAACA1QgzAADAaoQZAABgNcIMAACwGmEGAABYjTADAACsRpgBAABWI8wAAACrEWYAAIDVCDMAAMBqhBkAAGA1wgwAALAaYQYAAFiNMAMAAKxGmAEAAFYjzAAAAKsRZgAAgNUIMwAAwGqEGQAAYDXCDAAAsBphBgAAWI0wAwAArEaYAQAAViPMAAAAqxFmAACA1QgzAADAaoQZAABgNcIMAACwGmEGAABYjTADAACsRpgBAABWI8wAAACrEWYAAIDVCDMAAMBqhBkAAGA1wgwAALAaYQYAAFitTbALAIDGqnIb7Sw+qWPlFUqIjdHQ1HhFRjiCXRaAICHMALBKTmGJstbvU4mrwrMtyRmjBZPSNLF/UhArAxAsdDMBsEZOYYkyV+7yCjKSVOqqUObKXcopLAlSZQCCiTADwApVbqOs9ftkfLxWvS1r/T5VuX3tASCcEWYAWGFn8clad2S+z0gqcVVoZ/HJwBUFICQQZgBY4Vh53UGmKfsBCB+EGQBWSIiN8et+AMIHYQaAFYamxivJGaO6HsB26MJTTUNT4wNZFoAQQJgBYIXICIcWTEqTpFqBpvr7BZPSmG8GaIUIM2g1qtxGBUUn9MaeIyooOsFTL80UjOM5sX+Sls0YpESnd1dSojNGy2YMYp4ZoJUK6qR52dnZWrNmjb744gu1a9dO6enpWrx4sfr06ePZp6KiQg8++KBee+01VVZWasKECXruuefUuXPnIFYO2zDRmn8F83hO7J+kcWmJzAAMwMNhjAnan6cTJ07U9OnTNWTIEH333Xf65S9/qcLCQu3bt0/t27eXJGVmZuqtt97SihUr5HQ6NXfuXEVERGjr1q0N+hllZWVyOp1yuVyKi4tryeYgRFVPtFbzF7360sdf9I3D8QQQCI25fgc1zNR0/PhxJSQkaPPmzcrIyJDL5VKnTp306quv6qc//akk6YsvvlC/fv1UUFCgH/7wh/V+JmGmdatyG41YvKnO+UkcutBFkf9vY/nLvgE4ngACpTHX75AaM+NyuSRJ8fEXnkb4+OOPdf78ed1www2effr27auuXbuqoKDA52dUVlaqrKzM66u1YExIbUy05l8cTwChKGQWmnS73Zo3b56GDx+u/v37S5JKS0sVFRWljh07eu3buXNnlZaW+vyc7OxsZWVltXS5IYcxIb4x0Zp/cTwBhKKQuTMzZ84cFRYW6rXXXmvW5zzyyCNyuVyer8OHD/upwtDF4nt1Y6I1/+J4AghFIRFm5s6dqw0bNig3N1ddunTxbE9MTNS3336rb775xmv/v/71r0pMTPT5WdHR0YqLi/P6CmcsvndxTLTmXxxPAKEoqGHGGKO5c+dq7dq12rRpk1JTU71eHzx4sNq2bav333/fs23//v06dOiQhg0bFuhyQxJjGC6Oidb8i+MJIBQFNczMmTNHK1eu1KuvvqrY2FiVlpaqtLRU586
dkyQ5nU7deeedeuCBB5Sbm6uPP/5Yt99+u4YNG9agJ5laA8Yw1I+J1vyL4wkg1AR1APCyZcskSaNHj/ba/tJLL2nWrFmSpGeeeUYRERH6yU9+4jVpHi5gDEPDMNGaf3E8AYSSkJpnpiWE+zwz1fN+lLoqfI6bYd4PAICNrJ1nBo3HGAYAQGtHmAkDjGEAALRmITNpHpqHMQwAgNaKMBNGIiMcGtbzsmCXAQBAQNHNBAAArEaYAQAAViPMAAAAqxFmAACA1QgzAADAajzNhICrchseIQcA+A1hBgGVU1iirPX7vFb6TnLGaMGkNCb3sxgBFUAwEWYQMDmFJcpcuavWGlKlrgplrtzFbMWWIqACCDbGzCAgqtxGWev3+VwMs3pb1vp9qnKH9bqnYac6oH4/yEh/D6g5hSVBqgxAa0KYQUDsLD5Z64L3fUZSiatCO4tPBq4oNAsBFUCoIMwgII6V1x1kmrIfgo+ACiBUEGYQEAmxMfXv1Ij9EHwEVAChgjCDgBiaGq8kZ4zqer7FoQuDRoemxgeyLDQDARVAqCDMICAiIxxaMClNkmoFmurvF0xK43FeixBQAYQKwgwCZmL/JC2bMUiJTu+/1BOdMTyWbSECKoBQ4TDGhPWjBmVlZXI6nXK5XIqLiwt2ORATrIUb5pkB0BIac/0mzABoNgIqAH9rzPWbGYABNFtkhEPDel4W7DIAtFKMmQEAAFYjzAAAAKsRZgAAgNUIMwAAwGqEGQAAYDXCDAAAsBphBgAAWI0wAwAArEaYAQAAVmtSmCkuLtaBAwdqbT9w4IC++uqr5tYEAADQYE0KM7NmzdK2bdtqbd+xY4dmzZrV3JoQAFVuo4KiE3pjzxEVFJ1QlTusl+hCmOD3FoAvTVqbaffu3Ro+fHit7T/84Q81d+7cZheFlsUqx7ARv7cA6tKkOzMOh0Pl5eW1trtcLlVVVTW7KLScnMISZa7c5XVBkKRSV4UyV+5STmFJkCoD6sbvLYCLaVKYycjIUHZ2tldwqaqqUnZ2tkaMGOG34uBfVW6jrPX75OvGfPW2rPX7uHWPkMLvLYD6NKmbafHixcrIyFCfPn00cuRISdIHH3ygsrIybdq0ya8Fwn92Fp+s9Zft9xlJJa4K7Sw+qWE9LwtcYcBF8HsLoD5NujOTlpamTz/9VFOnTtWxY8dUXl6uf/qnf9IXX3yh/v37+7tG+Mmx8rovCE3ZDwgEfm8B1KdJd2YkKTk5Wb/5zW/8WQtaWEJsjF/3AwKB31sA9WnypHkffPCBZsyYofT0dB05ckSS9Morryg/P99vxcG/hqbGK8kZI0cdrzt04emQoanxgSwLuCh+bwHUp0lhZvXq1ZowYYLatWunXbt2qbKyUtKFp5m4WxO6IiMcWjApTZJqXRiqv18wKU2REXVdNoDA4/cWQH2aFGaeeOIJLV++XC+88ILatm3r2T58+HDt2rXLb8XB/yb2T9KyGYOU6PS+JZ/ojNGyGYOYrwMhid9bABfTpDEz+/fvV0ZGRq3tTqdT33zzTXNrQgub2D9J49IStbP4pI6VVygh9sItev6yRSjj9xZAXZoUZhITE3Xw4EF1797da3t+fr569Ojhj7rQwiIjHDzGCuvwewvAlyaFmbvuukv33XefXnzxRTkcDh09elQFBQWaP3++Hn30UX/XCEi6MHlauP1VHo5tAoBAa1KYefjhh+V2u3X99dfr7NmzysjIUHR0tObPn697773X3zUCYbkuTzi2CQCCwWGMafIc4N9++60OHjyo06dPKy0tTR06dPBnbX5RVlYmp9Mpl8uluLi4YJeDJqhel6fmL2r1/QsbB4CGY5sAwJ8ac/1u8jwzkhQVFaW0tDT17dtX7733nj7//PPmfFzYqnIbFRSd0Bt7jqig6ARryDRCOK7LE45tAoBgalI309SpU5WRkaG5c+fq3LlzGjJkiIqLi2WM0Wuvvaaf/OQn/q7TWnQlNE84rssTjm0CgGBq0p2ZLVu2eBa
YXLt2rdxut7755hv953/+p5544gm/Fmiz6q6EmheuUleFMlfuUk5hSZAqs0c4rssTjm0CgGBqUphxuVyKj78wdXhOTo5+8pOf6JJLLtFNN92kAwcO+LVAW9GV4B/huC5POLYJAIKpSWEmJSVFBQUFOnPmjHJycjR+/HhJ0qlTpxQTwz/AUuO6ElC3cFyXJxzbBADB1KQwM2/ePP385z9Xly5dlJSUpNGjR0u60P109dVX+7M+a9GV4B/huC5POLYJAIKpSWHmnnvu0fbt2/Xiiy9q27Ztioi48DE9evTQokWL/FqgrehK8J9wXJcnHNsEAMHSpHlmHnjggQbvu2TJksZ+vF8Fa56ZKrfRiMWbVOqq8DluxqELF678fxvLX+ANFI6z5YZjmwDAHxpz/W7So9m7d+/W7t27df78efXp00eS9OWXXyoyMlKDBg3y7OdwtN5/lKu7EjJX7pJD8go0dCU0TTiuyxOObQKAQGtSmJk0aZJiY2P18ssv69JLL5V0YfDv7bffrpEjR+rBBx/0a5G2qu5KqDnPTCLzzABNwp0sAL40qZvpiiuu0MaNG3XVVVd5bS8sLNT48eN19OhRvxXYXKGwnAH/AAPNxwSUQOvS4t1MZWVlOn78eK3tx48fV3l5eVM+MqzRlQA0T11rWVVPQMmgaaB1a9LTTFOmTNHtt9+uNWvW6C9/+Yv+8pe/aPXq1brzzjt1yy23+LtGAK0YE1ACqE+T7swsX75c8+fP189+9jOdP3/+wge1aaM777xTv/vd7/xaIIDWjbWsANSnSWHmkksu0XPPPaff/e53KioqkiT17NlT7du392txAMAElADq06QwU619+/a65ppr/FULANTCBJQA6tOkMTMAECisZQWgPoQZACGNtawA1IcwAyDksZYVgItp1pgZAAiUif2TNC4tkQkoAdRCmAFgDSagBOALYQYA/IBlS4DgCeqYmS1btmjSpElKTk6Ww+HQunXrvF4/ffq05s6dqy5duqhdu3ZKS0vT8uXLg1MsANQhp7BEIxZv0q0vbNd9r+3RrS9s14jFm5RTWBLs0gCPKrdRQdEJvbHniAqKToTVrNlBvTNz5swZDRgwQHfccYfPZRAeeOABbdq0SStXrlT37t21ceNG3XPPPUpOTtbNN98chIoBwBvrRsEG4b5Qa1DvzNx444164oknNGXKFJ+vb9u2TTNnztTo0aPVvXt33X333RowYIB27twZ4EoBoDbWjYINqgN3zWVBqgN3ONxBDOlHs9PT0/Xmm2/qyJEjMsYoNzdXX375pcaPH1/neyorK1VWVub1BQAtoTHrRgHB0FoCd0iHmWeffVZpaWnq0qWLoqKiNHHiRC1dulQZGRl1vic7O1tOp9PzlZKSEsCKAbQmrBuFUNdaAnfIh5nt27frzTff1Mcff6ynn35ac+bM0XvvvVfnex555BG5XC7P1+HDhwNYMYDWhHWjEOpaS+AO2Uezz507p1/+8pdau3atbrrpJknSNddcoz179uipp57SDTfc4PN90dHRio6ODmSpAFqp6nWjSl0VPm/jO3RhlmLWjUKwtGTgDqXpCEI2zJw/f17nz59XRIT3zaPIyEi53e4gVQUAf1e9blTmyl1ySF6BhnWjEApaKnCH2tNRQe1mOn36tPbs2aM9e/ZIkoqLi7Vnzx4dOnRIcXFxGjVqlB566CHl5eWpuLhYK1as0B//+Mc6n34CgEBj3SiEspZYqDUUn45yGGOCNoQ5Ly9PY8aMqbV95syZWrFihUpLS/XII49o48aNOnnypLp166a7775b999/vxyOhh34srIyOZ1OuVwuxcXF+bsJACAptG65AzX5605KldtoxOJNdQ4qrr7Tk/9vY5v9+9+Y63dQw0wgEGYAAPBP4C4oOqFbX9he737/c9cPm72OWmOu3yE7ZgYAAPiPPxZqDdWno0L60WwAABA6QnU6AsIMAABokOqno+rqnHLowlicQE9HQJgBAAAN0hJPR/k
DYQYAGqHKbVRQdEJv7DmigqIT1q9pAzRWKE5HwABgAGigUJsoDAiWif2TNC4tMWSmI+DRbABogOqJwmr+g1n9TzcT5AH+1ZjrN91MAFCPKrdR1vp9PqeDr96WtX4fXU5AkBBmAKAeO4tP1jnjqXQh0JS4KrSz+GTgigLgQZgBgHqE6kRhAC4gzABAPUJ1ojAAF/A0EwDUo3qisFJXhc9xM9WL6wV6ojCgpdi2cCphBgDqUT1RWObKXXJIXoEmmBOF1WTbBQihycYpCHg0GwAaKJT/kQ/l2mCPUJqCoDHXb8IMADRCKN79CKULEOxV5TYasXhTnU/uVXen5v/b2ID8zjfm+k03EwA0QmSEQ8N6XhbsMjzqmwPHoQtz4IxLSwx66EJoa8wUBKH0/4DE00wALMGaSL4xBw78xeYpCLgzAyDkMR6kbjZfgBBabJ6CgDszAEJa9XiQmncfSl0Vyly5SzmFJUGqLDTYfAFCaKmegqCuzkiHLvwREYpTEBBmAIQs1kSqn80XIISW6ikIJNX6fQqlKQh8IcwACFmMB6mfzRcghJ6J/ZO0bMYgJTq97+QlOmNC+qk4xswACFmMB2mY6gtQzXFFiYwrQhNM7J+kcWmJITcFwcUQZgCELMaDNJyNFyCErlCbgqA+hBkAIYs1kRrHtgsQ4C+MmQEQshgPAqAhCDMAQpqtAxIBBA7dTABCnu3jQUJxPScgnBBmAEu09gtiZIRDQ1PjPcdgZ/FJK44BsxcDLY8wA1iAC6Kdx6Cu1ayrZy+mmwzwD8bMACGO6fztPAbMXgwEDmEGCGFcEO09BsxeDAQOYQYIYVwQ7T0GzF4MBA5jZpqotQ/GRGBwQbT3GDB7MRA4hJkmsHEgIuzEBbHxxyBU/tBg9mIgcAgzjcTTCQgkLoiNOwah9IdG9ezFmSt3ySF51c7sxYB/MWamEWwdiAh7MZ1/w4/Bu/tKQ+6JJ2YvBgLDYYwJ6ytvWVmZnE6nXC6X4uLimvVZBUUndOsL2+vd73/u+iGLvcGvQumOQ7Bc7BiMS0vUiMWb6hwoXH33Jv/fxgYl+IVK1xdgk8Zcv+lmagRbByLCfrZP5+8PFzsGBUUnGvzEUzD+0GA1a6BlEWYagcGYCCYuiHUfA/7QAFo3xsw0QvVAxLr+Fnbowm3vcB6MCYQi/tAAWjfCTCMwGLPhqtxGBUUn9MaeIyooOsGgaLQo/tAAWje6mRqp+umEmgMRE1vZYMyLYbAqAo3HoIHWjaeZmoinE3yrax6e6iPD46hoSQRpIHw05vpNmIHfVLlNSD8ei9aBPzSA8MCj2QiKxiwI2NqfykHL4akvoPVhADD8hsdjAQDBQJiB3/B4LAAgGAgz8BsejwUABANhBn7DPDwAgGAgzMCvWCUYABBoPM0Evwv3RRF59BcAQgthBi0iXB+PZVI2AAg9dDMBDVQ9u3HNuXRKXRXKXLlLOYUlQaoMAFo3wgzQAFVuo6z1+2ot0yD9fR2grPX7WFATAIKAMAM0QGNmNwYABBZhBmgAZjcGgNBFmAEagNmNASB0EWaABmB2YwAIXYQZoAGY3RgAQhdhBmggZjcGgNDEpHlAI4T77MYAYCPCDNBI4Tq7MQDYim4mAABgNcIMAACwGmEGAABYjTADAACsRpgBAABWI8wAAACrEWYAAIDVghpmtmzZokmTJik5OVkOh0Pr1q2rtc/nn3+um2++WU6nU+3bt9eQIUN06NChwBcLAABCUlDDzJkzZzRgwAAtXbrU5+tFRUUaMWKE+vbtq7y8PH366ad69NFHFRPDysQAAOAChzHGBLsISXI4HFq7dq0mT57s2TZ9+nS1bdtWr7zySpM/t6ysTE6nUy6XS3FxcX6oFAAAtLTGXL9DdsyM2+3WW2+9pd69e2vChAlKSEjQdddd57Mr6vsqKytVVlbm9QUAAMJXyIaZY8eO6fTp03ryySc1ceJEbdy4UVOmTNEtt9yizZs31/m+7OxsOZ1Oz1dKSkoAqwY
AAIEWst1MR48e1RVXXKFbb71Vr776qme/m2++We3bt9f//M//+PycyspKVVZWer4vKytTSkoK3UxAK1flNqx2DlikMd1MIbtq9uWXX642bdooLS3Na3u/fv2Un59f5/uio6MVHR3d0uUBsEhOYYmy1u9TiavCsy3JGaMFk9I0sX9SECsD4A8h280UFRWlIUOGaP/+/V7bv/zyS3Xr1i1IVQGwTU5hiTJX7vIKMpJU6qpQ5spdyiksCVJlAPwlqHdmTp8+rYMHD3q+Ly4u1p49exQfH6+uXbvqoYce0rRp05SRkaExY8YoJydH69evV15eXvCKBmCNKrdR1vp98tWXbiQ5JGWt36dxaYl0OQEWC+qdmY8++kgDBw7UwIEDJUkPPPCABg4cqF//+teSpClTpmj58uX67W9/q6uvvlp/+MMftHr1ao0YMSKYZQOwxM7ik7XuyHyfkVTiqtDO4pOBKwqA3wX1zszo0aNV3/jjO+64Q3fccUeAKgIQTo6V1x1kmrIfgNAUsmNmAKC5EmIbNlt4Q/cDEJoIMwDC1tDUeCU5Y1TXaBiHLjzVNDQ1PpBlAfAzwgyAsBUZ4dCCSRemd6gZaKq/XzApjcG/gOUIMwDC2sT+SVo2Y5ASnd5dSYnOGC2bMYh5ZoAwELKT5gGAv0zsn6RxaYnMAAyEKcIMgFYhMsKhYT0vC3YZAFoA3UwAAMBqhBkAAGA1upmAIGAFZwDwH8IMEGCs4AwA/kU3ExBArOAMAP5HmAECpL4VnKULKzhXuS++XhkAwBthBggQVnBuvarcRgVFJ/TGniMqKDpBYAX8jDEzQICwgnPrxBgpoOVxZwYIEFZwbn0YIwUEBmEGCBBWcG5dGCMFBA5hBggQVnBuXRgjBQQOYQYIIFZwbj0YIwUEDgOAgQBjBefWgTFSQOAQZoAgYAXn8Fc9RqrUVeFz3IxDF+7IMUYKaD66mQCgBTBGCggcwgwAtBDGSAGBQTcTALQgxkgBLY8wAwAtjDFSQMuimwkAAFiNMAMAAKxGmAEAAFYjzAAAAKsRZgAAgNUIMwAAwGqEGQAAYDXCDAAAsBphBgAAWI0wAwAArEaYAQAAViPMAAAAqxFmAACA1QgzAADAaoQZAABgNcIMAACwGmEGAABYjTADAACsRpgBAABWI8wAAACrtQl2AQAQCFVuo53FJ3WsvEIJsTEamhqvyAhHsMsC4AeEGQBhL6ewRFnr96nEVeHZluSM0YJJaZrYPymIlQHwB7qZAIS1nMISZa7c5RVkJKnUVaHMlbuUU1gSpMoA+AthBkDYqnIbZa3fJ+PjteptWev3qcrtaw8AtiDMAGiUKrdRQdEJvbHniAqKToR0ENhZfLLWHZnvM5JKXBXaWXwycEUB8DvGzABoMNvGnhwrrzvINGU/AKGJOzMAGsTGsScJsTF+3Q9AaCLMAKiXrWNPhqbGK8kZo7oewHbowp2loanxgSwLgJ8RZgDUy9axJ5ERDi2YlCZJtQJN9fcLJqUx3wxgOcIMgHrZPPZkYv8kLZsxSIlO766kRGeMls0YFJJjfQA0DgOAAdTL9rEnE/snaVxaIjMAA2GKMAOgXtVjT0pdFT7HzTh04U5HKI89iYxwaFjPy4JdBoAWQDcTgHox9gRAKCPMAGgQxp4ACFV0MwFoMMaeAAhFhBkAjcLYEwChhm4mAABgNcIMAACwGmEGAABYjTADAACsRpgBAABWI8wAAACrEWYAAIDVCDMAAMBqhBkAAGC1sJ8B2JgLa/yWlZUFuRIAANBQ1dft6uv4xYR9mCkvL5ckpaSkBLkSAADQWOXl5XI6nRfdx2EaEnks5na7dfToUcXGxsrh8M9ieGVlZUpJSdHhw4cVFxfnl88MRa2hna2hjRLtDCetoY0S7QwnTW2jMUbl5eVKTk5WRMTFR8WE/Z2ZiIgIdenSpUU+Oy4uLmx/+b6vNbSzNbRRop3hpDW0UaKd4aQpbazvjkw1BgADAACrEWYAAIDVCDN
NEB0drQULFig6OjrYpbSo1tDO1tBGiXaGk9bQRol2hpNAtDHsBwADAIDwxp0ZAABgNcIMAACwGmEGAABYjTADAACsRpjxYcuWLZo0aZKSk5PlcDi0bt26i+6fl5cnh8NR66u0tDQwBTdBdna2hgwZotjYWCUkJGjy5Mnav39/ve/705/+pL59+yomJkZXX321/vd//zcA1TZNU9q4YsWKWucxJiYmQBU3zbJly3TNNdd4JqQaNmyY3n777Yu+x6bzWK2x7bTxXNb05JNPyuFwaN68eRfdz8bz+X0NaaeN53PhwoW1au7bt+9F32PbuWxsG1vqPBJmfDhz5owGDBigpUuXNup9+/fvV0lJiecrISGhhSpsvs2bN2vOnDnavn273n33XZ0/f17jx4/XmTNn6nzPtm3bdOutt+rOO+/U7t27NXnyZE2ePFmFhYUBrLzhmtJG6cIsld8/j19//XWAKm6aLl266Mknn9THH3+sjz76SGPHjtWPf/xjffbZZz73t+08VmtsOyX7zuX3ffjhh3r++ed1zTXXXHQ/W89ntYa2U7LzfF511VVeNefn59e5r63nsjFtlFroPBpclCSzdu3ai+6Tm5trJJlTp04FpKaWcOzYMSPJbN68uc59pk6dam666Savbdddd53553/+55Yuzy8a0saXXnrJOJ3OwBXVQi699FLzhz/8wedrtp/H77tYO20+l+Xl5ebKK6807777rhk1apS577776tzX5vPZmHbaeD4XLFhgBgwY0OD9bTyXjW1jS51H7sz40Q9+8AMlJSVp3Lhx2rp1a7DLaRSXyyVJio+Pr3OfgoIC3XDDDV7bJkyYoIKCghatzV8a0kZJOn36tLp166aUlJR6//IPNVVVVXrttdd05swZDRs2zOc+tp9HqWHtlOw9l3PmzNFNN91U6zz5YvP5bEw7JTvP54EDB5ScnKwePXro5z//uQ4dOlTnvraey8a0UWqZ80iY8YOkpCQtX75cq1ev1urVq5WSkqLRo0dr165dwS6tQdxut+bNm6fhw4erf//+de5XWlqqzp07e23r3LlzSI8NqtbQNvbp00cvvvii3njjDa1cuVJut1vp6en6y1/+EsBqG2/v3r3q0KGDoqOjNXv2bK1du1ZpaWk+97X5PDamnbaey9dee027du1SdnZ2g/a39Xw2tp02ns/rrrtOK1asUE5OjpYtW6bi4mKNHDlS5eXlPve38Vw2to0tdh79fq8nzKgB3Uy+ZGRkmBkzZvi/oBYwe/Zs061bN3P48OGL7te2bVvz6quvem1bunSpSUhIaMny/KKhbazp22+/NT179jT//u//3kKV+UdlZaU5cOCA+eijj8zDDz9sLr/8cvPZZ5/53Nfm89iYdtZkw7k8dOiQSUhIMJ988olnW33dLzaez6a0syYbzmdNp06dMnFxcXV2jdp4Lmuqr401+es8tmleFEJdhg4dWu8gqFAwd+5cbdiwQVu2bFGXLl0uum9iYqL++te/em3761//qsTExJYssdka08aa2rZtq4EDB+rgwYMtVJ1/REVFqVevXpKkwYMH68MPP9Tvf/97Pf/887X2tfU8So1rZ002nMuPP/5Yx44d06BBgzzbqqqqtGXLFv3Xf/2XKisrFRkZ6fUeG89nU9pZkw3ns6aOHTuqd+/eddZs47msqb421uSv80g3UwvZs2ePkpKSgl1GnYwxmjt3rtauXatNmzYpNTW13vcMGzZM77//vte2d99996JjFoKpKW2sqaqqSnv37g3pc+mL2+1WZWWlz9dsO48Xc7F21mTDubz++uu1d+9e7dmzx/N17bXX6uc//7n27Nnj8wJv4/lsSjtrsuF81nT69GkVFRXVWbON57Km+tpYk9/OY7Pu64Sp8vJys3v3brN7924jySxZssTs3r3bfP3118YYYx5++GFz2223efZ/5plnzLp168yBAwfM3r17zX333WciIiLMe++9F6wm1CszM9M4nU6Tl5dnSkp
KPF9nz5717HPbbbeZhx9+2PP91q1bTZs2bcxTTz1lPv/8c7NgwQLTtm1bs3fv3mA0oV5NaWNWVpZ55513TFFRkfn444/N9OnTTUxMTIO7MoLh4YcfNps3bzbFxcXm008/NQ8//LBxOBxm48aNxhj7z2O1xrbTxnPpS83ul3A5nzXV104bz+eDDz5o8vLyTHFxsdm6dau54YYbzOWXX26OHTtmjAmPc9nYNrbUeSTM+FD9qHXNr5kzZxpjjJk5c6YZNWqUZ//Fixebnj17mpiYGBMfH29Gjx5tNm3aFJziG8hX+ySZl156ybPPqFGjPG2u9vrrr5vevXubqKgoc9VVV5m33norsIU3QlPaOG/ePNO1a1cTFRVlOnfubH70ox+ZXbt2Bb74RrjjjjtMt27dTFRUlOnUqZO5/vrrPRd4Y+w/j9Ua204bz6UvNS/y4XI+a6qvnTaez2nTppmkpCQTFRVlrrjiCjNt2jRz8OBBz+vhcC4b28aWOo8OY4xp3r0dAACA4GHMDAAAsBphBgAAWI0wAwAArEaYAQAAViPMAAAAqxFmAACA1QgzAADAaoQZANbJy8uTw+HQN9980+D3LFy4UD/4wQ9arCYAwUOYAdCili9frtjYWH333XeebadPn1bbtm01evRor32rQ0pRUdFFPzM9PV0lJSVyOp1+rXX06NGaN2+eXz8TQMsjzABoUWPGjNHp06f10UcfebZ98MEHSkxM1I4dO1RRUeHZnpubq65du6pnz54X/cyoqCglJibK4XC0WN0A7EGYAdCi+vTpo6SkJOXl5Xm25eXl6cc//rFSU1O1fft2r+1jxoyR2+1Wdna2UlNT1a5dOw0YMEB//vOfvfar2c30wgsvKCUlRZdccommTJmiJUuWqGPHjrXqeeWVV9S9e3c5nU5Nnz5d5eXlkqRZs2Zp8+bN+v3vfy+HwyGHw6GvvvrK34cDQAsgzABocWPGjFFubq7n+9zcXI0ePVqjRo3ybD937px27NihMWPGKDs7W3/84x+1fPlyffbZZ7r//vs1Y8YMbd682efnb926VbNnz9Z9992nPXv2aNy4cVq0aFGt/YqKirRu3Tpt2LBBGzZs0ObNm/Xkk09Kkn7/+99r2LBhuuuuu1RSUqKSkhKlpKS0wNEA4G9tgl0AgPA3ZswYzZs3T999953OnTun3bt3a9SoUTp//ryWL18uSSooKFBlZaVGjx6ttLQ0vffeexo2bJgkqUePHsrPz9fzzz+vUaNG1fr8Z599VjfeeKPmz58vSerdu7e2bdumDRs2eO3ndru1YsUKxcbGSpJuu+02vf/++1q0aJGcTqeioqJ0ySWXKDExsSUPBwA/I8wAaHGjR4/WmTNn9OGHH+rUqVPq3bu3OnXqpFGjRun2229XRUWF8vLy1KNHD50+fVpnz57VuHHjvD7j22+/1cCBA31+/v79+zVlyhSvbUOHDq0VZrp37+4JMpKUlJSkY8eO+amVAIKFMAOgxfXq1UtdunRRbm6uTp065bm7kpycrJSUFG3btk25ubkaO3asTp8+LUl66623dMUVV3h9TnR0dLPqaNu2rdf3DodDbre7WZ8JIPgIMwACYsyYMcrLy9OpU6f00EMPebZnZGTo7bff1s6dO5WZmam0tDRFR0fr0KFDPruUfOnTp48+/PBDr201v2+IqKgoVVVVNfp9AIKLMAMgIMaMGaM5c+bo/PnzXiFl1KhRmjt3rr799luNGTNGsbGxmj9/vu6//3653W6NGDFCLpdLW7duVVxcnGbOnFnrs++9915lZGRoyZIlmjRpkjZt2qS333670Y9ud+/eXTt27NBXX32lDh06KD4+XhERPCcBhDr+LwUQEGPGjNG5c+fUq1cvde7c2bN91KhRKi8v9zzCLUmPP/64Hn30UWVnZ6tfv36aOHGi3nrrLaWmpvr87OHDh2v58uVasmSJBgwYoJycHN1///2KiYlpVI3z589XZGSk0tLS1KlTJx06dKj
pDQYQMA5jjAl2EQDgb3fddZe++OILffDBB8EuBUALo5sJQFh46qmnNG7cOLVv315vv/22Xn75ZT333HPBLgtAAHBnBkBYmDp1qvLy8lReXq4ePXro3nvv1ezZs4NdFoAAIMwAAACrMQAYAABYjTADAACsRpgBAABWI8wAAACrEWYAAIDVCDMAAMBqhBkAAGA1wgwAALAaYQYAAFjt/wHA6Er4kdrkggAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-0.1747158787134049\n" + ] + } + ], + "source": [ + "plt.scatter(df['wt'], df['qsec'])\n", + "plt.title('Weight vs qsec')\n", + "plt.xlabel('Weight')\n", + "plt.ylabel('qsec')\n", + "plt.show()\n", + "\n", + "print(df['wt'].corr(df['qsec']))" + ] + }, + { + "cell_type": "markdown", + "id": "b4f1fabe", + "metadata": {}, + "source": [ + "- 이런 데이터셋을 분석해서 얻을 수 있는 경제적 가치가 무엇일까?, 어떤 비즈니스 상황에서 어떤 경제적 가치를 얻을 수 있을까?\n", + " - 나의 생각\n", + " - M1 활동에서는 지표들간의 상관관계를 찾는 작업을 진행했다.\n", + " - 데이터 분석의 목적이 무엇일까?\n", + " - 좋은 의사결정을 할 수 있는 근거를 마련하기.\n", + " - 좋은 의사결정이란?\n", + " - 의도와 근거가 명확한 결정..? -> 좀 더 생각해보기.\n", + " - 의사결정의 의도: 판매량 증가, 비용 절감, 효율 향상 등등 -> 이것들이 결국엔 경제적 가치로 이어진다.\n", + " - 근거가 있으면 설득할 수 있다.\n", + " - 지표들간의 상관관계를 찾으면 뭐가 좋으냐?\n", + " - 통제가능한 지표를 조절하여 통제불가능한 지표를 조절할 수 있다.\n", + " - 상관관계를 통해 부족한 데이터를 예측할 수 있다.\n", + " - 데이터를 분석하기 전에 목적(=경제적 가치)을 명확히 하는 것이 중요하다.\n", + " - 내가 비즈니스맨이라면 `1. 연도별로 선호하는 자동차 특성`, `2. 나이대별 선호하는 자동차 특성`이 궁금할 것 같다.\n", + " 1. 연도별로 선호하는 자동차 특성 \n", + " - 목적: 시장이 변화하는 추세를 파악하고 싶다 -> 이를 바탕으로 선호되는 특성에 맞는 신규 자동차 개발(경제적 가치)\n", + " - 방법: 연도별 판매량 column을 추가로 수집하고 다른 칼럼과 상관관계를 찾아본다.\n", + " 2. 나이대별 선호하는 자동차 특성\n", + " - 목적: 나이대별 선호하는 자동차 특성을 파악하고 싶다 -> 나이대별 맞춤 마케팅 전략 수립(경제적 가치)\n", + " - 방법: 나이대/성별 당 판매량 column을 추가로 수집하고 다른 칼럼과 상관관계를 찾아본다.\n", + " - 팀의 생각\n", + " - groupby했던 것 처럼 차량을 여러 카테고리로 분류하여 클러스터링 할 수 있다.\n", + " - 연비, 무게, 마력 등 지표간의 상관관계를 통해 최적화된 차량 설계가 가능하다.\n", + " - 소비자 선호 데이터를 추가하여 마케팅 전략 수립에 도움이 될 수 있다.\n", + " - 종합\n", + " - 팀원들은 현재 데이터를 기준으로, 나는 좀 더 일반적인 차원에서 생각해보았다.\n", + " - 공통적으로 데이터는 현재 상황을 해석하고 미래를 예측하는 근거라고 생각했다." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/missions/W1/M2.ipynb b/missions/W1/M2.ipynb new file mode 100644 index 0000000..775bfa1 --- /dev/null +++ b/missions/W1/M2.ipynb @@ -0,0 +1,2156 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 163, + "metadata": {}, + "outputs": [], + "source": [ + "import sqlite3\n", + "\n", + "conn = sqlite3.connect('./data.db')\n", + "\n", + "cursor = conn.cursor()\n", + "\n", + "sql = open('./create.sql', 'r').read()\n", + "\n", + "def init_db():\n", + " cursor.executescript(sql)\n", + "\n", + "def execute_and_print_query(query, limit=5):\n", + " cursor.execute(query)\n", + " print(query)\n", + " i = 0\n", + " for row in cursor:\n", + " if i >= limit:\n", + " print('...')\n", + " break\n", + " print(row)\n", + " i += 1\n", + " print('-' * 50)\n", + "\n", + "init_db()" + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT * FROM Customers\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. 
de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Syntax\n", + "execute_and_print_query('SELECT * FROM Customers')" + ] + }, + { + "cell_type": "code", + "execution_count": 165, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT CustomerName, City FROM Customers\n", + "('Alfreds Futterkiste', 'Berlin')\n", + "('Ana Trujillo Emparedados y helados', 'México D.F.')\n", + "('Antonio Moreno Taquería', 'México D.F.')\n", + "('Around the Horn', 'London')\n", + "('Berglunds snabbköp', 'Luleå')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. 
de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Select\n", + "execute_and_print_query('SELECT CustomerName, City FROM Customers')\n", + "execute_and_print_query('SELECT * FROM Customers')" + ] + }, + { + "cell_type": "code", + "execution_count": 166, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT DISTINCT Country FROM Customers\n", + "('Germany',)\n", + "('Mexico',)\n", + "('UK',)\n", + "('Sweden',)\n", + "('France',)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT Country FROM Customers\n", + "('Germany',)\n", + "('Mexico',)\n", + "('Mexico',)\n", + "('UK',)\n", + "('Sweden',)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT COUNT(DISTINCT Country) FROM Customers\n", + "(21,)\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Select Distinct\n", + "execute_and_print_query('SELECT DISTINCT Country FROM Customers')\n", + "execute_and_print_query('SELECT Country FROM Customers')\n", + "execute_and_print_query('SELECT COUNT(DISTINCT Country) FROM Customers')" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT * FROM Customers WHERE CustomerID=1\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 
57', 'Berlin', '12209', 'Germany')\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerID > 80\n", + "(81, 'Tradição Hipermercados', 'Anabela Domingues', 'Av. Inês de Castro, 414', 'São Paulo', '05634-030', 'Brazil')\n", + "(82, \"Trail''s Head Gourmet Provisioners\", 'Helvetius Nagy', '722 DaVinci Blvd.', 'Kirkland', '98034', 'USA')\n", + "(83, 'Vaffeljernet', 'Palle Ibsen', 'Smagsløget 45', 'Århus', '8200', 'Denmark')\n", + "(84, 'Victuailles en stock', 'Mary Saveley', '2, rue du Commerce', 'Lyon', '69004', 'France')\n", + "(85, 'Vins et alcools Chevalier', 'Paul Henriot', \"59 rue de l''Abbaye\", 'Reims', '51100', 'France')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Where\n", + "execute_and_print_query('SELECT * FROM Customers WHERE CustomerID=1')\n", + "execute_and_print_query('SELECT * FROM Customers WHERE CustomerID > 80')" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT * FROM Products ORDER BY Price\n", + "(33, 'Geitost', 15, 4, '500 g', 2.5)\n", + "(24, 'Guaraná Fantástica', 10, 1, '12 - 355 ml cans', 4.5)\n", + "(13, 'Konbu', 6, 8, '2 kg box', 6)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Products ORDER BY Price DESC\n", + "(38, 'Côte de Blaye', 18, 1, '12 - 75 cl bottles', 263.5)\n", + "(29, 'Thüringer Rostbratwurst', 12, 6, '50 bags x 30 sausgs.', 123.79)\n", + "(9, 'Mishi Kobe Niku', 4, 6, '18 - 500 g pkgs.', 97)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Products ORDER BY ProductName\n", + "(17, 'Alice Mutton', 7, 6, '20 - 1 kg tins', 39)\n", + "(3, 'Aniseed Syrup', 1, 2, '12 - 550 ml bottles', 10)\n", + "(40, 'Boston Crab Meat', 19, 8, '24 - 4 oz tins', 18.4)\n", + "...\n", + 
"--------------------------------------------------\n", + "SELECT * FROM Products ORDER BY ProductName DESC\n", + "(47, 'Zaanse koeken', 22, 3, '10 - 4 oz boxes', 9.5)\n", + "(64, 'Wimmers gute Semmelknödel', 12, 5, '20 bags x 4 pieces', 33.25)\n", + "(63, 'Vegie-spread', 7, 2, '15 - 625 g jars', 43.9)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers ORDER BY Country, CustomerName\n", + "(12, 'Cactus Comidas para llevar', 'Patricio Simpson', 'Cerrito 333', 'Buenos Aires', '1010', 'Argentina')\n", + "(54, 'Océano Atlántico Ltda.', 'Yvonne Moncada', 'Ing. Gustavo Moncada 8585 Piso 20-A', 'Buenos Aires', '1010', 'Argentina')\n", + "(64, 'Rancho grande', 'Sergio Gutiérrez', 'Av. del Libertador 900', 'Buenos Aires', '1010', 'Argentina')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers ORDER BY Country ASC, CustomerName DESC\n", + "(64, 'Rancho grande', 'Sergio Gutiérrez', 'Av. del Libertador 900', 'Buenos Aires', '1010', 'Argentina')\n", + "(54, 'Océano Atlántico Ltda.', 'Yvonne Moncada', 'Ing. 
Gustavo Moncada 8585 Piso 20-A', 'Buenos Aires', '1010', 'Argentina')\n", + "(12, 'Cactus Comidas para llevar', 'Patricio Simpson', 'Cerrito 333', 'Buenos Aires', '1010', 'Argentina')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Order By\n", + "execute_and_print_query('SELECT * FROM Products ORDER BY Price', limit=3)\n", + "execute_and_print_query('SELECT * FROM Products ORDER BY Price DESC', limit=3)\n", + "execute_and_print_query('SELECT * FROM Products ORDER BY ProductName', limit=3)\n", + "execute_and_print_query('SELECT * FROM Products ORDER BY ProductName DESC', limit=3)\n", + "execute_and_print_query('SELECT * FROM Customers ORDER BY Country, CustomerName', limit=3)\n", + "execute_and_print_query('SELECT * FROM Customers ORDER BY Country ASC, CustomerName DESC', limit=3)" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT * FROM Customers WHERE Country = 'Spain' AND CustomerName LIKE 'G%'\n", + "(29, 'Galería del gastrónomo', 'Eduardo Saavedra', 'Rambla de Cataluña, 23', 'Barcelona', '8022', 'Spain')\n", + "(30, 'Godos Cocina Típica', 'José Pedro Freyre', 'C/ Romero, 33', 'Sevilla', '41101', 'Spain')\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE Country = 'Germany' AND City = 'Berlin' AND PostalCode > 12000\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 
57', 'Berlin', '12209', 'Germany')\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE Country = 'Spain' AND (CustomerName LIKE 'G%' OR CustomerName LIKE 'R%')\n", + "(29, 'Galería del gastrónomo', 'Eduardo Saavedra', 'Rambla de Cataluña, 23', 'Barcelona', '8022', 'Spain')\n", + "(30, 'Godos Cocina Típica', 'José Pedro Freyre', 'C/ Romero, 33', 'Sevilla', '41101', 'Spain')\n", + "(69, 'Romero y tomillo', 'Alejandra Camino', 'Gran Vía, 1', 'Madrid', '28001', 'Spain')\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE Country = 'Spain' AND CustomerName LIKE 'G%' OR CustomerName LIKE 'R%'\n", + "(29, 'Galería del gastrónomo', 'Eduardo Saavedra', 'Rambla de Cataluña, 23', 'Barcelona', '8022', 'Spain')\n", + "(30, 'Godos Cocina Típica', 'José Pedro Freyre', 'C/ Romero, 33', 'Sevilla', '41101', 'Spain')\n", + "(64, 'Rancho grande', 'Sergio Gutiérrez', 'Av. del Libertador 900', 'Buenos Aires', '1010', 'Argentina')\n", + "(65, 'Rattlesnake Canyon Grocery', 'Paula Wilson', '2817 Milton Dr.', 'Albuquerque', '87110', 'USA')\n", + "(66, 'Reggiani Caseifici', 'Maurizio Moroni', 'Strada Provinciale 124', 'Reggio Emilia', '42100', 'Italy')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL And\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE Country = 'Spain' AND CustomerName LIKE 'G%'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE Country = 'Germany' AND City = 'Berlin' AND PostalCode > 12000\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE Country = 'Spain' AND (CustomerName LIKE 'G%' OR CustomerName LIKE 'R%')\")\n", + "# (spain and g) or r\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE Country = 'Spain' AND CustomerName LIKE 'G%' OR CustomerName LIKE 'R%'\")" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "SELECT * FROM Customers WHERE Country = 'Germany' OR Country = 'Spain'\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(6, 'Blauer See Delikatessen', 'Hanna Moos', 'Forsterstr. 57', 'Mannheim', '68306', 'Germany')\n", + "(8, 'Bólido Comidas preparadas', 'Martín Sommer', 'C/ Araquil, 67', 'Madrid', '28023', 'Spain')\n", + "(17, 'Drachenblut Delikatessend', 'Sven Ottlieb', 'Walserweg 21', 'Aachen', '52066', 'Germany')\n", + "(22, 'FISSA Fabrica Inter. Salchichas S.A.', 'Diego Roel', 'C/ Moralzarzal, 86', 'Madrid', '28034', 'Spain')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE City = 'Berlin' OR CustomerName LIKE 'G%' OR Country = 'Norway'\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(29, 'Galería del gastrónomo', 'Eduardo Saavedra', 'Rambla de Cataluña, 23', 'Barcelona', '8022', 'Spain')\n", + "(30, 'Godos Cocina Típica', 'José Pedro Freyre', 'C/ Romero, 33', 'Sevilla', '41101', 'Spain')\n", + "(31, 'Gourmet Lanchonetes', 'André Fonseca', 'Av. 
Brasil, 442', 'Campinas', '04876-786', 'Brazil')\n", + "(32, 'Great Lakes Food Market', 'Howard Snyder', '2732 Baker Blvd.', 'Eugene', '97403', 'USA')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE Country = 'Spain' AND (CustomerName LIKE 'G%' OR CustomerName LIKE 'R%')\n", + "(29, 'Galería del gastrónomo', 'Eduardo Saavedra', 'Rambla de Cataluña, 23', 'Barcelona', '8022', 'Spain')\n", + "(30, 'Godos Cocina Típica', 'José Pedro Freyre', 'C/ Romero, 33', 'Sevilla', '41101', 'Spain')\n", + "(69, 'Romero y tomillo', 'Alejandra Camino', 'Gran Vía, 1', 'Madrid', '28001', 'Spain')\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE Country = 'Spain' AND CustomerName LIKE 'G%' OR CustomerName LIKE 'R%'\n", + "(29, 'Galería del gastrónomo', 'Eduardo Saavedra', 'Rambla de Cataluña, 23', 'Barcelona', '8022', 'Spain')\n", + "(30, 'Godos Cocina Típica', 'José Pedro Freyre', 'C/ Romero, 33', 'Sevilla', '41101', 'Spain')\n", + "(64, 'Rancho grande', 'Sergio Gutiérrez', 'Av. 
del Libertador 900', 'Buenos Aires', '1010', 'Argentina')\n", + "(65, 'Rattlesnake Canyon Grocery', 'Paula Wilson', '2817 Milton Dr.', 'Albuquerque', '87110', 'USA')\n", + "(66, 'Reggiani Caseifici', 'Maurizio Moroni', 'Strada Provinciale 124', 'Reggio Emilia', '42100', 'Italy')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL OR\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE Country = 'Germany' OR Country = 'Spain'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE City = 'Berlin' OR CustomerName LIKE 'G%' OR Country = 'Norway'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE Country = 'Spain' AND (CustomerName LIKE 'G%' OR CustomerName LIKE 'R%')\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE Country = 'Spain' AND CustomerName LIKE 'G%' OR CustomerName LIKE 'R%'\")" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT * FROM Customers WHERE NOT Country = 'Spain'\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerName NOT LIKE 'A%'\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "(6, 'Blauer See Delikatessen', 'Hanna Moos', 'Forsterstr. 
57', 'Mannheim', '68306', 'Germany')\n", + "(7, 'Blondel père et fils', 'Frédérique Citeaux', '24, place Kléber', 'Strasbourg', '67000', 'France')\n", + "(8, 'Bólido Comidas preparadas', 'Martín Sommer', 'C/ Araquil, 67', 'Madrid', '28023', 'Spain')\n", + "(9, \"Bon app''\", 'Laurence Lebihans', '12, rue des Bouchers', 'Marseille', '13008', 'France')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerID NOT BETWEEN 10 AND 60\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE City NOT IN ('Paris', 'London')\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "(6, 'Blauer See Delikatessen', 'Hanna Moos', 'Forsterstr. 57', 'Mannheim', '68306', 'Germany')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE NOT CustomerID > 50\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. 
de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE NOT CustomerId < 50\n", + "(50, 'Maison Dewey', 'Catherine Dewey', 'Rue Joseph-Bens 532', 'Bruxelles', 'B-1180', 'Belgium')\n", + "(51, 'Mère Paillarde', 'Jean Fresnière', '43 rue St. Laurent', 'Montréal', 'H1J 1C3', 'Canada')\n", + "(52, 'Morgenstern Gesundkost', 'Alexander Feuer', 'Heerstr. 22', 'Leipzig', '4179', 'Germany')\n", + "(53, 'North/South', 'Simon Crowther', 'South House 300 Queensbridge', 'London', 'SW7 1RZ', 'UK')\n", + "(54, 'Océano Atlántico Ltda.', 'Yvonne Moncada', 'Ing. Gustavo Moncada 8585 Piso 20-A', 'Buenos Aires', '1010', 'Argentina')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Not\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE NOT Country = 'Spain'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName NOT LIKE 'A%'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerID NOT BETWEEN 10 AND 60\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE City NOT IN ('Paris', 'London')\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE NOT CustomerID > 50\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE NOT CustomerId < 50\")" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INSERT INTO Customers (CustomerName, ContactName, Address, City, PostalCode, Country) \n", + " VALUES ('Cardinal', 'Tom B. 
Erichsen', 'Skagen 21', 'Stavanger', '4006', 'Norway')\n", + "--------------------------------------------------\n", + "INSERT INTO Customers (CustomerName, City, Country)\n", + " VALUES ('Cardinal', 'Stavanger', 'Norway')\n", + "--------------------------------------------------\n", + "INSERT INTO Customers (CustomerName, ContactName, Address, City, PostalCode, Country)\n", + " VALUES\n", + " ('Cardinal', 'Tom B. Erichsen', 'Skagen 21', 'Stavanger', '4006', 'Norway'),\n", + " ('Greasy Burger', 'Per Olsen', 'Gateveien 15', 'Sandnes', '4306', 'Norway'),\n", + " ('Tasty Tee', 'Finn Egan', 'Streetroad 19B', 'Liverpool', 'L1 0AA', 'UK')\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Insert Into\n", + "execute_and_print_query(\"\"\"INSERT INTO Customers (CustomerName, ContactName, Address, City, PostalCode, Country) \n", + " VALUES ('Cardinal', 'Tom B. Erichsen', 'Skagen 21', 'Stavanger', '4006', 'Norway')\"\"\")\n", + "execute_and_print_query(\"\"\"INSERT INTO Customers (CustomerName, City, Country)\n", + " VALUES ('Cardinal', 'Stavanger', 'Norway')\"\"\")\n", + "execute_and_print_query(\"\"\"INSERT INTO Customers (CustomerName, ContactName, Address, City, PostalCode, Country)\n", + " VALUES\n", + " ('Cardinal', 'Tom B. Erichsen', 'Skagen 21', 'Stavanger', '4006', 'Norway'),\n", + " ('Greasy Burger', 'Per Olsen', 'Gateveien 15', 'Sandnes', '4306', 'Norway'),\n", + " ('Tasty Tee', 'Finn Egan', 'Streetroad 19B', 'Liverpool', 'L1 0AA', 'UK')\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT CustomerName, ContactName, Address FROM Customers WHERE Address IS NULL\n", + "('Cardinal', None, None)\n", + "--------------------------------------------------\n", + "SELECT CustomerName, ContactName, Address FROM Customers WHERE Address IS NOT NULL\n", + "('Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 
57')\n", + "('Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. de la Constitución 2222')\n", + "('Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312')\n", + "('Around the Horn', 'Thomas Hardy', '120 Hanover Sq.')\n", + "('Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Null Values\n", + "execute_and_print_query(\"SELECT CustomerName, ContactName, Address FROM Customers WHERE Address IS NULL\")\n", + "execute_and_print_query(\"SELECT CustomerName, ContactName, Address FROM Customers WHERE Address IS NOT NULL\")" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "UPDATE Customers\n", + " SET ContactName = 'Alfred Schmidt', City= 'Frankfurt'\n", + " WHERE CustomerID = 1\n", + "--------------------------------------------------\n", + "UPDATE Customers\n", + " SET ContactName='Juan'\n", + " WHERE Country='Mexico'\n", + "--------------------------------------------------\n", + "UPDATE Customers SET ContactName='Juan'\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Update\n", + "execute_and_print_query(\"\"\"UPDATE Customers\n", + " SET ContactName = 'Alfred Schmidt', City= 'Frankfurt'\n", + " WHERE CustomerID = 1\"\"\")\n", + "execute_and_print_query(\"\"\"UPDATE Customers\n", + " SET ContactName='Juan'\n", + " WHERE Country='Mexico'\"\"\")\n", + "# WARN! 
Where가 없으면 모든 로우가 업데이트\n", + "execute_and_print_query(\"\"\"UPDATE Customers SET ContactName='Juan'\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DELETE FROM Customers WHERE CustomerName='Alfreds Futterkiste'\n", + "--------------------------------------------------\n", + "DELETE FROM Customers\n", + "--------------------------------------------------\n", + "DROP TABLE Customers\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Delete\n", + "execute_and_print_query(\"DELETE FROM Customers WHERE CustomerName='Alfreds Futterkiste'\")\n", + "# 테이블의 모든 로우 삭제\n", + "execute_and_print_query(\"DELETE FROM Customers\")\n", + "# 테이블 자체까지 삭제\n", + "execute_and_print_query(\"DROP TABLE Customers\")\n", + "\n", + "init_db()" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT * FROM Customers LIMIT 3\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE Country = 'Germany' LIMIT 3\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(6, 'Blauer See Delikatessen', 'Hanna Moos', 'Forsterstr. 
57', 'Mannheim', '68306', 'Germany')\n", + "(17, 'Drachenblut Delikatessend', 'Sven Ottlieb', 'Walserweg 21', 'Aachen', '52066', 'Germany')\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers ORDER BY CustomerName DESC LIMIT 3\n", + "(91, 'Wolski', 'Zbyszek', 'ul. Filtrowa 68', 'Walla', '01-012', 'Poland')\n", + "(90, 'Wilman Kala', 'Matti Karttunen', 'Keskuskatu 45', 'Helsinki', '21240', 'Finland')\n", + "(89, 'White Clover Markets', 'Karl Jablonski', '305 - 14th Ave. S. Suite 3B', 'Seattle', '98128', 'USA')\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Select Top\n", + "# Top은 SQLite에서 지원되지 않음\n", + "# execute_and_print_query(\"SELECT TOP 3 * FROM Customers\")\n", + "# 대신 LIMIT 사용\n", + "execute_and_print_query(\"SELECT * FROM Customers LIMIT 3\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE Country = 'Germany' LIMIT 3\")\n", + "execute_and_print_query(\"SELECT * FROM Customers ORDER BY CustomerName DESC LIMIT 3\")" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [], + "source": [ + "# SQL Aggregate Functions\n", + "# MIN, MAX, COUNT, SUM, AVG\n", + "# 보통 Group By와 함께 쓰인다" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT MIN(Price) FROM Products\n", + "(2.5,)\n", + "--------------------------------------------------\n", + "SELECT MAX(Price) FROM Products\n", + "(263.5,)\n", + "--------------------------------------------------\n", + "SELECT MIN(Price) AS SmallestPrice FROM Products\n", + "(2.5,)\n", + "--------------------------------------------------\n", + "SELECT MIN(Price) AS SmallestPrice, CategoryID FROM Products GROUP BY CategoryID\n", + "(4.5, 1)\n", + "(10, 2)\n", + "(9.2, 3)\n", + "(2.5, 4)\n", + "(7, 5)\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], 
+ "source": [ + "# SQL Min and Max\n", + "execute_and_print_query(\"SELECT MIN(Price) FROM Products\")\n", + "execute_and_print_query(\"SELECT MAX(Price) FROM Products\")\n", + "execute_and_print_query(\"SELECT MIN(Price) AS SmallestPrice FROM Products\")\n", + "execute_and_print_query(\"SELECT MIN(Price) AS SmallestPrice, CategoryID FROM Products GROUP BY CategoryID\")" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT COUNT(*) FROM Products\n", + "(77,)\n", + "--------------------------------------------------\n", + "SELECT COUNT(ProductName) FROM Products\n", + "(77,)\n", + "--------------------------------------------------\n", + "SELECT COUNT(ProductID) FROM Products WHERE Price > 20\n", + "(37,)\n", + "--------------------------------------------------\n", + "SELECT COUNT(DISTINCT Price) FROM Products\n", + "(62,)\n", + "--------------------------------------------------\n", + "SELECT COUNT(*) AS [Number of records] FROM Products\n", + "(77,)\n", + "--------------------------------------------------\n", + "SELECT COUNT(*) AS [Number of records], CategoryID FROM Products GROUP BY CategoryID\n", + "(12, 1)\n", + "(12, 2)\n", + "(13, 3)\n", + "(10, 4)\n", + "(7, 5)\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Count\n", + "execute_and_print_query(\"SELECT COUNT(*) FROM Products\")\n", + "execute_and_print_query(\"SELECT COUNT(ProductName) FROM Products\")\n", + "execute_and_print_query(\"SELECT COUNT(ProductID) FROM Products WHERE Price > 20\")\n", + "execute_and_print_query(\"SELECT COUNT(DISTINCT Price) FROM Products\")\n", + "# []를 쓰면 \"\" 없이도 칼럼명에 공백이 가능!\n", + "execute_and_print_query(\"SELECT COUNT(*) AS [Number of records] FROM Products\")\n", + "execute_and_print_query(\"SELECT COUNT(*) AS [Number of records], CategoryID FROM Products GROUP BY CategoryID\")" + ] + }, + { + 
"cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT SUM(Quantity) FROM OrderDetails\n", + "(12743,)\n", + "--------------------------------------------------\n", + "SELECT SUM(Quantity) FROM OrderDetails WHERE ProductId = 11\n", + "(182,)\n", + "--------------------------------------------------\n", + "SELECT SUM(Quantity) AS total FROM OrderDetails\n", + "(12743,)\n", + "--------------------------------------------------\n", + "SELECT OrderID, SUM(Quantity) AS [Total Quantity] FROM OrderDetails GROUP BY OrderID\n", + "(10248, 27)\n", + "(10249, 49)\n", + "(10250, 60)\n", + "(10251, 41)\n", + "(10252, 105)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT SUM(Quantity * 10) FROM OrderDetails\n", + "(127430,)\n", + "--------------------------------------------------\n", + "SELECT SUM(Price * Quantity) FROM OrderDetails LEFT JOIN Products ON OrderDetails.ProductID = Products.ProductID\n", + "(386424.23,)\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Sum\n", + "execute_and_print_query(\"SELECT SUM(Quantity) FROM OrderDetails\")\n", + "execute_and_print_query(\"SELECT SUM(Quantity) FROM OrderDetails WHERE ProductId = 11\")\n", + "execute_and_print_query(\"SELECT SUM(Quantity) AS total FROM OrderDetails\")\n", + "execute_and_print_query(\"SELECT OrderID, SUM(Quantity) AS [Total Quantity] FROM OrderDetails GROUP BY OrderID\")\n", + "execute_and_print_query(\"SELECT SUM(Quantity * 10) FROM OrderDetails\")\n", + "execute_and_print_query(\"SELECT SUM(Price * Quantity) FROM OrderDetails LEFT JOIN Products ON OrderDetails.ProductID = Products.ProductID\")" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT AVG(Price) FROM Products\n", + "(28.866363636363637,)\n", + 
"--------------------------------------------------\n", + "SELECT AVG(Price) FROM Products WHERE CategoryID = 1\n", + "(37.979166666666664,)\n", + "--------------------------------------------------\n", + "SELECT AVG(Price) AS [average price] FROM Products\n", + "(28.866363636363637,)\n", + "--------------------------------------------------\n", + "SELECT * FROM Products WHERE price > (SELECT AVG(price) FROM Products)\n", + "(7, \"Uncle Bob's Organic Dried Pears\", 3, 7, '12 - 1 lb pkgs.', 30)\n", + "(8, 'Northwoods Cranberry Sauce', 3, 2, '12 - 12 oz jars', 40)\n", + "(9, 'Mishi Kobe Niku', 4, 6, '18 - 500 g pkgs.', 97)\n", + "(10, 'Ikura', 4, 8, '12 - 200 ml jars', 31)\n", + "(12, 'Queso Manchego La Pastora', 5, 4, '10 - 500 g pkgs.', 38)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT AVG(Price) AS AveragePrice, CategoryID FROM Products GROUP BY CategoryID\n", + "(37.979166666666664, 1)\n", + "(23.0625, 2)\n", + "(25.16, 3)\n", + "(28.73, 4)\n", + "(20.25, 5)\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Avg\n", + "execute_and_print_query(\"SELECT AVG(Price) FROM Products\")\n", + "execute_and_print_query(\"SELECT AVG(Price) FROM Products WHERE CategoryID = 1\")\n", + "execute_and_print_query(\"SELECT AVG(Price) AS [average price] FROM Products\")\n", + "execute_and_print_query(\"SELECT * FROM Products WHERE price > (SELECT AVG(price) FROM Products)\")\n", + "execute_and_print_query(\"SELECT AVG(Price) AS AveragePrice, CategoryID FROM Products GROUP BY CategoryID\")" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT * FROM Customers WHERE CustomerName LIKE 'a%'\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. 
de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE city LIKE 'L_nd__'\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(11, \"B''s Beverages\", 'Victoria Ashworth', 'Fauntleroy Circus', 'London', 'EC2 5NT', 'UK')\n", + "(16, 'Consolidated Holdings', 'Elizabeth Brown', 'Berkeley Gardens 12 Brewery', 'London', 'WX1 6LT', 'UK')\n", + "(19, 'Eastern Connection', 'Ann Devon', '35 King George', 'London', 'WX3 6FW', 'UK')\n", + "(53, 'North/South', 'Simon Crowther', 'South House 300 Queensbridge', 'London', 'SW7 1RZ', 'UK')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE city LIKE '%L%'\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 
57', 'Berlin', '12209', 'Germany')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "(9, \"Bon app''\", 'Laurence Lebihans', '12, rue des Bouchers', 'Marseille', '13008', 'France')\n", + "(11, \"B''s Beverages\", 'Victoria Ashworth', 'Fauntleroy Circus', 'London', 'EC2 5NT', 'UK')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerName LIKE 'La%'\n", + "(40, \"La corne d''abondance\", 'Daniel Tonini', \"67, avenue de l''Europe\", 'Versailles', '78000', 'France')\n", + "(41, \"La maison d''Asie\", 'Annette Roulet', '1 rue Alsace-Lorraine', 'Toulouse', '31000', 'France')\n", + "(42, 'Laughing Bacchus Wine Cellars', 'Yoshi Tannamuri', '1900 Oak St.', 'Vancouver', 'V3F 2K1', 'Canada')\n", + "(43, 'Lazy K Kountry Store', 'John Steel', '12 Orchestra Terrace', 'Walla Walla', '99362', 'USA')\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerName LIKE 'a%' OR CustomerName LIKE 'b%'\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. 
de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerName LIKE '%a'\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(13, 'Centro comercial Moctezuma', 'Francisco Chang', 'Sierras de Granada 9993', 'México D.F.', '5022', 'Mexico')\n", + "(30, 'Godos Cocina Típica', 'José Pedro Freyre', 'C/ Romero, 33', 'Sevilla', '41101', 'Spain')\n", + "(61, 'Que Delícia', 'Bernardo Batista', 'Rua da Panificadora, 12', 'Rio de Janeiro', '02389-673', 'Brazil')\n", + "(62, 'Queen Cozinha', 'Lúcia Carvalho', 'Alameda dos Canàrios, 891', 'São Paulo', '05487-020', 'Brazil')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerName LIKE 'b%s'\n", + "(7, 'Blondel père et fils', 'Frédérique Citeaux', '24, place Kléber', 'Strasbourg', '67000', 'France')\n", + "(8, 'Bólido Comidas preparadas', 'Martín Sommer', 'C/ Araquil, 67', 'Madrid', '28023', 'Spain')\n", + "(11, \"B''s Beverages\", 'Victoria Ashworth', 'Fauntleroy Circus', 'London', 'EC2 5NT', 'UK')\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerName LIKE '%or%'\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(36, 'Hungry Coyote Import Store', 'Yoshi Latimer', 'City Center Plaza 516 Main St.', 'Elgin', '97827', 'USA')\n", + "(40, \"La corne d''abondance\", 'Daniel Tonini', \"67, avenue 
de l''Europe\", 'Versailles', '78000', 'France')\n", + "(43, 'Lazy K Kountry Store', 'John Steel', '12 Orchestra Terrace', 'Walla Walla', '99362', 'USA')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerName LIKE 'a__%'\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerName LIKE '_r%'\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(17, 'Drachenblut Delikatessend', 'Sven Ottlieb', 'Walserweg 21', 'Aachen', '52066', 'Germany')\n", + "(20, 'Ernst Handel', 'Roland Mendel', 'Kirchgasse 6', 'Graz', '8010', 'Austria')\n", + "(25, 'Frankenversand', 'Peter Franken', 'Berliner Platz 43', 'München', '80805', 'Germany')\n", + "(26, 'France restauration', 'Carine Schmitt', '54, rue Royale', 'Nantes', '44000', 'France')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE Country LIKE 'Spain'\n", + "(8, 'Bólido Comidas preparadas', 'Martín Sommer', 'C/ Araquil, 67', 'Madrid', '28023', 'Spain')\n", + "(22, 'FISSA Fabrica Inter. 
Salchichas S.A.', 'Diego Roel', 'C/ Moralzarzal, 86', 'Madrid', '28034', 'Spain')\n", + "(29, 'Galería del gastrónomo', 'Eduardo Saavedra', 'Rambla de Cataluña, 23', 'Barcelona', '8022', 'Spain')\n", + "(30, 'Godos Cocina Típica', 'José Pedro Freyre', 'C/ Romero, 33', 'Sevilla', '41101', 'Spain')\n", + "(69, 'Romero y tomillo', 'Alejandra Camino', 'Gran Vía, 1', 'Madrid', '28001', 'Spain')\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Like\n", + "# %: 0개 이상의 캐릭터\n", + "# _: 정확히 1개 캐릭터\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE 'a%'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE city LIKE 'L_nd__'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE city LIKE '%L%'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE 'La%'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE 'a%' OR CustomerName LIKE 'b%'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE '%a'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE 'b%s'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE '%or%'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE 'a__%'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE '_r%'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE Country LIKE 'Spain'\")" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT * FROM Customers WHERE CustomerName LIKE 'a%'\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. 
de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerName LIKE '%es'\n", + "(11, \"B''s Beverages\", 'Victoria Ashworth', 'Fauntleroy Circus', 'London', 'EC2 5NT', 'UK')\n", + "(23, 'Folies gourmandes', 'Martine Rancé', '184, chaussée de Tournai', 'Lille', '59000', 'France')\n", + "(31, 'Gourmet Lanchonetes', 'André Fonseca', 'Av. Brasil, 442', 'Campinas', '04876-786', 'Brazil')\n", + "(34, 'Hanari Carnes', 'Mario Pontes', 'Rua do Paço, 67', 'Rio de Janeiro', '05454-876', 'Brazil')\n", + "(47, 'LINO-Delicateses', 'Felipe Izquierdo', 'Ave. 5 de Mayo Porlamar', 'I. de Margarita', '4980', 'Venezuela')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerName LIKE '%mer%'\n", + "(13, 'Centro comercial Moctezuma', 'Francisco Chang', 'Sierras de Granada 9993', 'México D.F.', '5022', 'Mexico')\n", + "(46, 'LILA-Supermercado', 'Carlos González', 'Carrera 52 con Ave. Bolívar #65-98 Llano Largo', 'Barquisimeto', '3508', 'Venezuela')\n", + "(69, 'Romero y tomillo', 'Alejandra Camino', 'Gran Vía, 1', 'Madrid', '28001', 'Spain')\n", + "(81, 'Tradição Hipermercados', 'Anabela Domingues', 'Av. 
Inês de Castro, 414', 'São Paulo', '05634-030', 'Brazil')\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE City LIKE '_ondon'\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(11, \"B''s Beverages\", 'Victoria Ashworth', 'Fauntleroy Circus', 'London', 'EC2 5NT', 'UK')\n", + "(16, 'Consolidated Holdings', 'Elizabeth Brown', 'Berkeley Gardens 12 Brewery', 'London', 'WX1 6LT', 'UK')\n", + "(19, 'Eastern Connection', 'Ann Devon', '35 King George', 'London', 'WX3 6FW', 'UK')\n", + "(53, 'North/South', 'Simon Crowther', 'South House 300 Queensbridge', 'London', 'SW7 1RZ', 'UK')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE City LIKE 'L___on'\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(11, \"B''s Beverages\", 'Victoria Ashworth', 'Fauntleroy Circus', 'London', 'EC2 5NT', 'UK')\n", + "(16, 'Consolidated Holdings', 'Elizabeth Brown', 'Berkeley Gardens 12 Brewery', 'London', 'WX1 6LT', 'UK')\n", + "(19, 'Eastern Connection', 'Ann Devon', '35 King George', 'London', 'WX3 6FW', 'UK')\n", + "(53, 'North/South', 'Simon Crowther', 'South House 300 Queensbridge', 'London', 'SW7 1RZ', 'UK')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerName LIKE 'a__%'\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. 
de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerName LIKE '_r%'\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(17, 'Drachenblut Delikatessend', 'Sven Ottlieb', 'Walserweg 21', 'Aachen', '52066', 'Germany')\n", + "(20, 'Ernst Handel', 'Roland Mendel', 'Kirchgasse 6', 'Graz', '8010', 'Austria')\n", + "(25, 'Frankenversand', 'Peter Franken', 'Berliner Platz 43', 'München', '80805', 'Germany')\n", + "(26, 'France restauration', 'Carine Schmitt', '54, rue Royale', 'Nantes', '44000', 'France')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE Country LIKE 'Spain'\n", + "(8, 'Bólido Comidas preparadas', 'Martín Sommer', 'C/ Araquil, 67', 'Madrid', '28023', 'Spain')\n", + "(22, 'FISSA Fabrica Inter. 
Salchichas S.A.', 'Diego Roel', 'C/ Moralzarzal, 86', 'Madrid', '28034', 'Spain')\n", + "(29, 'Galería del gastrónomo', 'Eduardo Saavedra', 'Rambla de Cataluña, 23', 'Barcelona', '8022', 'Spain')\n", + "(30, 'Godos Cocina Típica', 'José Pedro Freyre', 'C/ Romero, 33', 'Sevilla', '41101', 'Spain')\n", + "(69, 'Romero y tomillo', 'Alejandra Camino', 'Gran Vía, 1', 'Madrid', '28001', 'Spain')\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Wildcards\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE 'a%'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE '%es'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE '%mer%'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE City LIKE '_ondon'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE City LIKE 'L___on'\")\n", + "# not supported in mysql, postgres\n", + "# execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE '[bsp]%'\")\n", + "# execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE '[a-f]%'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE 'a__%'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerName LIKE '_r%'\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE Country LIKE 'Spain'\")" + ] + }, + { + "cell_type": "code", + "execution_count": 184, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT * FROM Customers WHERE Country IN ('Germany', 'France', 'UK')\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(6, 'Blauer See Delikatessen', 'Hanna Moos', 'Forsterstr. 
57', 'Mannheim', '68306', 'Germany')\n", + "(7, 'Blondel père et fils', 'Frédérique Citeaux', '24, place Kléber', 'Strasbourg', '67000', 'France')\n", + "(9, \"Bon app''\", 'Laurence Lebihans', '12, rue des Bouchers', 'Marseille', '13008', 'France')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE Country NOT IN ('Germany', 'France', 'UK')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "(8, 'Bólido Comidas preparadas', 'Martín Sommer', 'C/ Araquil, 67', 'Madrid', '28023', 'Spain')\n", + "(10, 'Bottom-Dollar Marketse', 'Elizabeth Lincoln', '23 Tsawassen Blvd.', 'Tsawassen', 'T2F 8M4', 'Canada')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerID IN (SELECT CustomerID FROM Orders)\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "(7, 'Blondel père et fils', 'Frédérique Citeaux', '24, place Kléber', 'Strasbourg', '67000', 'France')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE CustomerID NOT IN (SELECT CustomerID FROM Orders)\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(6, 'Blauer See Delikatessen', 'Hanna Moos', 'Forsterstr. 
57', 'Mannheim', '68306', 'Germany')\n", + "(12, 'Cactus Comidas para llevar', 'Patricio Simpson', 'Cerrito 333', 'Buenos Aires', '1010', 'Argentina')\n", + "(22, 'FISSA Fabrica Inter. Salchichas S.A.', 'Diego Roel', 'C/ Moralzarzal, 86', 'Madrid', '28034', 'Spain')\n", + "(26, 'France restauration', 'Carine Schmitt', '54, rue Royale', 'Nantes', '44000', 'France')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL In\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE Country IN ('Germany', 'France', 'UK')\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE Country NOT IN ('Germany', 'France', 'UK')\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerID IN (SELECT CustomerID FROM Orders)\")\n", + "execute_and_print_query(\"SELECT * FROM Customers WHERE CustomerID NOT IN (SELECT CustomerID FROM Orders)\")" + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT * FROM Products WHERE Price BETWEEN 10 AND 20\n", + "(1, 'Chais', 1, 1, '10 boxes x 20 bags', 18)\n", + "(2, 'Chang', 1, 1, '24 - 12 oz bottles', 19)\n", + "(3, 'Aniseed Syrup', 1, 2, '12 - 550 ml bottles', 10)\n", + "(15, 'Genen Shouyu', 6, 2, '24 - 250 ml bottles', 15.5)\n", + "(16, 'Pavlova', 7, 3, '32 - 500 g boxes', 17.45)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Products WHERE Price NOT BETWEEN 10 AND 20\n", + "(4, \"Chef Anton's Cajun Seasoning\", 2, 2, '48 - 6 oz jars', 22)\n", + "(5, \"Chef Anton's Gumbo Mix\", 2, 2, '36 boxes', 21.35)\n", + "(6, \"Grandma's Boysenberry Spread\", 3, 2, '12 - 8 oz jars', 25)\n", + "(7, \"Uncle Bob's Organic Dried Pears\", 3, 7, '12 - 1 lb pkgs.', 30)\n", + "(8, 'Northwoods Cranberry Sauce', 3, 2, '12 - 12 oz jars', 40)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM 
Products WHERE Price BETWEEN 10 AND 20 AND CategoryID IN (1,2,3)\n", + "(1, 'Chais', 1, 1, '10 boxes x 20 bags', 18)\n", + "(2, 'Chang', 1, 1, '24 - 12 oz bottles', 19)\n", + "(3, 'Aniseed Syrup', 1, 2, '12 - 550 ml bottles', 10)\n", + "(15, 'Genen Shouyu', 6, 2, '24 - 250 ml bottles', 15.5)\n", + "(16, 'Pavlova', 7, 3, '32 - 500 g boxes', 17.45)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Products WHERE ProductName BETWEEN 'Carnarvon Tigers' AND 'Mozzarella di Giovanni' ORDER BY ProductName\n", + "(18, 'Carnarvon Tigers', 7, 8, '16 kg pkg.', 62.5)\n", + "(1, 'Chais', 1, 1, '10 boxes x 20 bags', 18)\n", + "(2, 'Chang', 1, 1, '24 - 12 oz bottles', 19)\n", + "(39, 'Chartreuse verte', 18, 1, '750 cc per bottle', 18)\n", + "(4, \"Chef Anton's Cajun Seasoning\", 2, 2, '48 - 6 oz jars', 22)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Products WHERE ProductName BETWEEN \"Carnarvon Tigers\" AND \"Chef Anton's Cajun Seasoning\" ORDER BY ProductName\n", + "(18, 'Carnarvon Tigers', 7, 8, '16 kg pkg.', 62.5)\n", + "(1, 'Chais', 1, 1, '10 boxes x 20 bags', 18)\n", + "(2, 'Chang', 1, 1, '24 - 12 oz bottles', 19)\n", + "(39, 'Chartreuse verte', 18, 1, '750 cc per bottle', 18)\n", + "(4, \"Chef Anton's Cajun Seasoning\", 2, 2, '48 - 6 oz jars', 22)\n", + "--------------------------------------------------\n", + "SELECT * FROM Products WHERE ProductName NOT BETWEEN 'Carnarvon Tigers' AND 'Mozzarella di Giovanni' ORDER BY ProductName\n", + "(17, 'Alice Mutton', 7, 6, '20 - 1 kg tins', 39)\n", + "(3, 'Aniseed Syrup', 1, 2, '12 - 550 ml bottles', 10)\n", + "(40, 'Boston Crab Meat', 19, 8, '24 - 4 oz tins', 18.4)\n", + "(60, 'Camembert Pierrot', 28, 4, '15 - 300 g rounds', 34)\n", + "(30, 'Nord-Ost Matjeshering', 13, 8, '10 - 200 g glasses', 25.89)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Orders WHERE OrderDate BETWEEN '1996-07-01' AND 
'1996-07-31'\n", + "(10248, 90, 5, '1996-07-04', 3)\n", + "(10249, 81, 6, '1996-07-05', 1)\n", + "(10250, 34, 4, '1996-07-08', 2)\n", + "(10251, 84, 3, '1996-07-08', 1)\n", + "(10252, 76, 4, '1996-07-09', 2)\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Between\n", + "execute_and_print_query(\"SELECT * FROM Products WHERE Price BETWEEN 10 AND 20\")\n", + "execute_and_print_query(\"SELECT * FROM Products WHERE Price NOT BETWEEN 10 AND 20\")\n", + "execute_and_print_query(\"SELECT * FROM Products WHERE Price BETWEEN 10 AND 20 AND CategoryID IN (1,2,3)\")\n", + "execute_and_print_query(\"SELECT * FROM Products WHERE ProductName BETWEEN 'Carnarvon Tigers' AND 'Mozzarella di Giovanni' ORDER BY ProductName\")\n", + "execute_and_print_query(\"\"\"SELECT * FROM Products WHERE ProductName BETWEEN \"Carnarvon Tigers\" AND \"Chef Anton's Cajun Seasoning\" ORDER BY ProductName\"\"\")\n", + "execute_and_print_query(\"SELECT * FROM Products WHERE ProductName NOT BETWEEN 'Carnarvon Tigers' AND 'Mozzarella di Giovanni' ORDER BY ProductName\")\n", + "# Not supported in mysql\n", + "# execute_and_print_query(\"SELECT * FROM Orders WHERE OrderDate BETWEEN #07/01/1996# AND #07/31/1996#\")\n", + "execute_and_print_query(\"SELECT * FROM Orders WHERE OrderDate BETWEEN '1996-07-01' AND '1996-07-31'\")" + ] + }, + { + "cell_type": "code", + "execution_count": 186, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT CustomerID AS ID FROM Customers\n", + "(1,)\n", + "(2,)\n", + "(3,)\n", + "(4,)\n", + "(5,)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT CustomerID ID FROM Customers\n", + "(1,)\n", + "(2,)\n", + "(3,)\n", + "(4,)\n", + "(5,)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT CustomerID AS ID, CustomerName AS Customer FROM Customers\n", + "(1, 'Alfreds Futterkiste')\n", + "(2, 'Ana Trujillo 
Emparedados y helados')\n", + "(3, 'Antonio Moreno Taquería')\n", + "(4, 'Around the Horn')\n", + "(5, 'Berglunds snabbköp')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT ProductName AS [My Great Products] FROM Products\n", + "('Chais',)\n", + "('Chang',)\n", + "('Aniseed Syrup',)\n", + "(\"Chef Anton's Cajun Seasoning\",)\n", + "(\"Chef Anton's Gumbo Mix\",)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT ProductName AS \"My Great Products\" FROM Products\n", + "('Chais',)\n", + "('Chang',)\n", + "('Aniseed Syrup',)\n", + "(\"Chef Anton's Cajun Seasoning\",)\n", + "(\"Chef Anton's Gumbo Mix\",)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT CustomerName, CONCAT(Address,', ',PostalCode,', ',City,', ',Country) AS Address FROM Customers\n", + "('Alfreds Futterkiste', 'Obere Str. 57, 12209, Berlin, Germany')\n", + "('Ana Trujillo Emparedados y helados', 'Avda. de la Constitución 2222, 5021, México D.F., Mexico')\n", + "('Antonio Moreno Taquería', 'Mataderos 2312, 5023, México D.F., Mexico')\n", + "('Around the Horn', '120 Hanover Sq., WA1 1DP, London, UK')\n", + "('Berglunds snabbköp', 'Berguvsvägen 8, S-958 22, Luleå, Sweden')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers AS Persons\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. 
de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT o.OrderID, o.OrderDate, c.CustomerName FROM Customers AS c, Orders AS o WHERE c.CustomerName='Around the Horn' AND c.CustomerID=o.CustomerID\n", + "(10355, '1996-11-15', 'Around the Horn')\n", + "(10383, '1996-12-16', 'Around the Horn')\n", + "--------------------------------------------------\n", + "SELECT Orders.OrderID, Orders.OrderDate, Customers.CustomerName FROM Customers, Orders WHERE Customers.CustomerName='Around the Horn' AND Customers.CustomerID=Orders.CustomerID\n", + "(10355, '1996-11-15', 'Around the Horn')\n", + "(10383, '1996-12-16', 'Around the Horn')\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Aliases\n", + "execute_and_print_query(\"SELECT CustomerID AS ID FROM Customers\")\n", + "execute_and_print_query(\"SELECT CustomerID ID FROM Customers\")\n", + "execute_and_print_query(\"SELECT CustomerID AS ID, CustomerName AS Customer FROM Customers\")\n", + "execute_and_print_query(\"SELECT ProductName AS [My Great Products] FROM Products\")\n", + "execute_and_print_query('SELECT ProductName AS \"My Great Products\" FROM Products')\n", + "execute_and_print_query(\"SELECT CustomerName, CONCAT(Address,', ',PostalCode,', ',City,', ',Country) AS Address FROM Customers\")\n", + "execute_and_print_query(\"SELECT * FROM Customers AS Persons\")\n", + "execute_and_print_query(\"SELECT o.OrderID, o.OrderDate, c.CustomerName FROM Customers AS c, Orders AS o WHERE c.CustomerName='Around the Horn' AND c.CustomerID=o.CustomerID\")\n", + "execute_and_print_query(\"SELECT 
Orders.OrderID, Orders.OrderDate, Customers.CustomerName FROM Customers, Orders WHERE Customers.CustomerName='Around the Horn' AND Customers.CustomerID=Orders.CustomerID\")" + ] + }, + { + "cell_type": "code", + "execution_count": 187, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT Orders.OrderID, Customers.CustomerName, Orders.OrderDate\n", + "FROM Orders\n", + "INNER JOIN Customers ON Orders.CustomerID=Customers.CustomerID\n", + "(10248, 'Wilman Kala', '1996-07-04')\n", + "(10249, 'Tradição Hipermercados', '1996-07-05')\n", + "(10250, 'Hanari Carnes', '1996-07-08')\n", + "(10251, 'Victuailles en stock', '1996-07-08')\n", + "(10252, 'Suprêmes délices', '1996-07-09')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Joins\n", + "execute_and_print_query(\"\"\"SELECT Orders.OrderID, Customers.CustomerName, Orders.OrderDate\n", + "FROM Orders\n", + "INNER JOIN Customers ON Orders.CustomerID=Customers.CustomerID\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 188, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT ProductID, ProductName, CategoryName\n", + "FROM Products\n", + "INNER JOIN Categories ON Products.CategoryID = Categories.CategoryID\n", + "(1, 'Chais', 'Beverages')\n", + "(2, 'Chang', 'Beverages')\n", + "(3, 'Aniseed Syrup', 'Condiments')\n", + "(4, \"Chef Anton's Cajun Seasoning\", 'Condiments')\n", + "(5, \"Chef Anton's Gumbo Mix\", 'Condiments')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT Products.ProductID, Products.ProductName, Categories.CategoryName\n", + "FROM Products\n", + "INNER JOIN Categories ON Products.CategoryID = Categories.CategoryID\n", + "(1, 'Chais', 'Beverages')\n", + "(2, 'Chang', 'Beverages')\n", + "(3, 'Aniseed Syrup', 'Condiments')\n", + "(4, \"Chef Anton's Cajun Seasoning\", 'Condiments')\n", + "(5, \"Chef 
Anton's Gumbo Mix\", 'Condiments')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT Products.ProductID, Products.ProductName, Categories.CategoryName\n", + "FROM Products\n", + "JOIN Categories ON Products.CategoryID = Categories.CategoryID\n", + "(1, 'Chais', 'Beverages')\n", + "(2, 'Chang', 'Beverages')\n", + "(3, 'Aniseed Syrup', 'Condiments')\n", + "(4, \"Chef Anton's Cajun Seasoning\", 'Condiments')\n", + "(5, \"Chef Anton's Gumbo Mix\", 'Condiments')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT Orders.OrderID, Customers.CustomerName, Shippers.ShipperName\n", + "FROM ((Orders\n", + "INNER JOIN Customers ON Orders.CustomerID = Customers.CustomerID)\n", + "INNER JOIN Shippers ON Orders.ShipperID = Shippers.ShipperID)\n", + "(10248, 'Wilman Kala', 'Federal Shipping')\n", + "(10249, 'Tradição Hipermercados', 'Speedy Express')\n", + "(10250, 'Hanari Carnes', 'United Package')\n", + "(10251, 'Victuailles en stock', 'Speedy Express')\n", + "(10252, 'Suprêmes délices', 'United Package')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Inner Join\n", + "# 그냥 join이랑 inner join이랑 같다.\n", + "execute_and_print_query(\"\"\"SELECT ProductID, ProductName, CategoryName\n", + "FROM Products\n", + "INNER JOIN Categories ON Products.CategoryID = Categories.CategoryID\"\"\")\n", + "execute_and_print_query(\"\"\"SELECT Products.ProductID, Products.ProductName, Categories.CategoryName\n", + "FROM Products\n", + "INNER JOIN Categories ON Products.CategoryID = Categories.CategoryID\"\"\")\n", + "execute_and_print_query(\"\"\"SELECT Products.ProductID, Products.ProductName, Categories.CategoryName\n", + "FROM Products\n", + "JOIN Categories ON Products.CategoryID = Categories.CategoryID\"\"\")\n", + "execute_and_print_query(\"\"\"SELECT Orders.OrderID, Customers.CustomerName, Shippers.ShipperName\n", + "FROM ((Orders\n", + "INNER JOIN Customers ON 
Orders.CustomerID = Customers.CustomerID)\n", + "INNER JOIN Shippers ON Orders.ShipperID = Shippers.ShipperID)\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 189, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT Customers.CustomerName, Orders.OrderID\n", + "FROM Customers\n", + "LEFT JOIN Orders ON Customers.CustomerID = Orders.CustomerID\n", + "ORDER BY Customers.CustomerName\n", + "('Alfreds Futterkiste', None)\n", + "('Ana Trujillo Emparedados y helados', 10308)\n", + "('Antonio Moreno Taquería', 10365)\n", + "('Around the Horn', 10355)\n", + "('Around the Horn', 10383)\n", + "(\"B''s Beverages\", 10289)\n", + "('Berglunds snabbköp', 10278)\n", + "('Berglunds snabbköp', 10280)\n", + "('Berglunds snabbköp', 10384)\n", + "('Blauer See Delikatessen', None)\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Left Join\n", + "execute_and_print_query(\"\"\"SELECT Customers.CustomerName, Orders.OrderID\n", + "FROM Customers\n", + "LEFT JOIN Orders ON Customers.CustomerID = Orders.CustomerID\n", + "ORDER BY Customers.CustomerName\"\"\", limit=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 190, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT Orders.OrderID, Employees.LastName, Employees.FirstName\n", + "FROM Orders\n", + "RIGHT JOIN Employees ON Orders.EmployeeID = Employees.EmployeeID\n", + "ORDER BY Orders.OrderID\n", + "(None, 'West', 'Adam')\n", + "(10248, 'Buchanan', 'Steven')\n", + "(10249, 'Suyama', 'Michael')\n", + "(10250, 'Peacock', 'Margaret')\n", + "(10251, 'Leverling', 'Janet')\n", + "(10252, 'Peacock', 'Margaret')\n", + "(10253, 'Leverling', 'Janet')\n", + "(10254, 'Buchanan', 'Steven')\n", + "(10255, 'Dodsworth', 'Anne')\n", + "(10256, 'Leverling', 'Janet')\n", + "(10257, 'Peacock', 'Margaret')\n", + "(10258, 'Davolio', 'Nancy')\n", + "(10259, 'Peacock', 
'Margaret')\n", + "(10260, 'Peacock', 'Margaret')\n", + "(10261, 'Peacock', 'Margaret')\n", + "(10262, 'Callahan', 'Laura')\n", + "(10263, 'Dodsworth', 'Anne')\n", + "(10264, 'Suyama', 'Michael')\n", + "(10265, 'Fuller', 'Andrew')\n", + "(10266, 'Leverling', 'Janet')\n", + "(10267, 'Peacock', 'Margaret')\n", + "(10268, 'Callahan', 'Laura')\n", + "(10269, 'Buchanan', 'Steven')\n", + "(10270, 'Davolio', 'Nancy')\n", + "(10271, 'Suyama', 'Michael')\n", + "(10272, 'Suyama', 'Michael')\n", + "(10273, 'Leverling', 'Janet')\n", + "(10274, 'Suyama', 'Michael')\n", + "(10275, 'Davolio', 'Nancy')\n", + "(10276, 'Callahan', 'Laura')\n", + "(10277, 'Fuller', 'Andrew')\n", + "(10278, 'Callahan', 'Laura')\n", + "(10279, 'Callahan', 'Laura')\n", + "(10280, 'Fuller', 'Andrew')\n", + "(10281, 'Peacock', 'Margaret')\n", + "(10282, 'Peacock', 'Margaret')\n", + "(10283, 'Leverling', 'Janet')\n", + "(10284, 'Peacock', 'Margaret')\n", + "(10285, 'Davolio', 'Nancy')\n", + "(10286, 'Callahan', 'Laura')\n", + "(10287, 'Callahan', 'Laura')\n", + "(10288, 'Peacock', 'Margaret')\n", + "(10289, 'King', 'Robert')\n", + "(10290, 'Callahan', 'Laura')\n", + "(10291, 'Suyama', 'Michael')\n", + "(10292, 'Davolio', 'Nancy')\n", + "(10293, 'Davolio', 'Nancy')\n", + "(10294, 'Peacock', 'Margaret')\n", + "(10295, 'Fuller', 'Andrew')\n", + "(10296, 'Suyama', 'Michael')\n", + "(10297, 'Buchanan', 'Steven')\n", + "(10298, 'Suyama', 'Michael')\n", + "(10299, 'Peacock', 'Margaret')\n", + "(10300, 'Fuller', 'Andrew')\n", + "(10301, 'Callahan', 'Laura')\n", + "(10302, 'Peacock', 'Margaret')\n", + "(10303, 'King', 'Robert')\n", + "(10304, 'Davolio', 'Nancy')\n", + "(10305, 'Callahan', 'Laura')\n", + "(10306, 'Davolio', 'Nancy')\n", + "(10307, 'Fuller', 'Andrew')\n", + "(10308, 'King', 'Robert')\n", + "(10309, 'Leverling', 'Janet')\n", + "(10310, 'Callahan', 'Laura')\n", + "(10311, 'Davolio', 'Nancy')\n", + "(10312, 'Fuller', 'Andrew')\n", + "(10313, 'Fuller', 'Andrew')\n", + "(10314, 'Davolio', 'Nancy')\n", + 
"(10315, 'Peacock', 'Margaret')\n", + "(10316, 'Davolio', 'Nancy')\n", + "(10317, 'Suyama', 'Michael')\n", + "(10318, 'Callahan', 'Laura')\n", + "(10319, 'King', 'Robert')\n", + "(10320, 'Buchanan', 'Steven')\n", + "(10321, 'Leverling', 'Janet')\n", + "(10322, 'King', 'Robert')\n", + "(10323, 'Peacock', 'Margaret')\n", + "(10324, 'Dodsworth', 'Anne')\n", + "(10325, 'Davolio', 'Nancy')\n", + "(10326, 'Peacock', 'Margaret')\n", + "(10327, 'Fuller', 'Andrew')\n", + "(10328, 'Peacock', 'Margaret')\n", + "(10329, 'Peacock', 'Margaret')\n", + "(10330, 'Leverling', 'Janet')\n", + "(10331, 'Dodsworth', 'Anne')\n", + "(10332, 'Leverling', 'Janet')\n", + "(10333, 'Buchanan', 'Steven')\n", + "(10334, 'Callahan', 'Laura')\n", + "(10335, 'King', 'Robert')\n", + "(10336, 'King', 'Robert')\n", + "(10337, 'Peacock', 'Margaret')\n", + "(10338, 'Peacock', 'Margaret')\n", + "(10339, 'Fuller', 'Andrew')\n", + "(10340, 'Davolio', 'Nancy')\n", + "(10341, 'King', 'Robert')\n", + "(10342, 'Peacock', 'Margaret')\n", + "(10343, 'Peacock', 'Margaret')\n", + "(10344, 'Peacock', 'Margaret')\n", + "(10345, 'Fuller', 'Andrew')\n", + "(10346, 'Leverling', 'Janet')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Right Join\n", + "execute_and_print_query(\"\"\"SELECT Orders.OrderID, Employees.LastName, Employees.FirstName\n", + "FROM Orders\n", + "RIGHT JOIN Employees ON Orders.EmployeeID = Employees.EmployeeID\n", + "ORDER BY Orders.OrderID\"\"\", limit=100)" + ] + }, + { + "cell_type": "code", + "execution_count": 191, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT Customers.CustomerName, Orders.OrderID\n", + "FROM Customers\n", + "FULL OUTER JOIN Orders ON Customers.CustomerID=Orders.CustomerID\n", + "ORDER BY Customers.CustomerName\n", + "('Alfreds Futterkiste', None)\n", + "('Ana Trujillo Emparedados y helados', 10308)\n", + "('Antonio Moreno Taquería', 10365)\n", + "('Around 
the Horn', 10355)\n", + "('Around the Horn', 10383)\n", + "(\"B''s Beverages\", 10289)\n", + "('Berglunds snabbköp', 10278)\n", + "('Berglunds snabbköp', 10280)\n", + "('Berglunds snabbköp', 10384)\n", + "('Blauer See Delikatessen', None)\n", + "('Blondel père et fils', 10265)\n", + "('Blondel père et fils', 10297)\n", + "('Blondel père et fils', 10360)\n", + "('Blondel père et fils', 10436)\n", + "(\"Bon app''\", 10331)\n", + "(\"Bon app''\", 10340)\n", + "(\"Bon app''\", 10362)\n", + "('Bottom-Dollar Marketse', 10389)\n", + "('Bottom-Dollar Marketse', 10410)\n", + "('Bottom-Dollar Marketse', 10411)\n", + "('Bottom-Dollar Marketse', 10431)\n", + "('Bólido Comidas preparadas', 10326)\n", + "('Cactus Comidas para llevar', None)\n", + "('Centro comercial Moctezuma', 10259)\n", + "('Chop-suey Chinese', 10254)\n", + "('Chop-suey Chinese', 10370)\n", + "('Comércio Mineiro', 10290)\n", + "('Consolidated Holdings', 10435)\n", + "('Die Wandernde Kuh', 10301)\n", + "('Die Wandernde Kuh', 10312)\n", + "('Die Wandernde Kuh', 10348)\n", + "('Die Wandernde Kuh', 10356)\n", + "('Drachenblut Delikatessend', 10363)\n", + "('Drachenblut Delikatessend', 10391)\n", + "('Du monde entier', 10311)\n", + "('Eastern Connection', 10364)\n", + "('Eastern Connection', 10400)\n", + "('Ernst Handel', 10258)\n", + "('Ernst Handel', 10263)\n", + "('Ernst Handel', 10351)\n", + "('Ernst Handel', 10368)\n", + "('Ernst Handel', 10382)\n", + "('Ernst Handel', 10390)\n", + "('Ernst Handel', 10402)\n", + "('Ernst Handel', 10403)\n", + "('Ernst Handel', 10430)\n", + "('Ernst Handel', 10442)\n", + "('FISSA Fabrica Inter. 
Salchichas S.A.', None)\n", + "('Familia Arquibaldo', 10347)\n", + "('Familia Arquibaldo', 10386)\n", + "('Familia Arquibaldo', 10414)\n", + "('Folies gourmandes', 10408)\n", + "('Folk och fä HB', 10264)\n", + "('Folk och fä HB', 10327)\n", + "('Folk och fä HB', 10378)\n", + "('Folk och fä HB', 10434)\n", + "('France restauration', None)\n", + "('Franchi S.p.A.', 10422)\n", + "('Frankenversand', 10267)\n", + "('Frankenversand', 10337)\n", + "('Frankenversand', 10342)\n", + "('Frankenversand', 10396)\n", + "('Furia Bacalhau e Frutos do Mar', 10328)\n", + "('Furia Bacalhau e Frutos do Mar', 10352)\n", + "('GROSELLA-Restaurante', 10268)\n", + "('Galería del gastrónomo', 10366)\n", + "('Galería del gastrónomo', 10426)\n", + "('Godos Cocina Típica', 10303)\n", + "('Gourmet Lanchonetes', 10423)\n", + "('Great Lakes Food Market', None)\n", + "('HILARIÓN-Abastos', 10257)\n", + "('HILARIÓN-Abastos', 10395)\n", + "('Hanari Carnes', 10250)\n", + "('Hanari Carnes', 10253)\n", + "('Hungry Coyote Import Store', 10375)\n", + "('Hungry Coyote Import Store', 10394)\n", + "('Hungry Coyote Import Store', 10415)\n", + "('Hungry Owl All-Night Grocers', 10298)\n", + "('Hungry Owl All-Night Grocers', 10309)\n", + "('Hungry Owl All-Night Grocers', 10335)\n", + "('Hungry Owl All-Night Grocers', 10373)\n", + "('Hungry Owl All-Night Grocers', 10380)\n", + "('Hungry Owl All-Night Grocers', 10429)\n", + "('Island Trading', 10315)\n", + "('Island Trading', 10318)\n", + "('Island Trading', 10321)\n", + "('Königlich Essen', 10323)\n", + "('Königlich Essen', 10325)\n", + "('LILA-Supermercado', 10283)\n", + "('LILA-Supermercado', 10296)\n", + "('LILA-Supermercado', 10330)\n", + "('LILA-Supermercado', 10357)\n", + "('LILA-Supermercado', 10381)\n", + "('LINO-Delicateses', 10405)\n", + "(\"La corne d''abondance\", None)\n", + "(\"La maison d''Asie\", 10350)\n", + "(\"La maison d''Asie\", 10358)\n", + "(\"La maison d''Asie\", 10371)\n", + "(\"La maison d''Asie\", 10413)\n", + "(\"La maison d''Asie\", 
10425)\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Full Join\n", + "execute_and_print_query(\"\"\"SELECT Customers.CustomerName, Orders.OrderID\n", + "FROM Customers\n", + "FULL OUTER JOIN Orders ON Customers.CustomerID=Orders.CustomerID\n", + "ORDER BY Customers.CustomerName\"\"\", limit=100)" + ] + }, + { + "cell_type": "code", + "execution_count": 192, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT A.CustomerName AS CustomerName1, B.CustomerName AS CustomerName2, A.City\n", + "FROM Customers A, Customers B\n", + "WHERE A.CustomerID <> B.CustomerID\n", + "AND A.City = B.City\n", + "ORDER BY A.City\n", + "('Cactus Comidas para llevar', 'Océano Atlántico Ltda.', 'Buenos Aires')\n", + "('Cactus Comidas para llevar', 'Rancho grande', 'Buenos Aires')\n", + "('Océano Atlántico Ltda.', 'Cactus Comidas para llevar', 'Buenos Aires')\n", + "('Océano Atlántico Ltda.', 'Rancho grande', 'Buenos Aires')\n", + "('Rancho grande', 'Cactus Comidas para llevar', 'Buenos Aires')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Self Join\n", + "execute_and_print_query(\"\"\"SELECT A.CustomerName AS CustomerName1, B.CustomerName AS CustomerName2, A.City\n", + "FROM Customers A, Customers B\n", + "WHERE A.CustomerID <> B.CustomerID\n", + "AND A.City = B.City\n", + "ORDER BY A.City\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 193, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT City FROM Customers\n", + "UNION\n", + "SELECT City FROM Suppliers\n", + "ORDER BY City\n", + "\n", + "('Aachen',)\n", + "('Albuquerque',)\n", + "('Anchorage',)\n", + "('Ann Arbor',)\n", + "('Annecy',)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT City FROM Customers\n", + "UNION ALL\n", + "SELECT City FROM Suppliers\n", 
+ "ORDER BY City\n", + "\n", + "('Aachen',)\n", + "('Albuquerque',)\n", + "('Anchorage',)\n", + "('Ann Arbor',)\n", + "('Annecy',)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT City, Country FROM Customers\n", + "WHERE Country='Germany'\n", + "UNION\n", + "SELECT City, Country FROM Suppliers\n", + "WHERE Country='Germany'\n", + "ORDER BY City\n", + "\n", + "('Aachen', 'Germany')\n", + "('Berlin', 'Germany')\n", + "('Brandenburg', 'Germany')\n", + "('Cunewalde', 'Germany')\n", + "('Cuxhaven', 'Germany')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT City, Country FROM Customers\n", + "WHERE Country='Germany'\n", + "UNION ALL\n", + "SELECT City, Country FROM Suppliers\n", + "WHERE Country='Germany'\n", + "ORDER BY City\n", + "('Aachen', 'Germany')\n", + "('Berlin', 'Germany')\n", + "('Berlin', 'Germany')\n", + "('Brandenburg', 'Germany')\n", + "('Cunewalde', 'Germany')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT 'Customer' AS Type, ContactName, City, Country\n", + "FROM Customers\n", + "UNION\n", + "SELECT 'Supplier', ContactName, City, Country\n", + "FROM Suppliers\n", + "('Customer', 'Alejandra Camino', 'Madrid', 'Spain')\n", + "('Customer', 'Alexander Feuer', 'Leipzig', 'Germany')\n", + "('Customer', 'Ana Trujillo', 'México D.F.', 'Mexico')\n", + "('Customer', 'Anabela Domingues', 'São Paulo', 'Brazil')\n", + "('Customer', 'André Fonseca', 'Campinas', 'Brazil')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Union\n", + "execute_and_print_query(\"\"\"SELECT City FROM Customers\n", + "UNION\n", + "SELECT City FROM Suppliers\n", + "ORDER BY City\n", + "\"\"\")\n", + "execute_and_print_query(\"\"\"SELECT City FROM Customers\n", + "UNION ALL\n", + "SELECT City FROM Suppliers\n", + "ORDER BY City\n", + "\"\"\")\n", + "execute_and_print_query(\"\"\"SELECT City, Country FROM Customers\n", + "WHERE 
Country='Germany'\n", + "UNION\n", + "SELECT City, Country FROM Suppliers\n", + "WHERE Country='Germany'\n", + "ORDER BY City\n", + "\"\"\")\n", + "execute_and_print_query(\"\"\"SELECT City, Country FROM Customers\n", + "WHERE Country='Germany'\n", + "UNION ALL\n", + "SELECT City, Country FROM Suppliers\n", + "WHERE Country='Germany'\n", + "ORDER BY City\"\"\")\n", + "execute_and_print_query(\"\"\"SELECT 'Customer' AS Type, ContactName, City, Country\n", + "FROM Customers\n", + "UNION\n", + "SELECT 'Supplier', ContactName, City, Country\n", + "FROM Suppliers\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 194, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT COUNT(CustomerID), Country\n", + "FROM Customers\n", + "GROUP BY Country\n", + "(3, 'Argentina')\n", + "(2, 'Austria')\n", + "(2, 'Belgium')\n", + "(9, 'Brazil')\n", + "(3, 'Canada')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT COUNT(CustomerID), Country\n", + "FROM Customers\n", + "GROUP BY Country\n", + "ORDER BY COUNT(CustomerID) DESC\n", + "(13, 'USA')\n", + "(11, 'Germany')\n", + "(11, 'France')\n", + "(9, 'Brazil')\n", + "(7, 'UK')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT Shippers.ShipperName, COUNT(Orders.OrderID) AS NumberOfOrders FROM Orders\n", + "LEFT JOIN Shippers ON Orders.ShipperID = Shippers.ShipperID\n", + "GROUP BY ShipperName\n", + "('Federal Shipping', 68)\n", + "('Speedy Express', 54)\n", + "('United Package', 74)\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Group By\n", + "execute_and_print_query(\"\"\"SELECT COUNT(CustomerID), Country\n", + "FROM Customers\n", + "GROUP BY Country\"\"\")\n", + "execute_and_print_query(\"\"\"SELECT COUNT(CustomerID), Country\n", + "FROM Customers\n", + "GROUP BY Country\n", + "ORDER BY COUNT(CustomerID) DESC\"\"\")\n", + 
"execute_and_print_query(\"\"\"SELECT Shippers.ShipperName, COUNT(Orders.OrderID) AS NumberOfOrders FROM Orders\n", + "LEFT JOIN Shippers ON Orders.ShipperID = Shippers.ShipperID\n", + "GROUP BY ShipperName\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 195, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT COUNT(CustomerID), Country\n", + "FROM Customers\n", + "GROUP BY Country\n", + "HAVING COUNT(CustomerID) > 5\n", + "(9, 'Brazil')\n", + "(11, 'France')\n", + "(11, 'Germany')\n", + "(7, 'UK')\n", + "(13, 'USA')\n", + "--------------------------------------------------\n", + "SELECT COUNT(CustomerID), Country\n", + "FROM Customers\n", + "GROUP BY Country\n", + "HAVING COUNT(CustomerID) > 5\n", + "ORDER BY COUNT(CustomerID) DESC\n", + "(13, 'USA')\n", + "(11, 'Germany')\n", + "(11, 'France')\n", + "(9, 'Brazil')\n", + "(7, 'UK')\n", + "--------------------------------------------------\n", + "SELECT Employees.LastName, COUNT(Orders.OrderID) AS NumberOfOrders\n", + "FROM (Orders\n", + "INNER JOIN Employees ON Orders.EmployeeID = Employees.EmployeeID)\n", + "GROUP BY LastName\n", + "HAVING COUNT(Orders.OrderID) > 10\n", + "('Buchanan', 11)\n", + "('Callahan', 27)\n", + "('Davolio', 29)\n", + "('Fuller', 20)\n", + "('King', 14)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT Employees.LastName, COUNT(Orders.OrderID) AS NumberOfOrders\n", + "FROM Orders\n", + "INNER JOIN Employees ON Orders.EmployeeID = Employees.EmployeeID\n", + "WHERE LastName = 'Davolio' OR LastName = 'Fuller'\n", + "GROUP BY LastName\n", + "HAVING COUNT(Orders.OrderID) > 25\n", + "('Davolio', 29)\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Having\n", + "execute_and_print_query(\"\"\"SELECT COUNT(CustomerID), Country\n", + "FROM Customers\n", + "GROUP BY Country\n", + "HAVING COUNT(CustomerID) > 5\"\"\")\n", + 
"execute_and_print_query(\"\"\"SELECT COUNT(CustomerID), Country\n", + "FROM Customers\n", + "GROUP BY Country\n", + "HAVING COUNT(CustomerID) > 5\n", + "ORDER BY COUNT(CustomerID) DESC\"\"\")\n", + "execute_and_print_query(\"\"\"SELECT Employees.LastName, COUNT(Orders.OrderID) AS NumberOfOrders\n", + "FROM (Orders\n", + "INNER JOIN Employees ON Orders.EmployeeID = Employees.EmployeeID)\n", + "GROUP BY LastName\n", + "HAVING COUNT(Orders.OrderID) > 10\"\"\")\n", + "execute_and_print_query(\"\"\"SELECT Employees.LastName, COUNT(Orders.OrderID) AS NumberOfOrders\n", + "FROM Orders\n", + "INNER JOIN Employees ON Orders.EmployeeID = Employees.EmployeeID\n", + "WHERE LastName = 'Davolio' OR LastName = 'Fuller'\n", + "GROUP BY LastName\n", + "HAVING COUNT(Orders.OrderID) > 25\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 196, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT SupplierName\n", + "FROM Suppliers\n", + "WHERE EXISTS (SELECT ProductName FROM Products WHERE Products.SupplierID = Suppliers.supplierID AND Price < 20)\n", + "('Exotic Liquid',)\n", + "('New Orleans Cajun Delights',)\n", + "('Tokyo Traders',)\n", + "(\"Mayumi's\",)\n", + "('Pavlova, Ltd.',)\n", + "...\n", + "--------------------------------------------------\n", + "SELECT SupplierName\n", + "FROM Suppliers\n", + "WHERE EXISTS (SELECT ProductName FROM Products WHERE Products.SupplierID = Suppliers.supplierID AND Price = 22)\n", + "('New Orleans Cajun Delights',)\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Exists\n", + "execute_and_print_query(\"\"\"SELECT SupplierName\n", + "FROM Suppliers\n", + "WHERE EXISTS (SELECT ProductName FROM Products WHERE Products.SupplierID = Suppliers.supplierID AND Price < 20)\"\"\")\n", + "execute_and_print_query(\"\"\"SELECT SupplierName\n", + "FROM Suppliers\n", + "WHERE EXISTS (SELECT ProductName FROM Products WHERE Products.SupplierID = 
Suppliers.supplierID AND Price = 22)\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 204, + "metadata": {}, + "outputs": [], + "source": [ + "# SQL Any, All\n", + "# not supported in sqlite\n", + "# execute_and_print_query(\"\"\"SELECT ProductName\n", + "# FROM Products\n", + "# WHERE ProductID = ANY(SELECT ProductID\n", + "# FROM OrderDetails\n", + "# WHERE Quantity = 10)\"\"\")\n", + "# execute_and_print_query(\"\"\"SELECT ProductName\n", + "# FROM Products\n", + "# WHERE ProductID = ANY(SELECT ProductID\n", + "# FROM OrderDetails\n", + "# WHERE Quantity > 99)\"\"\")\n", + "# execute_and_print_query(\"\"\"SELECT ProductName\n", + "# FROM Products\n", + "# WHERE ProductID = ANY(SELECT ProductID\n", + "# FROM OrderDetails\n", + "# WHERE Quantity > 1000)\"\"\")\n", + "# execute_and_print_query(\"\"\"SELECT ALL ProductName\n", + "# FROM Products\n", + "# WHERE TRUE\"\"\")\n", + "# execute_and_print_query(\"\"\"SELECT ProductName\n", + "# FROM Products\n", + "# WHERE ProductID = ALL\n", + "# (SELECT ProductID\n", + "# FROM OrderDetails\n", + "# WHERE Quantity = 10)\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 206, + "metadata": {}, + "outputs": [], + "source": [ + "# SQL Select Into\n", + "# not supported in sqlite\n", + "# execute_and_print_query(\"SELECT * INTO CustomersBackup2017 FROM Customers\")\n", + "# execute_and_print_query(\"SELECT * INTO CustomersBackup2017 IN 'Backup.mdb' FROM Customers\")\n", + "# execute_and_print_query(\"SELECT CustomerName, ContactName INTO CustomersBackup2017 FROM Customers\")\n", + "# execute_and_print_query(\"SELECT * INTO CustomersGermany FROM Customers WHERE Country = 'Germany'\")\n", + "# execute_and_print_query(\"\"\"SELECT Customers.CustomerName, Orders.OrderID\n", + "# INTO CustomersOrderBackup2017\n", + "# FROM Customers\n", + "# LEFT JOIN Orders ON Customers.CustomerID = Orders.CustomerID\"\"\")\n", + "# execute_and_print_query(\"\"\"SELECT * INTO newtable\n", + "# FROM oldtable\n", + "# 
WHERE 1 = 0\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 208, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INSERT INTO Customers (CustomerName, City, Country)\n", + "SELECT SupplierName, City, Country FROM Suppliers\n", + "--------------------------------------------------\n", + "INSERT INTO Customers (CustomerName, ContactName, Address, City, PostalCode, Country)\n", + "SELECT SupplierName, ContactName, Address, City, PostalCode, Country FROM Suppliers\n", + "--------------------------------------------------\n", + "INSERT INTO Customers (CustomerName, City, Country)\n", + "SELECT SupplierName, City, Country FROM Suppliers\n", + "WHERE Country='Germany'\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Insert Into Select\n", + "execute_and_print_query(\"\"\"INSERT INTO Customers (CustomerName, City, Country)\n", + "SELECT SupplierName, City, Country FROM Suppliers\"\"\")\n", + "execute_and_print_query(\"\"\"INSERT INTO Customers (CustomerName, ContactName, Address, City, PostalCode, Country)\n", + "SELECT SupplierName, ContactName, Address, City, PostalCode, Country FROM Suppliers\"\"\")\n", + "execute_and_print_query(\"\"\"INSERT INTO Customers (CustomerName, City, Country)\n", + "SELECT SupplierName, City, Country FROM Suppliers\n", + "WHERE Country='Germany'\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 209, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT OrderID, Quantity,\n", + "CASE\n", + " WHEN Quantity > 30 THEN 'The quantity is greater than 30'\n", + " WHEN Quantity = 30 THEN 'The quantity is 30'\n", + " ELSE 'The quantity is under 30'\n", + "END AS QuantityText\n", + "FROM OrderDetails\n", + "(10248, 12, 'The quantity is under 30')\n", + "(10248, 10, 'The quantity is under 30')\n", + "(10248, 5, 'The quantity is under 30')\n", + "(10249, 9, 'The quantity is 
under 30')\n", + "(10249, 40, 'The quantity is greater than 30')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT CustomerName, City, Country\n", + "FROM Customers\n", + "ORDER BY\n", + "(CASE\n", + " WHEN City IS NULL THEN Country\n", + " ELSE City\n", + "END)\n", + "('Drachenblut Delikatessend', 'Aachen', 'Germany')\n", + "('Rattlesnake Canyon Grocery', 'Albuquerque', 'USA')\n", + "('Old World Delicatessen', 'Anchorage', 'USA')\n", + "(\"Grandma Kelly's Homestead\", 'Ann Arbor', 'USA')\n", + "(\"Grandma Kelly's Homestead\", 'Ann Arbor', 'USA')\n", + "...\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Case\n", + "execute_and_print_query(\"\"\"SELECT OrderID, Quantity,\n", + "CASE\n", + " WHEN Quantity > 30 THEN 'The quantity is greater than 30'\n", + " WHEN Quantity = 30 THEN 'The quantity is 30'\n", + " ELSE 'The quantity is under 30'\n", + "END AS QuantityText\n", + "FROM OrderDetails\"\"\")\n", + "execute_and_print_query(\"\"\"SELECT CustomerName, City, Country\n", + "FROM Customers\n", + "ORDER BY\n", + "(CASE\n", + " WHEN City IS NULL THEN Country\n", + " ELSE City\n", + "END)\"\"\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 212, + "metadata": {}, + "outputs": [], + "source": [ + "# SQL Null Functions\n", + "# execute_and_print_query(\"SELECT ProductName, UnitPrice * (UnitsInStock + IFNULL(UnitsOnOrder, 0)) FROM Products\")\n", + "# execute_and_print_query(\"SELECT ProductName, UnitPrice * (UnitsInStock + COALESCE(UnitsOnOrder, 0)) FROM Products\")" + ] + }, + { + "cell_type": "code", + "execution_count": 214, + "metadata": {}, + "outputs": [], + "source": [ + "# SQL Stored Procedures\n", + "# not supported in sqlite\n", + "# execute_and_print_query(\"\"\"CREATE PROCEDURE SelectAllCustomers\n", + "# AS\n", + "# SELECT * FROM Customers\n", + "# GO\"\"\")\n", + "# execute_and_print_query(\"EXEC SelectAllCustomers\")\n", + "# execute_and_print_query(\"\"\"CREATE 
PROCEDURE SelectAllCustomers @City nvarchar(30)\n", + "# AS\n", + "# SELECT * FROM Customers WHERE City = @City\n", + "# GO\"\"\")\n", + "# execute_and_print_query(\"EXEC SelectAllCustomers @City = 'London'\")\n", + "# execute_and_print_query(\"\"\"CREATE PROCEDURE SelectAllCustomers @City nvarchar(30), @PostalCode nvarchar(10)\n", + "# AS\n", + "# SELECT * FROM Customers WHERE City = @City AND PostalCode = @PostalCode\n", + "# GO;\n", + "# \"\"\")\n", + "# execute_and_print_query(\"EXEC SelectAllCustomers @City = 'London', @PostalCode = 'WA1 1DP'\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 217, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-- Select all:\n", + "SELECT * FROM Customers\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers -- WHERE City='Berlin'\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. 
de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "...\n", + "--------------------------------------------------\n", + "-- SELECT * FROM Customers;\n", + "SELECT * FROM Products\n", + "(1, 'Chais', 1, 1, '10 boxes x 20 bags', 18)\n", + "(2, 'Chang', 1, 1, '24 - 12 oz bottles', 19)\n", + "(3, 'Aniseed Syrup', 1, 2, '12 - 550 ml bottles', 10)\n", + "(4, \"Chef Anton's Cajun Seasoning\", 2, 2, '48 - 6 oz jars', 22)\n", + "(5, \"Chef Anton's Gumbo Mix\", 2, 2, '36 boxes', 21.35)\n", + "...\n", + "--------------------------------------------------\n", + "/*Select all the columns\n", + "of all the records\n", + "in the Customers table:*/\n", + "SELECT * FROM Customers\n", + "(1, 'Alfreds Futterkiste', 'Maria Anders', 'Obere Str. 57', 'Berlin', '12209', 'Germany')\n", + "(2, 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Avda. de la Constitución 2222', 'México D.F.', '5021', 'Mexico')\n", + "(3, 'Antonio Moreno Taquería', 'Antonio Moreno', 'Mataderos 2312', 'México D.F.', '5023', 'Mexico')\n", + "(4, 'Around the Horn', 'Thomas Hardy', '120 Hanover Sq.', 'London', 'WA1 1DP', 'UK')\n", + "(5, 'Berglunds snabbköp', 'Christina Berglund', 'Berguvsvägen 8', 'Luleå', 'S-958 22', 'Sweden')\n", + "...\n", + "--------------------------------------------------\n", + "/*SELECT * FROM Customers;\n", + "SELECT * FROM Products;\n", + "SELECT * FROM Orders;\n", + "SELECT * FROM Categories;*/\n", + "SELECT * FROM Suppliers\n", + "(1, 'Exotic Liquid', 'Charlotte Cooper', '49 Gilbert St.', 'Londona', 'EC1 4SD', 'UK', '(171) 555-2222')\n", + "(2, 'New Orleans Cajun Delights', 'Shelley Burke', 'P.O. 
Box 78934', 'New Orleans', '70117', 'USA', '(100) 555-4822')\n", + "(3, \"Grandma Kelly's Homestead\", 'Regina Murphy', '707 Oxford Rd.', 'Ann Arbor', '48104', 'USA', '(313) 555-5735')\n", + "(4, 'Tokyo Traders', 'Yoshi Nagase', '9-8 Sekimai Musashino-shi', 'Tokyo', '100', 'Japan', '(03) 3555-5011')\n", + "(5, \"Cooperativa de Quesos 'Las Cabras'\", 'Antonio del Valle Saavedra', 'Calle del Rosal 4', 'Oviedo', '33007', 'Spain', '(98) 598 76 54')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT CustomerName, /*City,*/ Country FROM Customers\n", + "('Alfreds Futterkiste', 'Germany')\n", + "('Ana Trujillo Emparedados y helados', 'Mexico')\n", + "('Antonio Moreno Taquería', 'Mexico')\n", + "('Around the Horn', 'UK')\n", + "('Berglunds snabbköp', 'Sweden')\n", + "...\n", + "--------------------------------------------------\n", + "SELECT * FROM Customers WHERE (CustomerName LIKE 'L%'\n", + "OR CustomerName LIKE 'R%' /*OR CustomerName LIKE 'S%'\n", + "OR CustomerName LIKE 'T%'*/ OR CustomerName LIKE 'W%')\n", + "AND Country='USA'\n", + "ORDER BY CustomerName\n", + "(43, 'Lazy K Kountry Store', 'John Steel', '12 Orchestra Terrace', 'Walla Walla', '99362', 'USA')\n", + "(45, \"Let''s Stop N Shop\", 'Jaime Yorres', '87 Polk St. Suite 5', 'San Francisco', '94117', 'USA')\n", + "(48, 'Lonesome Pine Restaurant', 'Fran Wilson', '89 Chiaroscuro Rd.', 'Portland', '97219', 'USA')\n", + "(65, 'Rattlesnake Canyon Grocery', 'Paula Wilson', '2817 Milton Dr.', 'Albuquerque', '87110', 'USA')\n", + "(89, 'White Clover Markets', 'Karl Jablonski', '305 - 14th Ave. S. 
Suite 3B', 'Seattle', '98128', 'USA')\n", + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# SQL Comments\n", + "execute_and_print_query(\"\"\"-- Select all:\n", + "SELECT * FROM Customers\"\"\")\n", + "execute_and_print_query(\"\"\"SELECT * FROM Customers -- WHERE City='Berlin'\"\"\")\n", + "execute_and_print_query(\"\"\"-- SELECT * FROM Customers;\n", + "SELECT * FROM Products\"\"\")\n", + "execute_and_print_query(\"\"\"/*Select all the columns\n", + "of all the records\n", + "in the Customers table:*/\n", + "SELECT * FROM Customers\"\"\")\n", + "execute_and_print_query(\"\"\"/*SELECT * FROM Customers;\n", + "SELECT * FROM Products;\n", + "SELECT * FROM Orders;\n", + "SELECT * FROM Categories;*/\n", + "SELECT * FROM Suppliers\"\"\")\n", + "execute_and_print_query(\"SELECT CustomerName, /*City,*/ Country FROM Customers\")\n", + "execute_and_print_query(\"\"\"SELECT * FROM Customers WHERE (CustomerName LIKE 'L%'\n", + "OR CustomerName LIKE 'R%' /*OR CustomerName LIKE 'S%'\n", + "OR CustomerName LIKE 'T%'*/ OR CustomerName LIKE 'W%')\n", + "AND Country='USA'\n", + "ORDER BY CustomerName\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [], + "source": [ + "# SQL Operators" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/missions/W1/M3/README.md b/missions/W1/M3/README.md new file mode 100644 index 0000000..d0113ef --- /dev/null +++ b/missions/W1/M3/README.md @@ -0,0 +1,250 @@ +# GDP ETL Project + +The goal of this project is to implement ETL process for GDP data. 
+ +## Business Requirements +The main purpose of this project is to collect GDP data from IMF website and transform it to a format that can be used for analysis. + +Common analysis use cases are as follows: +1. Filter countries by GDP +2. Top N countries by GDP +3. Group by region + + +### Contents +--- +- [GDP ETL Project](#gdp-etl-project) + - [Business Requirements](#business-requirements) + - [Contents](#contents) + - [Definition of ETL Process](#definition-of-etl-process) + - [1. Extract](#1-extract) + - [2. Transform](#2-transform) + - [3. Load](#3-load) + - [Implementation](#implementation) + - [ETL Process](#etl-process) + - [Modules](#modules) + - [**`importer.py`**](#importerpy) + - [**`exporter.py`**](#exporterpy) + - [**`logger.py`**](#loggerpy) + - [Utils](#utils) + - [**`create_country_region_table.py`**](#create_country_region_tablepy) + - [**`create_large_data_csv.py`**](#create_large_data_csvpy) + - [Performance Experiment](#performance-experiment) + - [Read CSV](#read-csv) + - [Pandas DataFrame](#pandas-dataframe) + - [Parallel/Distributed Processing](#paralleldistributed-processing) + - [Steps](#steps) + +## Definition of ETL Process + +### 1. Extract +- Parse html or read csv file. +- After extraction, the data should follow the format: + ```json + [ + { + "Country": "United States", + "GDP": "30,337,162", + "Region": "North America" + }, + ... + ] + ``` + +### 2. Transform +- Transform GDP value + 1. Convert GDP value string to float + 2. Convert GDP value to billion +- Sort data by GDP +- After transformation, the data should follow the format: + ```json + [ + { + "Country":"United States", + "GDP":30337.16, + "Region":"North America" + }, + ... + ] + ``` + +### 3. Load +- Export the data to a JSON file or sqlite database. +- For optimizing query performance, store the data in GDP order. + +--- + +## Implementation + +### ETL Process +1. **`etl_project_gdp.py`**: ETL process wiki web -> json +2. 
**`etl_project_gdp_with_sql.py`**: ETL process wiki web -> sqlite +3. **`etl_project_gdp_from_csv.py`**: ETL process csv -> sqlite +4. **`etl_project_gdp_parallel.py`**: ETL process with Parallel/Distributed Design + +### Modules + +#### **`importer.py`** +Extracts data from Wikipedia and saves it to a JSON file. + +Supported Data Source: +- Wikipedia +- CSV File + +Importer Class Hierarchy: +Separate interface and implementation to support multiple data sources. +- `ImporterInterface` + - `WebImporterInterface` + - `WikiWebImporter` + - `FileImporterInterface` + - `CsvFileImporter` + +--- + +#### **`exporter.py`** +Exports the data to a JSON file. + +Supported Export Target: +- JSON File +- SQLite Database(.db file) + +Exporter Class Hierarchy: +- `ExporterInterface` + - `JsonFileExporter` + - `SqliteExporter` + +--- + +#### **`logger.py`** +Logs the data to a file. + +Supported Log Level: +- info +- error + +--- + +### Utils + +#### **`create_country_region_table.py`** +Extracts country and region data from Wikipedia and saves it to a JSON file. + +Format: `{country: region}` + +#### **`create_large_data_csv.py`** +Generates a large CSV file for testing. + +--- +## Performance Experiment + +Test Data(Generated by `create_large_data_csv.py`): +- 10M row(260MB) +- 100M row(2.6GB) + +Environment: +- 32GB RAM +- CPU 10 core + +--- + +### Read CSV + +```python +# 10M: 4.51s +# 100M: 48.77s +df = pd.read_csv("large_data.csv") +``` + +If the file is too large to fit in memory, we should use the `chunksize` parameter to read the file in chunks. 
+ +```python +chunks = pd.read_csv( + "large_data.csv", + dtype=schema, + header=None, + names=schema.keys(), + chunksize=CHUNKSIZE, +) +df = pd.concat(chunks) +``` + +**Estimated Result:** + +More chunks(smaller chunksize), Slower +- Overhead of creating new dataframe for each chunk +- Overhead of concatenating all chunks + + +**Actual Result:** + +| | chunksize 10K | chunksize 100K | chunksize 1M | none | +| ------------- | ------------- | -------------- | ------------ | ------ | +| datasize 10M | 4.82s | 3.92s | 4.3s | 4.51s | +| datasize 100M | 46.85s | 40.25s | 44.38s | 48.77s | + +Regardless of data size, chunksize 100K is the fastest. + +**Why?** + +Pandas buffer realloc problem? + +--- + +### Pandas DataFrame + +```python +# 10M: 4.70s +# 100M: 50.19s +df["GDP"] = df["GDP"].apply(lambda x: x.replace(",", "")) +df["GDP"] = df["GDP"].apply(lambda x: round(float(x) / 1000, 2)) +``` + +```python +# 10M: 4.14s +# 100M: 42.46s +df["GDP"] = df["GDP"].apply(lambda x: round(float(x.replace(",", "")) / 1000, 2)) +``` + +```python +# 10M: 3.98s +# 100M: 39.55s +df["GDP"] = ( + pd.to_numeric(df["GDP"].str.replace(",", ""), errors="coerce") + .div(1000) + .round(2) +) +``` + +```python +# 10M: 3.19s +# 100M: 32.64s +df["GDP"] = (df["GDP"].replace(",", "", regex=True).astype(float) / 1000).round(2) +``` + +```python +# 10M: 1.66s +# 100M: 17.05s +df["GDP"] = (df["GDP"].str.replace(",", "").astype(float) / 1000).round(2) +``` + +--- + +## Parallel/Distributed Processing + +See detail in `etl_project_gdp_parallel.py`. + +### Steps + +1. Split one big file to small files + ex) data.csv -> data_0.csv, data_1.csv +2. Preprocess each file + ex) data_0.csv -> data_0_preprocessed.csv +3. Map each file to region + ex) data_0_preprocessed.csv -> data_0_asia.csv, data_0_europe.csv +4. Reduce by region + ex) data_0_asia.csv, data_1_asia.csv -> data_asia.csv +5. Sort by GDP + ex) data_asia.csv -> data_asia_sorted.csv +6. 
Load to sqlite + ex) data_asia_sorted.csv -> data_asia_sorted.db +7. Query by region diff --git a/missions/W1/M3/etl_project_gdp.py b/missions/W1/M3/etl_project_gdp.py new file mode 100644 index 0000000..dd33214 --- /dev/null +++ b/missions/W1/M3/etl_project_gdp.py @@ -0,0 +1,62 @@ +from modules.logger import logger, init_logger +from modules.importer import WikiWebImporter +from modules.exporter import JsonFileExporter +from pathlib import Path + +HOME_DIR = Path(__file__).resolve().parent +LOG_FILE_PATH = HOME_DIR / "log/etl_project_log.txt" +RAW_DATA_FILE_PATH = HOME_DIR / "data/Countries_by_GDP.json" +OUTPUT_FILE_PATH = HOME_DIR / "data/Countries_by_GDP_Transformed.json" + + +def transform_df(df): + """ + Transformation function + """ + # Million -> Billion + df["GDP"] = df["GDP"].apply(lambda x: x.replace(",", "")) + df["GDP"] = df["GDP"].apply(lambda x: round(float(x) / 1000, 2)) + + # Sort by GDP + df = df.sort_values(by="GDP", ascending=False) + + return df + + +def main(): + init_logger(LOG_FILE_PATH) + logger.print_separator() + logger.info("Starting the ETL process") + + # Extract + # parsing html and store to raw_data_file_path + wiki_importer = WikiWebImporter(raw_data_file_path=RAW_DATA_FILE_PATH) + df = wiki_importer.import_data() + + # Transform + # transform GDP to billion and sort by GDP + logger.info("Transforming data...") + df = transform_df(df) + + # Load + # export to output_file_path + exporter = JsonFileExporter(OUTPUT_FILE_PATH) + exporter.export_data(df) + + logger.info("ETL process completed successfully") + + # Query + df_over_100 = df[df["GDP"] > 100] + print("Countries with GDP > 100B:") + for _, row in df_over_100.iterrows(): + print(f"{row['Country']:<20} {row['GDP']}") + + df_groupby_top5 = df.groupby("Region").head(5) + avg_gdp = df_groupby_top5.groupby("Region")["GDP"].mean() + print("Top 5 Average GDP by Region:") + for region, gdp in avg_gdp.items(): + print(f"{region:<15} {gdp:.2f}") + + +if __name__ == "__main__": + main() 
diff --git a/missions/W1/M3/etl_project_gdp_from_csv.py b/missions/W1/M3/etl_project_gdp_from_csv.py new file mode 100644 index 0000000..71d5a3c --- /dev/null +++ b/missions/W1/M3/etl_project_gdp_from_csv.py @@ -0,0 +1,119 @@ +import sqlite3 +import time +import pandas as pd +from modules.logger import logger, init_logger +from modules.importer import CsvFileImporter +from modules.exporter import SqliteExporter + +LOG_FILE_PATH = "etl_project_log.txt" +DB_PATH = "World_Economies.db" +TABLE_NAME = "Countries_by_GDP" +INPUT_FILE_PATH = "large_data_10M.csv" +# INPUT_FILE_PATH = "large_data.csv" + +QUERY_1 = """ +SELECT Country, GDP_USD_billion +FROM Countries_by_GDP +WHERE GDP_USD_billion > 100 +ORDER BY GDP_USD_billion DESC +""" +QUERY_2 = """ +SELECT Region, AVG(GDP_USD_billion) FROM +( + SELECT + Country, + GDP_USD_billion, + Region, + ROW_NUMBER() OVER (PARTITION BY Region ORDER BY GDP_USD_billion DESC) AS row_num + FROM Countries_by_GDP +) +WHERE row_num <= 5 +GROUP BY Region +""" + + +def transfrom_df(df: pd.DataFrame) -> pd.DataFrame: + """ + Transformation function + """ + time_start = time.time() + # Million -> Billion + # df["GDP"] = df["GDP"].apply(lambda x: x.replace(",", "")) + # df["GDP"] = df["GDP"].apply(lambda x: round(float(x) / 1000, 2)) + + # df["GDP"] = df["GDP"].apply(lambda x: round(float(x.replace(",", "")) / 1000, 2)) + # df["GDP"] = (df["GDP"].replace(",", "", regex=True).astype(float) / 1000).round(2) + df["GDP"] = (df["GDP"].str.replace(",", "").astype(float) / 1000).round(2) + # df["GDP"] = ( + # pd.to_numeric(df["GDP"].str.replace(",", ""), errors="coerce") + # .div(1000) + # .round(2) + # ) + time_end = time.time() + logger.info(f"Transform GDP: {time_end - time_start:.2f} seconds") + + # Sort by GDP + time_start = time.time() + df = df.sort_values(by="GDP", ascending=False) + time_end = time.time() + logger.info(f"Sort by GDP: {time_end - time_start:.2f} seconds") + + # Rename GDP column to GDP_USD_billion + df.rename(columns={"GDP": 
"GDP_USD_billion"}, inplace=True) + + return df + + +def main(): + init_logger(LOG_FILE_PATH) + logger.print_separator() + logger.info("Starting the ETL process") + + # Extract + time_start = time.time() + csv_importer = CsvFileImporter(INPUT_FILE_PATH) + df = csv_importer.import_data() + time_end = time.time() + logger.info(f"Extract Time taken: {time_end - time_start:.2f} seconds") + + # Transform + time_start = time.time() + logger.info("Transforming data...") + df = transfrom_df(df) + time_end = time.time() + logger.info(f"Transform Time taken: {time_end - time_start:.2f} seconds") + + print(df.head(3)) + + # Load + time_start = time.time() + sqlite_exporter = SqliteExporter(DB_PATH, table_name=TABLE_NAME) + sqlite_exporter.export_data(df) + time_end = time.time() + logger.info(f"Load Time taken: {time_end - time_start:.2f} seconds") + + logger.info("ETL process completed successfully") + + print("Top 5 Average GDP by Region:") + + time_start = time.time() + df_groupby_top5 = df.groupby("Region").head(5) + avg_gdp = df_groupby_top5.groupby("Region")["GDP_USD_billion"].mean() + for region, gdp in avg_gdp.items(): + print(f"{region:<15} {gdp:.2f}") + time_end = time.time() + logger.info(f"Query with Dataframe : {time_end - time_start:.2f} seconds") + + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + time_start = time.time() + cursor.execute(QUERY_2) + for row in cursor: + print(f"{row[0]:<15} {row[1]:.2f}") + time_end = time.time() + logger.info(f"Query with SQLITE: {time_end - time_start:.2f} seconds") + conn.close() + + +if __name__ == "__main__": + main() diff --git a/missions/W1/M3/etl_project_gdp_parallel.py b/missions/W1/M3/etl_project_gdp_parallel.py new file mode 100644 index 0000000..ae218fd --- /dev/null +++ b/missions/W1/M3/etl_project_gdp_parallel.py @@ -0,0 +1,236 @@ +import sqlite3 +import time +import pandas as pd +from modules.logger import logger, init_logger +from multiprocessing import Pool +from concurrent.futures import 
ThreadPoolExecutor, ProcessPoolExecutor + +LOG_FILE_PATH = "etl_project_log.txt" +# DB_PATH = "World_Economies_1B.db" +DB_NAME = "World_Economies_10M" +TABLE_NAME = "Countries_by_GDP" +# INPUT_FILE_PATH = "large_data_1B.csv" +INPUT_FILE_PATH = "large_data_10M.csv" +DATA_SIZE = 10_000_000 # 10M rows +CHUNK_SIZE = 1_000_000 # 100K rows per chunk +NUM_CHUNKS = DATA_SIZE // CHUNK_SIZE # 100 chunks + +QUERY_1 = """ +SELECT Country, GDP_USD_billion +FROM Countries_by_GDP +WHERE GDP_USD_billion > 100 +ORDER BY GDP_USD_billion DESC +""" +QUERY_2 = """ +SELECT Region, AVG(GDP_USD_billion) FROM +( + SELECT + Country, + GDP_USD_billion, + Region, + ROW_NUMBER() OVER (PARTITION BY Region ORDER BY GDP_USD_billion DESC) AS row_num + FROM Countries_by_GDP +) +WHERE row_num <= 5 +GROUP BY Region +""" + + +def transfrom_df(df: pd.DataFrame) -> pd.DataFrame: + """ + Transformation function + """ + + df["GDP"] = (df["GDP"].str.replace(",", "").astype(float) / 1000).round(2) + + # Sort by GDP + # df = df.sort_values(by="GDP", ascending=False) + + # Rename GDP column to GDP_USD_billion + df.rename(columns={"GDP": "GDP_USD_billion"}, inplace=True) + + return df + + +schema = { + "Country": str, + "GDP": str, + "Region": str, +} + + +def process_chunk(index: int): + df = pd.read_csv( + INPUT_FILE_PATH, + dtype=schema, + header=None, + names=schema.keys(), + skiprows=index * CHUNK_SIZE, + nrows=CHUNK_SIZE, + ) + df.to_csv(f"data/large_data_10M_{index}.csv", index=False) + + +def process_chunk2(index: int, chunk: pd.DataFrame): + print(f"Processing chunk {index}") + chunk.to_csv(f"data/large_data_10M_{index}.csv", index=False) + + +def extract_data_from_source(): + """ + (Extract) Seperate one big file into multiple small files. + + large_data_1B.csv -> large_data_1B_0.csv, large_data_1B_1.csv, ... 
+ """ + + with Pool() as pool: + with pd.read_csv( + INPUT_FILE_PATH, + dtype=schema, + header=None, + names=schema.keys(), + chunksize=CHUNK_SIZE, + ) as reader: + results = [] + for i, chunk in enumerate(reader): + results.append(pool.apply_async(process_chunk2, args=(i, chunk))) + + pool.close() + pool.join() + + +def transform_chunk(index: int): + """ + (Transform - Preprocess) Transform each small file + """ + df = pd.read_csv(f"data/large_data_10M_{index}.csv", dtype=schema) + df = transfrom_df(df) # 기존 transform 함수 사용 + df.to_csv(f"data/large_data_10M_{index}_transformed.csv", index=False) + + +def map_by_region(index: int): + """ + (Transform - Map) Separate each small file by region + """ + df = pd.read_csv(f"data/large_data_10M_{index}_transformed.csv") + regions = ["Asia", "Europe", "Africa", "North America", "South America", "Oceania"] + + for region in regions: + region_df = df[df["Region"] == region] + region_df.to_csv(f"data/large_data_10M_{index}_{region}.csv", index=False) + + +def reduce_by_region(region: str): + """ + (Transform - Reduce) Merge all files for each region + """ + all_files = [f"data/large_data_10M_{i}_{region}.csv" for i in range(NUM_CHUNKS)] + dfs = [] + + for file in all_files: + try: + df = pd.read_csv(file) + dfs.append(df) + except FileNotFoundError: + continue + + if dfs: + combined_df = pd.concat(dfs, ignore_index=True) + combined_df.to_csv(f"data/large_data_10M_{region}.csv", index=False) + + +def sort_by_gdp(region: str): + """ + (Transform - Sort) Sort each region file by GDP + """ + df = pd.read_csv(f"data/large_data_10M_{region}.csv") + df = df.sort_values(by="GDP_USD_billion", ascending=False) + df.to_csv(f"data/large_data_10M_{region}_sorted.csv", index=False) + + +def load_to_database(region: str): + """ + (Load) Export each region file to sqlite + """ + conn = sqlite3.connect(f"data/{DB_NAME}_{region}.db") + df = pd.read_csv(f"data/large_data_10M_{region}_sorted.csv") + df.to_sql(TABLE_NAME, conn, 
if_exists="append", index=False) + conn.close() + + +def query_by_region(region: str): + with sqlite3.connect(f"data/{DB_NAME}_{region}.db") as conn: + cursor = conn.cursor() + cursor.execute( + "SELECT AVG(GDP_USD_billion) FROM (SELECT * FROM Countries_by_GDP ORDER BY GDP_USD_billion DESC LIMIT 5)" + ) + result = cursor.fetchall() + return region, result[0][0] + + +def main(): + init_logger(LOG_FILE_PATH) + logger.print_separator() + logger.info("Starting the Parallel ETL process") + + # 1. Extract + time_start = time.time() + # extract_data_from_source() + with Pool() as pool: + pool.map(process_chunk, range(NUM_CHUNKS)) + # with ThreadPoolExecutor() as executor: + # executor.map(process_chunk, range(NUM_CHUNKS)) + # with ProcessPoolExecutor() as executor: + # executor.map(process_chunk, range(NUM_CHUNKS)) + time_end = time.time() + logger.info(f"Extract data: {time_end - time_start:.2f} seconds") + + # 2. Transform - Preprocess + time_start = time.time() + with Pool() as pool: + pool.map(transform_chunk, range(NUM_CHUNKS)) + time_end = time.time() + logger.info(f"Transform chunks: {time_end - time_start:.2f} seconds") + + # 3. Transform - Map + time_start = time.time() + with Pool() as pool: + pool.map(map_by_region, range(NUM_CHUNKS)) + time_end = time.time() + logger.info(f"Map by region: {time_end - time_start:.2f} seconds") + + # 4. Transform - Reduce + time_start = time.time() + regions = ["Asia", "Europe", "Africa", "North America", "South America", "Oceania"] + with Pool() as pool: + pool.map(reduce_by_region, regions) + time_end = time.time() + logger.info(f"Reduce by region: {time_end - time_start:.2f} seconds") + + # 5. Sort + time_start = time.time() + with Pool() as pool: + pool.map(sort_by_gdp, regions) + time_end = time.time() + logger.info(f"Sort by GDP: {time_end - time_start:.2f} seconds") + + # 6. 
Load + time_start = time.time() + with Pool() as pool: + pool.map(load_to_database, regions) + time_end = time.time() + logger.info(f"Load to database: {time_end - time_start:.2f} seconds") + + # 7. Query + time_start = time.time() + regions = ["Asia", "Europe", "Africa", "North America", "South America", "Oceania"] + with Pool() as pool: + results = pool.map(query_by_region, regions) + print(results) + + time_end = time.time() + logger.info(f"Query: {time_end - time_start:.2f} seconds") + + +if __name__ == "__main__": + main() diff --git a/missions/W1/M3/etl_project_gdp_with_sql.py b/missions/W1/M3/etl_project_gdp_with_sql.py new file mode 100644 index 0000000..6106224 --- /dev/null +++ b/missions/W1/M3/etl_project_gdp_with_sql.py @@ -0,0 +1,83 @@ +import sqlite3 + +from modules.logger import logger, init_logger +from modules.importer import WikiWebImporter +from modules.exporter import SqliteExporter + +LOG_FILE_PATH = "etl_project_log.txt" +DB_PATH = "World_Economies.db" +TABLE_NAME = "Countries_by_GDP" + +QUERY_1 = """ +SELECT Country, GDP_USD_billion +FROM Countries_by_GDP +WHERE GDP_USD_billion > 100 +ORDER BY GDP_USD_billion DESC +""" +QUERY_2 = """ +SELECT Region, AVG(GDP_USD_billion) FROM +( + SELECT + Country, + GDP_USD_billion, + Region, + ROW_NUMBER() OVER (PARTITION BY Region ORDER BY GDP_USD_billion DESC) AS row_num + FROM Countries_by_GDP +) +WHERE row_num <= 5 +GROUP BY Region +""" + + +def transfrom_df(df): + """ + Transformation function + """ + # Million -> Billion + df["GDP"] = df["GDP"].apply(lambda x: x.replace(",", "")) + df["GDP"] = df["GDP"].apply(lambda x: round(float(x) / 1000, 2)) + + # Sort by GDP + df = df.sort_values(by="GDP", ascending=False) + + # Rename GDP column to GDP_USD_billion + df.rename(columns={"GDP": "GDP_USD_billion"}, inplace=True) + + return df + + +def main(): + init_logger(LOG_FILE_PATH) + logger.print_separator() + logger.info("Starting the ETL process") + + # Extract + wiki_importer = WikiWebImporter() + df = 
wiki_importer.import_data() + + # Transform + logger.info("Transforming data...") + df = transfrom_df(df) + + # Load + sqlite_exporter = SqliteExporter(DB_PATH, table_name=TABLE_NAME) + sqlite_exporter.export_data(df) + + logger.info("ETL process completed successfully") + + # Query + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + cursor.execute(QUERY_1) + print("Countries with GDP > 100B:") + for row in cursor: + print(f"{row[0]:<20} {row[1]}") + cursor.execute(QUERY_2) + print("Top 5 Average GDP by Region:") + for row in cursor: + print(f"{row[0]:<15} {row[1]:.2f}") + conn.close() + + +if __name__ == "__main__": + main() diff --git a/missions/W1/M3/modules/exporter.py b/missions/W1/M3/modules/exporter.py new file mode 100644 index 0000000..69984d6 --- /dev/null +++ b/missions/W1/M3/modules/exporter.py @@ -0,0 +1,50 @@ +from abc import ABC, abstractmethod +import pandas as pd +import sqlite3 + +from modules.logger import logger + + +class ExporterInterface(ABC): + """ + Data exporter interface + """ + + def __init__(self, target: str, **options): + self.target = target + self.options = options + + @abstractmethod + def export_data(self, df: pd.DataFrame): + pass + + +class JsonFileExporter(ExporterInterface): + """ + Json file data exporter + """ + + def export_data(self, df: pd.DataFrame): + logger.info(f"Exporting data to {self.target}...") + df.to_json(self.target, orient="records", indent=2) + logger.info("Data exported successfully") + + +class SqliteExporter(ExporterInterface): + """ + Sqlite data exporter + """ + + def __init__(self, target: str, table_name: str): + if not table_name: + logger.error("table_name is required for SqliteExporter") + raise ValueError("table_name is required for SqliteExporter") + super().__init__(target, table_name=table_name) + + # sqlite connection만 받아오는게 좋을까? -> 굳이? 전부 이 안으로 추상화하자. 
+ def export_data(self, df: pd.DataFrame): + logger.info(f"Exporting data to {self.target}...") + conn = sqlite3.connect(self.target) + df.to_sql(self.options["table_name"], conn, if_exists="replace", index=False) + conn.close() + logger.info("Data exported successfully") diff --git a/missions/W1/M3/modules/importer.py b/missions/W1/M3/modules/importer.py new file mode 100644 index 0000000..4c81118 --- /dev/null +++ b/missions/W1/M3/modules/importer.py @@ -0,0 +1,180 @@ +from abc import ABC, abstractmethod +import pandas as pd +import json +from bs4 import BeautifulSoup +import requests +from pathlib import Path +from modules.logger import logger + + +class ImporterInterface(ABC): + """ + General Data importer interface. + Importer Rule: import data from source and return dataframe. The dataframe should have the following columns: + - Country + - GDP + - Region + """ + + def __init__(self, source: str): + self.source = source + + @abstractmethod + def import_data(self) -> pd.DataFrame: + """ + Import raw data from the source + """ + pass + + +class WebImporterInterface(ImporterInterface): + """ + Web Crawler interface. request -> parse -> return + Subclass should implement _parse_html method. + Web importer는 HTML을 파싱하여 중간 데이터를 만들기 때문에(일종의 Tranform 작업) 중간 데이터를 저장할 수 있는 옵션을 둔다. 
+ """ + + def __init__(self, source: str, raw_data_file_path: str = None): + super().__init__(source) + self.raw_data_file_path = raw_data_file_path + + def import_data(self) -> pd.DataFrame: + logger.info(f"Importing data from {self.source}...") + html = self._get_html() + df = self._parse_html(html) + if self.raw_data_file_path: + self._store_raw_data(self.raw_data_file_path, df) + logger.info(f"Data imported successfully") + return df + + def _get_html(self) -> str: + """ + Fetch HTML from the given URL + """ + try: + response = requests.get(self.source) + response.raise_for_status() + return response.text + except requests.exceptions.RequestException as e: + logger.error(f"ERROR: Failed to fetch HTML from {self.source}") + logger.error(f"ERROR: {e}") + raise e + + @abstractmethod + def _parse_html(self, html: str) -> pd.DataFrame: + """ + Parse HTML to dataframe + """ + pass + + def _store_raw_data(self, path: str, df: pd.DataFrame): + """ + Store raw data to the given file + """ + df.to_json(path, orient="records", indent=2) + + +class WikiWebImporter(WebImporterInterface): + """ + Wiki Web Data Importer class. + """ + + IMF_WIKI_URL = ( + "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_%28nominal%29" + ) + COUNTRY_REGION_TABLE_PATH = ( + Path(__file__).resolve().parent / "../data/country_region_table.json" + ) + + def __init__(self, raw_data_file_path: str = None): + super().__init__(self.IMF_WIKI_URL, raw_data_file_path) + + def import_data(self) -> pd.DataFrame: + return super().import_data() + + # 여기서 region까지 매핑하는 것이 좋을까? .. yes + # why? data importer가 가져오는 데이터의 포맷을 통일하고 싶다. 
+ def _parse_html(self, html: str) -> pd.DataFrame: + data = self._parse_wiki_table_to_df(html) + data = self._map_region(data) + return data + + def _parse_wiki_table_to_df(self, html: str) -> pd.DataFrame: + """ + Parse wikitable to dataframe + """ + soup = BeautifulSoup(html, "html.parser") + table = soup.find("table", class_="wikitable") + data = [] + try: + if not table: + raise Exception("HTML Parsing Error. Wikitable not found") + rows = table.find_all("tr") + for row in rows[3:]: + columns = row.find_all("td") + country = columns[0].text.strip() + gdp = columns[1].text.strip() + if not gdp or gdp == "—": + continue + data.append({"Country": country, "GDP": gdp}) + # TODO: dataframe 만들기 최적화 + return pd.DataFrame(data) + except Exception as e: + logger.error(f"ERROR: Error parsing HTML: {e}") + raise e + + def _map_region(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Map region to the given country + """ + + def parse_json(file_path): + """ + Read JSON file + """ + with open(file_path, "r") as file: + data = json.load(file) + return data + + country_region_table = parse_json(self.COUNTRY_REGION_TABLE_PATH) + df["Region"] = df["Country"].map(country_region_table) + return df + + +class FileImporter(ImporterInterface): + """ + File data importer + File importer는 별도의 trasnform 작업이 없으니 중간 데이터가 없다. 
+ """ + + def import_data(self) -> pd.DataFrame: + logger.info(f"Importing data from {self.source}...") + df = self._parse_file(self.source) + logger.info("Data imported successfully") + return df + + @abstractmethod + def _parse_file(self, file: str) -> pd.DataFrame: + """ + Parse file to dataframe + """ + pass + + +class CsvFileImporter(FileImporter): + """ + Csv file importer + """ + + def _parse_file(self, file: str) -> pd.DataFrame: + schema = { + "Country": str, + "GDP": str, + "Region": str, + } + # df = pd.read_csv(file, dtype=schema, header=None, names=schema.keys()) + chunks = pd.read_csv( + file, dtype=schema, header=None, names=schema.keys(), chunksize=100_000 + ) + df = pd.concat(chunks) + return df diff --git a/missions/W1/M3/modules/logger.py b/missions/W1/M3/modules/logger.py new file mode 100644 index 0000000..a8f1151 --- /dev/null +++ b/missions/W1/M3/modules/logger.py @@ -0,0 +1,38 @@ +import datetime +from pathlib import Path + +DEFAULT_LOG_FILE_PATH = Path(__file__).resolve().parent / "../log/log.txt" + + +class Logger: + """ + Logger class + """ + + def __init__(self, log_file_path: str = DEFAULT_LOG_FILE_PATH): + self.log_file_path = log_file_path + + def info(self, message: str): + self._log("INFO", message) + + def error(self, message: str): + self._log("ERROR", message) + + def print_separator(self): + with open(self.log_file_path, "a") as log_file: + log_file.write("-" * 30 + "\n") + print("-" * 30) + + def _log(self, type: str, message: str): + with open(self.log_file_path, "a") as log_file: + timestamp = datetime.datetime.now() + log_data = f'{timestamp.strftime("%Y-%b-%d-%H-%M-%S")}, {type}: {message}' + log_file.write(log_data + "\n") + print(log_data) + + +logger = Logger() + + +def init_logger(log_file_path: str): + logger.log_file_path = log_file_path diff --git a/missions/W1/M3/utils/create_country_region_table.py b/missions/W1/M3/utils/create_country_region_table.py new file mode 100644 index 0000000..5751562 --- /dev/null +++ 
b/missions/W1/M3/utils/create_country_region_table.py @@ -0,0 +1,48 @@ +import json +import requests +from bs4 import BeautifulSoup +from pathlib import Path + +TARGET_URL = "https://en.wikipedia.org/wiki/List_of_sovereign_states_and_dependent_territories_by_continent" +OUTPUT_FILE_PATH = "../data/country_region_table.json" + + +def save_to_json(data, file_path): + home_dir = Path(__file__).resolve().parent + out_dir = home_dir / file_path + + with open(out_dir, "w") as file: + json.dump(data, file, indent=2) + + +def main(): + """ + Extract country and region from wikipedia + """ + response = requests.get(TARGET_URL) + soup = BeautifulSoup(response.text, "html.parser") + tables = soup.findAll("table", class_="wikitable")[:6] + regions = ["Africa", "Asia", "Europe", "North America", "Oceania", "South America"] + country_region_table = {} + for table, region in zip(tables, regions): + rows = table.findAll("tr") + for row in rows: + td = row.find("td") + if not td: + continue + b = td.find("b") + if not b: + continue + a = b.find("a") + if not a: + continue + countryName = a["title"] + countryNameAlias = a.text.strip() + country_region_table[countryName] = region + country_region_table[countryNameAlias] = region + + save_to_json(country_region_table, OUTPUT_FILE_PATH) + + +if __name__ == "__main__": + main() diff --git a/missions/W1/M3/utils/create_large_data_csv.py b/missions/W1/M3/utils/create_large_data_csv.py new file mode 100644 index 0000000..bd4035b --- /dev/null +++ b/missions/W1/M3/utils/create_large_data_csv.py @@ -0,0 +1,72 @@ +import random +import string +import sys +import time +from multiprocessing import Pool +from pathlib import Path + +OUTPUT_FILE_PATH = "../data/large_data.csv" + + +def generate_random_name(): + """ + name format: 3~10 characters + first letter should be uppercase and the rest should be lowercase + """ + return random.choice(string.ascii_uppercase) + "".join( + random.choices(string.ascii_lowercase, k=random.randint(2, 9)) + ) + + 
def generate_random_gdp():
    """
    Return a random GDP figure as a double-quoted, comma-grouped integer
    string, e.g. '"1,234,567"'.

    The value is always below 10,000,000 and is skewed toward small numbers:
    the scale is picked from max, max/10 and max/100, and the uniform sample
    is raised to the power 1.8 before being scaled.
    """
    max_value = 10000000
    scale = random.choice([max_value, max_value * 0.1, max_value * 0.01])
    random_value = int(scale * random.random() ** 1.8)
    # The field is quoted because its thousands separators are commas, the
    # same character that separates the CSV columns in generate_data().
    return f'"{random_value:,}"'


def generate_random_region():
    """
    Return one of the six region names, chosen uniformly at random.
    """
    return random.choice(
        ["Asia", "Europe", "Africa", "North America", "South America", "Oceania"]
    )


def generate_data():
    """
    Return one CSV row "name,gdp,region" terminated by a newline.
    """
    return (
        f"{generate_random_name()},{generate_random_gdp()},{generate_random_region()}\n"
    )


def generate_data_chunk(chunk_size: int):
    """
    Return a list of chunk_size freshly generated CSV rows.
    """
    return [generate_data() for _ in range(chunk_size)]


def plan_chunk_sizes(row_number: int, chunk_size: int):
    """
    Split row_number rows into work units of at most chunk_size rows.

    Returns a list of sizes whose sum is row_number: as many full chunks as
    fit, followed by one smaller chunk for the remainder (if any). A
    non-positive row_number yields an empty list.
    """
    full_chunks, remainder = divmod(max(row_number, 0), chunk_size)
    sizes = [chunk_size] * full_chunks
    if remainder:
        sizes.append(remainder)
    return sizes


def main():
    """
    Write random CSV rows to OUTPUT_FILE_PATH (relative to this script).

    The number of rows is read from the first command-line argument and
    defaults to 10000. Rows are generated in worker processes, at most
    CHUNK_SIZE rows per task, and exactly the requested number is written.
    """
    CHUNK_SIZE = 10000
    row_number = 10000
    if len(sys.argv) > 1:
        row_number = int(sys.argv[1])

    home_dir = Path(__file__).resolve().parent
    out_path = home_dir / OUTPUT_FILE_PATH
    # Create the target directory on first use so open() does not fail.
    out_path.parent.mkdir(parents=True, exist_ok=True)

    with open(out_path, "w") as f, Pool() as pool:
        # Row order does not matter, so each chunk is written as soon as
        # whichever worker produced it has finished.
        for data_chunk in pool.imap_unordered(
            generate_data_chunk, plan_chunk_sizes(row_number, CHUNK_SIZE)
        ):
            f.writelines(data_chunk)


if __name__ == "__main__":
    time_start = time.time()
    main()
    time_end = time.time()
    print(f"Time taken: {time_end - time_start:.2f} seconds")
+ +DROP TABLE IF EXISTS OrderDetails; +DROP TABLE IF EXISTS Orders; +DROP TABLE IF EXISTS Products; +DROP TABLE IF EXISTS Categories; +DROP TABLE IF EXISTS Customers; +DROP TABLE IF EXISTS Employees; +DROP TABLE IF EXISTS Shippers; +DROP TABLE IF EXISTS Suppliers; + +CREATE TABLE Categories +( + CategoryID INTEGER PRIMARY KEY AUTOINCREMENT, + CategoryName TEXT, + Description TEXT +); + +CREATE TABLE Customers +( + CustomerID INTEGER PRIMARY KEY AUTOINCREMENT, + CustomerName TEXT, + ContactName TEXT, + Address TEXT, + City TEXT, + PostalCode TEXT, + Country TEXT +); + +CREATE TABLE Employees +( + EmployeeID INTEGER PRIMARY KEY AUTOINCREMENT, + LastName TEXT, + FirstName TEXT, + BirthDate DATE, + Photo TEXT, + Notes TEXT +); + +CREATE TABLE Shippers( + ShipperID INTEGER PRIMARY KEY AUTOINCREMENT, + ShipperName TEXT, + Phone TEXT +); + +CREATE TABLE Suppliers( + SupplierID INTEGER PRIMARY KEY AUTOINCREMENT, + SupplierName TEXT, + ContactName TEXT, + Address TEXT, + City TEXT, + PostalCode TEXT, + Country TEXT, + Phone TEXT +); + +CREATE TABLE Products( + ProductID INTEGER PRIMARY KEY AUTOINCREMENT, + ProductName TEXT, + SupplierID INTEGER, + CategoryID INTEGER, + Unit TEXT, + Price NUMERIC DEFAULT 0, + FOREIGN KEY (CategoryID) REFERENCES Categories (CategoryID), + FOREIGN KEY (SupplierID) REFERENCES Suppliers (SupplierID) +); + +CREATE TABLE Orders( + OrderID INTEGER PRIMARY KEY AUTOINCREMENT, + CustomerID INTEGER, + EmployeeID INTEGER, + OrderDate DATETIME, + ShipperID INTEGER, + FOREIGN KEY (EmployeeID) REFERENCES Employees (EmployeeID), + FOREIGN KEY (CustomerID) REFERENCES Customers (CustomerID), + FOREIGN KEY (ShipperID) REFERENCES Shippers (ShipperID) +); + +CREATE TABLE OrderDetails( + OrderDetailID INTEGER PRIMARY KEY AUTOINCREMENT, + OrderID INTEGER, + ProductID INTEGER, + Quantity INTEGER, + FOREIGN KEY (OrderID) REFERENCES Orders (OrderID), + FOREIGN KEY (ProductID) REFERENCES Products (ProductID) +); + +INSERT INTO Categories VALUES(1,'Beverages','Soft 
drinks, coffees, teas, beers, and ales'); +INSERT INTO Categories VALUES(2,'Condiments','Sweet and savory sauces, relishes, spreads, and seasonings'); +INSERT INTO Categories VALUES(3,'Confections','Desserts, candies, and sweet breads'); +INSERT INTO Categories VALUES(4,'Dairy Products','Cheeses'); +INSERT INTO Categories VALUES(5,'Grains/Cereals','Breads, crackers, pasta, and cereal'); +INSERT INTO Categories VALUES(6,'Meat/Poultry','Prepared meats'); +INSERT INTO Categories VALUES(7,'Produce','Dried fruit and bean curd'); +INSERT INTO Categories VALUES(8,'Seafood','Seaweed and fish'); + +INSERT INTO Customers VALUES(1,'Alfreds Futterkiste','Maria Anders','Obere Str. 57','Berlin','12209','Germany'); +INSERT INTO Customers VALUES(2,'Ana Trujillo Emparedados y helados','Ana Trujillo','Avda. de la Constitución 2222','México D.F.','5021','Mexico'); +INSERT INTO Customers VALUES(3,'Antonio Moreno Taquería','Antonio Moreno','Mataderos 2312','México D.F.','5023','Mexico'); +INSERT INTO Customers VALUES(4,'Around the Horn','Thomas Hardy','120 Hanover Sq.','London','WA1 1DP','UK'); +INSERT INTO Customers VALUES(5,'Berglunds snabbköp','Christina Berglund','Berguvsvägen 8','Luleå','S-958 22','Sweden'); +INSERT INTO Customers VALUES(6,'Blauer See Delikatessen','Hanna Moos','Forsterstr. 
57','Mannheim','68306','Germany'); +INSERT INTO Customers VALUES(7,'Blondel père et fils','Frédérique Citeaux','24, place Kléber','Strasbourg','67000','France'); +INSERT INTO Customers VALUES(8,'Bólido Comidas preparadas','Martín Sommer','C/ Araquil, 67','Madrid','28023','Spain'); +INSERT INTO Customers VALUES(9,'Bon app''''','Laurence Lebihans','12, rue des Bouchers','Marseille','13008','France'); +INSERT INTO Customers VALUES(10,'Bottom-Dollar Marketse','Elizabeth Lincoln','23 Tsawassen Blvd.','Tsawassen','T2F 8M4','Canada'); +INSERT INTO Customers VALUES(11,'B''''s Beverages','Victoria Ashworth','Fauntleroy Circus','London','EC2 5NT','UK'); +INSERT INTO Customers VALUES(12,'Cactus Comidas para llevar','Patricio Simpson','Cerrito 333','Buenos Aires','1010','Argentina'); +INSERT INTO Customers VALUES(13,'Centro comercial Moctezuma','Francisco Chang','Sierras de Granada 9993','México D.F.','5022','Mexico'); +INSERT INTO Customers VALUES(14,'Chop-suey Chinese','Yang Wang','Hauptstr. 29','Bern','3012','Switzerland'); +INSERT INTO Customers VALUES(15,'Comércio Mineiro','Pedro Afonso','Av. dos Lusíadas, 23','São Paulo','05432-043','Brazil'); +INSERT INTO Customers VALUES(16,'Consolidated Holdings','Elizabeth Brown','Berkeley Gardens 12 Brewery','London','WX1 6LT','UK'); +INSERT INTO Customers VALUES(17,'Drachenblut Delikatessend','Sven Ottlieb','Walserweg 21','Aachen','52066','Germany'); +INSERT INTO Customers VALUES(18,'Du monde entier','Janine Labrune','67, rue des Cinquante Otages','Nantes','44000','France'); +INSERT INTO Customers VALUES(19,'Eastern Connection','Ann Devon','35 King George','London','WX3 6FW','UK'); +INSERT INTO Customers VALUES(20,'Ernst Handel','Roland Mendel','Kirchgasse 6','Graz','8010','Austria'); +INSERT INTO Customers VALUES(21,'Familia Arquibaldo','Aria Cruz','Rua Orós, 92','São Paulo','05442-030','Brazil'); +INSERT INTO Customers VALUES(22,'FISSA Fabrica Inter. 
Salchichas S.A.','Diego Roel','C/ Moralzarzal, 86','Madrid','28034','Spain'); +INSERT INTO Customers VALUES(23,'Folies gourmandes','Martine Rancé','184, chaussée de Tournai','Lille','59000','France'); +INSERT INTO Customers VALUES(24,'Folk och fä HB','Maria Larsson','Åkergatan 24','Bräcke','S-844 67','Sweden'); +INSERT INTO Customers VALUES(25,'Frankenversand','Peter Franken','Berliner Platz 43','München','80805','Germany'); +INSERT INTO Customers VALUES(26,'France restauration','Carine Schmitt','54, rue Royale','Nantes','44000','France'); +INSERT INTO Customers VALUES(27,'Franchi S.p.A.','Paolo Accorti','Via Monte Bianco 34','Torino','10100','Italy'); +INSERT INTO Customers VALUES(28,'Furia Bacalhau e Frutos do Mar','Lino Rodriguez','Jardim das rosas n. 32','Lisboa','1675','Portugal'); +INSERT INTO Customers VALUES(29,'Galería del gastrónomo','Eduardo Saavedra','Rambla de Cataluña, 23','Barcelona','8022','Spain'); +INSERT INTO Customers VALUES(30,'Godos Cocina Típica','José Pedro Freyre','C/ Romero, 33','Sevilla','41101','Spain'); +INSERT INTO Customers VALUES(31,'Gourmet Lanchonetes','André Fonseca','Av. Brasil, 442','Campinas','04876-786','Brazil'); +INSERT INTO Customers VALUES(32,'Great Lakes Food Market','Howard Snyder','2732 Baker Blvd.','Eugene','97403','USA'); +INSERT INTO Customers VALUES(33,'GROSELLA-Restaurante','Manuel Pereira','5ª Ave. Los Palos Grandes','Caracas','1081','Venezuela'); +INSERT INTO Customers VALUES(34,'Hanari Carnes','Mario Pontes','Rua do Paço, 67','Rio de Janeiro','05454-876','Brazil'); +INSERT INTO Customers VALUES(35,'HILARIÓN-Abastos','Carlos Hernández','Carrera 22 con Ave. 
Carlos Soublette #8-35','San Cristóbal','5022','Venezuela'); +INSERT INTO Customers VALUES(36,'Hungry Coyote Import Store','Yoshi Latimer','City Center Plaza 516 Main St.','Elgin','97827','USA'); +INSERT INTO Customers VALUES(37,'Hungry Owl All-Night Grocers','Patricia McKenna','8 Johnstown Road','Cork','','Ireland'); +INSERT INTO Customers VALUES(38,'Island Trading','Helen Bennett','Garden House Crowther Way','Cowes','PO31 7PJ','UK'); +INSERT INTO Customers VALUES(39,'Königlich Essen','Philip Cramer','Maubelstr. 90','Brandenburg','14776','Germany'); +INSERT INTO Customers VALUES(40,'La corne d''''abondance','Daniel Tonini','67, avenue de l''''Europe','Versailles','78000','France'); +INSERT INTO Customers VALUES(41,'La maison d''''Asie','Annette Roulet','1 rue Alsace-Lorraine','Toulouse','31000','France'); +INSERT INTO Customers VALUES(42,'Laughing Bacchus Wine Cellars','Yoshi Tannamuri','1900 Oak St.','Vancouver','V3F 2K1','Canada'); +INSERT INTO Customers VALUES(43,'Lazy K Kountry Store','John Steel','12 Orchestra Terrace','Walla Walla','99362','USA'); +INSERT INTO Customers VALUES(44,'Lehmanns Marktstand','Renate Messner','Magazinweg 7','Frankfurt a.M.','60528','Germany'); +INSERT INTO Customers VALUES(45,'Let''''s Stop N Shop','Jaime Yorres','87 Polk St. Suite 5','San Francisco','94117','USA'); +INSERT INTO Customers VALUES(46,'LILA-Supermercado','Carlos González','Carrera 52 con Ave. Bolívar #65-98 Llano Largo','Barquisimeto','3508','Venezuela'); +INSERT INTO Customers VALUES(47,'LINO-Delicateses','Felipe Izquierdo','Ave. 5 de Mayo Porlamar','I. 
de Margarita','4980','Venezuela'); +INSERT INTO Customers VALUES(48,'Lonesome Pine Restaurant','Fran Wilson','89 Chiaroscuro Rd.','Portland','97219','USA'); +INSERT INTO Customers VALUES(49,'Magazzini Alimentari Riuniti','Giovanni Rovelli','Via Ludovico il Moro 22','Bergamo','24100','Italy'); +INSERT INTO Customers VALUES(50,'Maison Dewey','Catherine Dewey','Rue Joseph-Bens 532','Bruxelles','B-1180','Belgium'); +INSERT INTO Customers VALUES(51,'Mère Paillarde','Jean Fresnière','43 rue St. Laurent','Montréal','H1J 1C3','Canada'); +INSERT INTO Customers VALUES(52,'Morgenstern Gesundkost','Alexander Feuer','Heerstr. 22','Leipzig','4179','Germany'); +INSERT INTO Customers VALUES(53,'North/South','Simon Crowther','South House 300 Queensbridge','London','SW7 1RZ','UK'); +INSERT INTO Customers VALUES(54,'Océano Atlántico Ltda.','Yvonne Moncada','Ing. Gustavo Moncada 8585 Piso 20-A','Buenos Aires','1010','Argentina'); +INSERT INTO Customers VALUES(55,'Old World Delicatessen','Rene Phillips','2743 Bering St.','Anchorage','99508','USA'); +INSERT INTO Customers VALUES(56,'Ottilies Käseladen','Henriette Pfalzheim','Mehrheimerstr. 369','Köln','50739','Germany'); +INSERT INTO Customers VALUES(57,'Paris spécialités','Marie Bertrand','265, boulevard Charonne','Paris','75012','France'); +INSERT INTO Customers VALUES(58,'Pericles Comidas clásicas','Guillermo Fernández','Calle Dr. Jorge Cash 321','México D.F.','5033','Mexico'); +INSERT INTO Customers VALUES(59,'Piccolo und mehr','Georg Pipps','Geislweg 14','Salzburg','5020','Austria'); +INSERT INTO Customers VALUES(60,'Princesa Isabel Vinhoss','Isabel de Castro','Estrada da saúde n. 
58','Lisboa','1756','Portugal'); +INSERT INTO Customers VALUES(61,'Que Delícia','Bernardo Batista','Rua da Panificadora, 12','Rio de Janeiro','02389-673','Brazil'); +INSERT INTO Customers VALUES(62,'Queen Cozinha','Lúcia Carvalho','Alameda dos Canàrios, 891','São Paulo','05487-020','Brazil'); +INSERT INTO Customers VALUES(63,'QUICK-Stop','Horst Kloss','Taucherstraße 10','Cunewalde','1307','Germany'); +INSERT INTO Customers VALUES(64,'Rancho grande','Sergio Gutiérrez','Av. del Libertador 900','Buenos Aires','1010','Argentina'); +INSERT INTO Customers VALUES(65,'Rattlesnake Canyon Grocery','Paula Wilson','2817 Milton Dr.','Albuquerque','87110','USA'); +INSERT INTO Customers VALUES(66,'Reggiani Caseifici','Maurizio Moroni','Strada Provinciale 124','Reggio Emilia','42100','Italy'); +INSERT INTO Customers VALUES(67,'Ricardo Adocicados','Janete Limeira','Av. Copacabana, 267','Rio de Janeiro','02389-890','Brazil'); +INSERT INTO Customers VALUES(68,'Richter Supermarkt','Michael Holz','Grenzacherweg 237','Genève','1203','Switzerland'); +INSERT INTO Customers VALUES(69,'Romero y tomillo','Alejandra Camino','Gran Vía, 1','Madrid','28001','Spain'); +INSERT INTO Customers VALUES(70,'Santé Gourmet','Jonas Bergulfsen','Erling Skakkes gate 78','Stavern','4110','Norway'); +INSERT INTO Customers VALUES(71,'Save-a-lot Markets','Jose Pavarotti','187 Suffolk Ln.','Boise','83720','USA'); +INSERT INTO Customers VALUES(72,'Seven Seas Imports','Hari Kumar','90 Wadhurst Rd.','London','OX15 4NB','UK'); +INSERT INTO Customers VALUES(73,'Simons bistro','Jytte Petersen','Vinbæltet 34','København','1734','Denmark'); +INSERT INTO Customers VALUES(74,'Spécialités du monde','Dominique Perrier','25, rue Lauriston','Paris','75016','France'); +INSERT INTO Customers VALUES(75,'Split Rail Beer & Ale','Art Braunschweiger','P.O. 
Box 555','Lander','82520','USA'); +INSERT INTO Customers VALUES(76,'Suprêmes délices','Pascale Cartrain','Boulevard Tirou, 255','Charleroi','B-6000','Belgium'); +INSERT INTO Customers VALUES(77,'The Big Cheese','Liz Nixon','89 Jefferson Way Suite 2','Portland','97201','USA'); +INSERT INTO Customers VALUES(78,'The Cracker Box','Liu Wong','55 Grizzly Peak Rd.','Butte','59801','USA'); +INSERT INTO Customers VALUES(79,'Toms Spezialitäten','Karin Josephs','Luisenstr. 48','Münster','44087','Germany'); +INSERT INTO Customers VALUES(80,'Tortuga Restaurante','Miguel Angel Paolino','Avda. Azteca 123','México D.F.','5033','Mexico'); +INSERT INTO Customers VALUES(81,'Tradição Hipermercados','Anabela Domingues','Av. Inês de Castro, 414','São Paulo','05634-030','Brazil'); +INSERT INTO Customers VALUES(82,'Trail''''s Head Gourmet Provisioners','Helvetius Nagy','722 DaVinci Blvd.','Kirkland','98034','USA'); +INSERT INTO Customers VALUES(83,'Vaffeljernet','Palle Ibsen','Smagsløget 45','Århus','8200','Denmark'); +INSERT INTO Customers VALUES(84,'Victuailles en stock','Mary Saveley','2, rue du Commerce','Lyon','69004','France'); +INSERT INTO Customers VALUES(85,'Vins et alcools Chevalier','Paul Henriot','59 rue de l''''Abbaye','Reims','51100','France'); +INSERT INTO Customers VALUES(86,'Die Wandernde Kuh','Rita Müller','Adenauerallee 900','Stuttgart','70563','Germany'); +INSERT INTO Customers VALUES(87,'Wartian Herkku','Pirkko Koskitalo','Torikatu 38','Oulu','90110','Finland'); +INSERT INTO Customers VALUES(88,'Wellington Importadora','Paula Parente','Rua do Mercado, 12','Resende','08737-363','Brazil'); +INSERT INTO Customers VALUES(89,'White Clover Markets','Karl Jablonski','305 - 14th Ave. S. Suite 3B','Seattle','98128','USA'); +INSERT INTO Customers VALUES(90,'Wilman Kala','Matti Karttunen','Keskuskatu 45','Helsinki','21240','Finland'); +INSERT INTO Customers VALUES(91,'Wolski','Zbyszek','ul. 
Filtrowa 68','Walla','01-012','Poland'); + +INSERT INTO Employees VALUES(1,'Davolio','Nancy','1968-12-08','EmpID1.pic','Education includes a BA in psychology from Colorado State University. She also completed (The Art of the Cold Call). Nancy is a member of ''Toastmasters International''.'); +INSERT INTO Employees VALUES(2,'Fuller','Andrew','1952-02-19','EmpID2.pic','Andrew received his BTS commercial and a Ph.D. in international marketing from the University of Dallas. He is fluent in French and Italian and reads German. He joined the company as a sales representative, was promoted to sales manager and was then named vice president of sales. Andrew is a member of the Sales Management Roundtable, the Seattle Chamber of Commerce, and the Pacific Rim Importers Association.'); +INSERT INTO Employees VALUES(3,'Leverling','Janet','1963-08-30','EmpID3.pic','Janet has a BS degree in chemistry from Boston College). She has also completed a certificate program in food retailing management. Janet was hired as a sales associate and was promoted to sales representative.'); +INSERT INTO Employees VALUES(4,'Peacock','Margaret','1958-09-19','EmpID4.pic','Margaret holds a BA in English literature from Concordia College and an MA from the American Institute of Culinary Arts. She was temporarily assigned to the London office before returning to her permanent post in Seattle.'); +INSERT INTO Employees VALUES(5,'Buchanan','Steven','1955-03-04','EmpID5.pic','Steven Buchanan graduated from St. Andrews University, Scotland, with a BSC degree. Upon joining the company as a sales representative, he spent 6 months in an orientation program at the Seattle office and then returned to his permanent post in London, where he was promoted to sales manager. Mr. Buchanan has completed the courses ''Successful Telemarketing'' and ''International Sales Management''. 
He is fluent in French.'); +INSERT INTO Employees VALUES(6,'Suyama','Michael','1963-07-02','EmpID6.pic','Michael is a graduate of Sussex University (MA, economics) and the University of California at Los Angeles (MBA, marketing). He has also taken the courses ''Multi-Cultural Selling'' and ''Time Management for the Sales Professional''. He is fluent in Japanese and can read and write French, Portuguese, and Spanish.'); +INSERT INTO Employees VALUES(7,'King','Robert','1960-05-29','EmpID7.pic','Robert King served in the Peace Corps and traveled extensively before completing his degree in English at the University of Michigan and then joining the company. After completing a course entitled ''Selling in Europe'', he was transferred to the London office.'); +INSERT INTO Employees VALUES(8,'Callahan','Laura','1958-01-09','EmpID8.pic','Laura received a BA in psychology from the University of Washington. She has also completed a course in business French. She reads and writes French.'); +INSERT INTO Employees VALUES(9,'Dodsworth','Anne','1969-07-02','EmpID9.pic','Anne has a BA degree in English from St. Lawrence College. She is fluent in French and German.'); +INSERT INTO Employees VALUES(10,'West','Adam','1928-09-19','EmpID10.pic','An old chum.'); + +INSERT INTO Shippers VALUES(1, 'Speedy Express', '(503) 555-9831'); +INSERT INTO Shippers VALUES(2, 'United Package', '(503) 555-3199'); +INSERT INTO Shippers VALUES(3, 'Federal Shipping', '(503) 555-9931'); + +INSERT INTO Suppliers VALUES(1,'Exotic Liquid','Charlotte Cooper','49 Gilbert St.','Londona','EC1 4SD','UK','(171) 555-2222'); +INSERT INTO Suppliers VALUES(2,'New Orleans Cajun Delights','Shelley Burke','P.O. 
Box 78934','New Orleans','70117','USA','(100) 555-4822'); +INSERT INTO Suppliers VALUES(3,'Grandma Kelly''s Homestead','Regina Murphy','707 Oxford Rd.','Ann Arbor','48104','USA','(313) 555-5735'); +INSERT INTO Suppliers VALUES(4,'Tokyo Traders','Yoshi Nagase','9-8 Sekimai Musashino-shi','Tokyo','100','Japan','(03) 3555-5011'); +INSERT INTO Suppliers VALUES(5,'Cooperativa de Quesos ''Las Cabras''','Antonio del Valle Saavedra','Calle del Rosal 4','Oviedo','33007','Spain','(98) 598 76 54'); +INSERT INTO Suppliers VALUES(6,'Mayumi''s','Mayumi Ohno','92 Setsuko Chuo-ku','Osaka','545','Japan','(06) 431-7877'); +INSERT INTO Suppliers VALUES(7,'Pavlova, Ltd.','Ian Devling','74 Rose St. Moonie Ponds','Melbourne','3058','Australia','(03) 444-2343'); +INSERT INTO Suppliers VALUES(8,'Specialty Biscuits, Ltd.','Peter Wilson','29 King''s Way','Manchester','M14 GSD','UK','(161) 555-4448'); +INSERT INTO Suppliers VALUES(9,'PB Knäckebröd AB','Lars Peterson','Kaloadagatan 13','Göteborg','S-345 67','Sweden','031-987 65 43'); +INSERT INTO Suppliers VALUES(10,'Refrescos Americanas LTDA','Carlos Diaz','Av. das Americanas 12.890','São Paulo','5442','Brazil','(11) 555 4640'); +INSERT INTO Suppliers VALUES(11,'Heli Süßwaren GmbH & Co. 
KG','Petra Winkler','Tiergartenstraße 5','Berlin','10785','Germany','(010) 9984510'); +INSERT INTO Suppliers VALUES(12,'Plutzer Lebensmittelgroßmärkte AG','Martin Bein','Bogenallee 51','Frankfurt','60439','Germany','(069) 992755'); +INSERT INTO Suppliers VALUES(13,'Nord-Ost-Fisch Handelsgesellschaft mbH','Sven Petersen','Frahmredder 112a','Cuxhaven','27478','Germany','(04721) 8713'); +INSERT INTO Suppliers VALUES(14,'Formaggi Fortini s.r.l.','Elio Rossi','Viale Dante, 75','Ravenna','48100','Italy','(0544) 60323'); +INSERT INTO Suppliers VALUES(15,'Norske Meierier','Beate Vileid','Hatlevegen 5','Sandvika','1320','Norway','(0)2-953010'); +INSERT INTO Suppliers VALUES(16,'Bigfoot Breweries','Cheryl Saylor','3400 - 8th Avenue Suite 210','Bend','97101','USA','(503) 555-9931'); +INSERT INTO Suppliers VALUES(17,'Svensk Sjöföda AB','Michael Björn','Brovallavägen 231','Stockholm','S-123 45','Sweden','08-123 45 67'); +INSERT INTO Suppliers VALUES(18,'Aux joyeux ecclésiastiques','Guylène Nodier','203, Rue des Francs-Bourgeois','Paris','75004','France','(1) 03.83.00.68'); +INSERT INTO Suppliers VALUES(19,'New England Seafood Cannery','Robb Merchant','Order Processing Dept. 
2100 Paul Revere Blvd.','Boston','2134','USA','(617) 555-3267'); +INSERT INTO Suppliers VALUES(20,'Leka Trading','Chandra Leka','471 Serangoon Loop, Suite #402','Singapore','512','Singapore','555-8787'); +INSERT INTO Suppliers VALUES(21,'Lyngbysild','Niels Petersen','Lyngbysild Fiskebakken 10','Lyngby','2800','Denmark','43844108'); +INSERT INTO Suppliers VALUES(22,'Zaanse Snoepfabriek','Dirk Luchte','Verkoop Rijnweg 22','Zaandam','9999 ZZ','Netherlands','(12345) 1212'); +INSERT INTO Suppliers VALUES(23,'Karkki Oy','Anne Heikkonen','Valtakatu 12','Lappeenranta','53120','Finland','(953) 10956'); +INSERT INTO Suppliers VALUES(24,'G''day, Mate','Wendy Mackenzie','170 Prince Edward Parade Hunter''s Hill','Sydney','2042','Australia','(02) 555-5914'); +INSERT INTO Suppliers VALUES(25,'Ma Maison','Jean-Guy Lauzon','2960 Rue St. Laurent','Montréal','H1J 1C3','Canada','(514) 555-9022'); +INSERT INTO Suppliers VALUES(26,'Pasta Buttini s.r.l.','Giovanni Giudici','Via dei Gelsomini, 153','Salerno','84100','Italy','(089) 6547665'); +INSERT INTO Suppliers VALUES(27,'Escargots Nouveaux','Marie Delamare','22, rue H. Voiron','Montceau','71300','France','85.57.00.07'); +INSERT INTO Suppliers VALUES(28,'Gai pâturage','Eliane Noz','Bat. 
B 3, rue des Alpes','Annecy','74000','France','38.76.98.06'); +INSERT INTO Suppliers VALUES(29,'Forêts d''érables','Chantal Goulet','148 rue Chasseur','Ste-Hyacinthe','J2S 7S8','Canada','(514) 555-2955'); + +INSERT INTO Products VALUES(1,'Chais',1,1,'10 boxes x 20 bags',18.00); +INSERT INTO Products VALUES(2,'Chang',1,1,'24 - 12 oz bottles',19.00); +INSERT INTO Products VALUES(3,'Aniseed Syrup',1,2,'12 - 550 ml bottles',10.00); +INSERT INTO Products VALUES(4,'Chef Anton''s Cajun Seasoning',2,2,'48 - 6 oz jars',22.00); +INSERT INTO Products VALUES(5,'Chef Anton''s Gumbo Mix',2,2,'36 boxes',21.35); +INSERT INTO Products VALUES(6,'Grandma''s Boysenberry Spread',3,2,'12 - 8 oz jars',25.00); +INSERT INTO Products VALUES(7,'Uncle Bob''s Organic Dried Pears',3,7,'12 - 1 lb pkgs.',30.00); +INSERT INTO Products VALUES(8,'Northwoods Cranberry Sauce',3,2,'12 - 12 oz jars',40.00); +INSERT INTO Products VALUES(9,'Mishi Kobe Niku',4,6,'18 - 500 g pkgs.',97.00); +INSERT INTO Products VALUES(10,'Ikura',4,8,'12 - 200 ml jars',31.00); +INSERT INTO Products VALUES(11,'Queso Cabrales',5,4,'1 kg pkg.',21.00); +INSERT INTO Products VALUES(12,'Queso Manchego La Pastora',5,4,'10 - 500 g pkgs.',38.00); +INSERT INTO Products VALUES(13,'Konbu',6,8,'2 kg box',6.00); +INSERT INTO Products VALUES(14,'Tofu',6,7,'40 - 100 g pkgs.',23.25); +INSERT INTO Products VALUES(15,'Genen Shouyu',6,2,'24 - 250 ml bottles',15.50); +INSERT INTO Products VALUES(16,'Pavlova',7,3,'32 - 500 g boxes',17.45); +INSERT INTO Products VALUES(17,'Alice Mutton',7,6,'20 - 1 kg tins',39); +INSERT INTO Products VALUES(18,'Carnarvon Tigers',7,8,'16 kg pkg.',62.50); +INSERT INTO Products VALUES(19,'Teatime Chocolate Biscuits',8,3,'10 boxes x 12 pieces',9.20); +INSERT INTO Products VALUES(20,'Sir Rodney''s Marmalade',8,3,'30 gift boxes',81.00); +INSERT INTO Products VALUES(21,'Sir Rodney''s Scones',8,3,'24 pkgs. 
x 4 pieces',10.00); +INSERT INTO Products VALUES(22,'Gustaf''s Knäckebröd',9,5,'24 - 500 g pkgs.',21.00); +INSERT INTO Products VALUES(23,'Tunnbröd',9,5,'12 - 250 g pkgs.',9.00); +INSERT INTO Products VALUES(24,'Guaraná Fantástica',10,1,'12 - 355 ml cans',4.50); +INSERT INTO Products VALUES(25,'NuNuCa Nuß-Nougat-Creme',11,3,'20 - 450 g glasses',14.00); +INSERT INTO Products VALUES(26,'Gumbär Gummibärchen',11,3,'100 - 250 g bags',31.23); +INSERT INTO Products VALUES(27,'Schoggi Schokolade',11,3,'100 - 100 g pieces',43.90); +INSERT INTO Products VALUES(28,'Rössle Sauerkraut',12,7,'25 - 825 g cans',45.60); +INSERT INTO Products VALUES(29,'Thüringer Rostbratwurst',12,6,'50 bags x 30 sausgs.',123.79); +INSERT INTO Products VALUES(30,'Nord-Ost Matjeshering',13,8,'10 - 200 g glasses',25.89); +INSERT INTO Products VALUES(31,'Gorgonzola Telino',14,4,'12 - 100 g pkgs',12.50); +INSERT INTO Products VALUES(32,'Mascarpone Fabioli',14,4,'24 - 200 g pkgs.',32.00); +INSERT INTO Products VALUES(33,'Geitost',15,4,'500 g',2.50); +INSERT INTO Products VALUES(34,'Sasquatch Ale',16,1,'24 - 12 oz bottles',14.00); +INSERT INTO Products VALUES(35,'Steeleye Stout',16,1,'24 - 12 oz bottles',18.00); +INSERT INTO Products VALUES(36,'Inlagd Sill',17,8,'24 - 250 g jars',19.00); +INSERT INTO Products VALUES(37,'Gravad lax',17,8,'12 - 500 g pkgs.',26.00); +INSERT INTO Products VALUES(38,'Côte de Blaye',18,1,'12 - 75 cl bottles',263.50); +INSERT INTO Products VALUES(39,'Chartreuse verte',18,1,'750 cc per bottle',18.00); +INSERT INTO Products VALUES(40,'Boston Crab Meat',19,8,'24 - 4 oz tins',18.40); +INSERT INTO Products VALUES(41,'Jack''s New England Clam Chowder',19,8,'12 - 12 oz cans',9.65); +INSERT INTO Products VALUES(42,'Singaporean Hokkien Fried Mee',20,5,'32 - 1 kg pkgs.',14.00); +INSERT INTO Products VALUES(43,'Ipoh Coffee',20,1,'16 - 500 g tins',46.00); +INSERT INTO Products VALUES(44,'Gula Malacca',20,2,'20 - 2 kg bags',19.45); +INSERT INTO Products VALUES(45,'Røgede sild',21,8,'1k 
pkg.',9.50); +INSERT INTO Products VALUES(46,'Spegesild',21,8,'4 - 450 g glasses',12.00); +INSERT INTO Products VALUES(47,'Zaanse koeken',22,3,'10 - 4 oz boxes',9.50); +INSERT INTO Products VALUES(48,'Chocolade',22,3,'10 pkgs.',12.75); +INSERT INTO Products VALUES(49,'Maxilaku',23,3,'24 - 50 g pkgs.',20.00); +INSERT INTO Products VALUES(50,'Valkoinen suklaa',23,3,'12 - 100 g bars',16.25); +INSERT INTO Products VALUES(51,'Manjimup Dried Apples',24,7,'50 - 300 g pkgs.',53.00); +INSERT INTO Products VALUES(52,'Filo Mix',24,5,'16 - 2 kg boxes',7.00); +INSERT INTO Products VALUES(53,'Perth Pasties',24,6,'48 pieces',32.80); +INSERT INTO Products VALUES(54,'Tourtière',25,6,'16 pies',7.45); +INSERT INTO Products VALUES(55,'Pâté chinois',25,6,'24 boxes x 2 pies',24.00); +INSERT INTO Products VALUES(56,'Gnocchi di nonna Alice',26,5,'24 - 250 g pkgs.',38.00); +INSERT INTO Products VALUES(57,'Ravioli Angelo',26,5,'24 - 250 g pkgs.',19.50); +INSERT INTO Products VALUES(58,'Escargots de Bourgogne',27,8,'24 pieces',13.25); +INSERT INTO Products VALUES(59,'Raclette Courdavault',28,4,'5 kg pkg.',55.00); +INSERT INTO Products VALUES(60,'Camembert Pierrot',28,4,'15 - 300 g rounds',34.00); +INSERT INTO Products VALUES(61,'Sirop d''érable',29,2,'24 - 500 ml bottles',28.50); +INSERT INTO Products VALUES(62,'Tarte au sucre',29,3,'48 pies',49.30); +INSERT INTO Products VALUES(63,'Vegie-spread',7,2,'15 - 625 g jars',43.90); +INSERT INTO Products VALUES(64,'Wimmers gute Semmelknödel',12,5,'20 bags x 4 pieces',33.25); +INSERT INTO Products VALUES(65,'Louisiana Fiery Hot Pepper Sauce',2,2,'32 - 8 oz bottles',21.05); +INSERT INTO Products VALUES(66,'Louisiana Hot Spiced Okra',2,2,'24 - 8 oz jars',17.00); +INSERT INTO Products VALUES(67,'Laughing Lumberjack Lager',16,1,'24 - 12 oz bottles',14.00); +INSERT INTO Products VALUES(68,'Scottish Longbreads',8,3,'10 boxes x 8 pieces',12.50); +INSERT INTO Products VALUES(69,'Gudbrandsdalsost',15,4,'10 kg pkg.',36.00); +INSERT INTO Products 
VALUES(70,'Outback Lager',7,1,'24 - 355 ml bottles',15.00); +INSERT INTO Products VALUES(71,'Fløtemysost',15,4,'10 - 500 g pkgs.',21.50); +INSERT INTO Products VALUES(72,'Mozzarella di Giovanni',14,4,'24 - 200 g pkgs.',34.80); +INSERT INTO Products VALUES(73,'Röd Kaviar',17,8,'24 - 150 g jars',15.00); +INSERT INTO Products VALUES(74,'Longlife Tofu',4,7,'5 kg pkg.',10.00); +INSERT INTO Products VALUES(75,'Rhönbräu Klosterbier',12,1,'24 - 0.5 l bottles',7.75); +INSERT INTO Products VALUES(76,'Lakkalikööri',23,1,'500 ml',18.00); +INSERT INTO Products VALUES(77,'Original Frankfurter grüne Soße',12,2,'12 boxes',13.00); + +INSERT INTO Orders VALUES(10248,90,5,'1996-07-04',3); +INSERT INTO Orders VALUES(10249,81,6,'1996-07-05',1); +INSERT INTO Orders VALUES(10250,34,4,'1996-07-08',2); +INSERT INTO Orders VALUES(10251,84,3,'1996-07-08',1); +INSERT INTO Orders VALUES(10252,76,4,'1996-07-09',2); +INSERT INTO Orders VALUES(10253,34,3,'1996-07-10',2); +INSERT INTO Orders VALUES(10254,14,5,'1996-07-11',2); +INSERT INTO Orders VALUES(10255,68,9,'1996-07-12',3); +INSERT INTO Orders VALUES(10256,88,3,'1996-07-15',2); +INSERT INTO Orders VALUES(10257,35,4,'1996-07-16',3); +INSERT INTO Orders VALUES(10258,20,1,'1996-07-17',1); +INSERT INTO Orders VALUES(10259,13,4,'1996-07-18',3); +INSERT INTO Orders VALUES(10260,55,4,'1996-07-19',1); +INSERT INTO Orders VALUES(10261,61,4,'1996-07-19',2); +INSERT INTO Orders VALUES(10262,65,8,'1996-07-22',3); +INSERT INTO Orders VALUES(10263,20,9,'1996-07-23',3); +INSERT INTO Orders VALUES(10264,24,6,'1996-07-24',3); +INSERT INTO Orders VALUES(10265,7,2,'1996-07-25',1); +INSERT INTO Orders VALUES(10266,87,3,'1996-07-26',3); +INSERT INTO Orders VALUES(10267,25,4,'1996-07-29',1); +INSERT INTO Orders VALUES(10268,33,8,'1996-07-30',3); +INSERT INTO Orders VALUES(10269,89,5,'1996-07-31',1); +INSERT INTO Orders VALUES(10270,87,1,'1996-08-01',1); +INSERT INTO Orders VALUES(10271,75,6,'1996-08-01',2); +INSERT INTO Orders VALUES(10272,65,6,'1996-08-02',2); 
+INSERT INTO Orders VALUES(10273,63,3,'1996-08-05',3); +INSERT INTO Orders VALUES(10274,85,6,'1996-08-06',1); +INSERT INTO Orders VALUES(10275,49,1,'1996-08-07',1); +INSERT INTO Orders VALUES(10276,80,8,'1996-08-08',3); +INSERT INTO Orders VALUES(10277,52,2,'1996-08-09',3); +INSERT INTO Orders VALUES(10278,5,8,'1996-08-12',2); +INSERT INTO Orders VALUES(10279,44,8,'1996-08-13',2); +INSERT INTO Orders VALUES(10280,5,2,'1996-08-14',1); +INSERT INTO Orders VALUES(10281,69,4,'1996-08-14',1); +INSERT INTO Orders VALUES(10282,69,4,'1996-08-15',1); +INSERT INTO Orders VALUES(10283,46,3,'1996-08-16',3); +INSERT INTO Orders VALUES(10284,44,4,'1996-08-19',1); +INSERT INTO Orders VALUES(10285,63,1,'1996-08-20',2); +INSERT INTO Orders VALUES(10286,63,8,'1996-08-21',3); +INSERT INTO Orders VALUES(10287,67,8,'1996-08-22',3); +INSERT INTO Orders VALUES(10288,66,4,'1996-08-23',1); +INSERT INTO Orders VALUES(10289,11,7,'1996-08-26',3); +INSERT INTO Orders VALUES(10290,15,8,'1996-08-27',1); +INSERT INTO Orders VALUES(10291,61,6,'1996-08-27',2); +INSERT INTO Orders VALUES(10292,81,1,'1996-08-28',2); +INSERT INTO Orders VALUES(10293,80,1,'1996-08-29',3); +INSERT INTO Orders VALUES(10294,65,4,'1996-08-30',2); +INSERT INTO Orders VALUES(10295,85,2,'1996-09-02',2); +INSERT INTO Orders VALUES(10296,46,6,'1996-09-03',1); +INSERT INTO Orders VALUES(10297,7,5,'1996-09-04',2); +INSERT INTO Orders VALUES(10298,37,6,'1996-09-05',2); +INSERT INTO Orders VALUES(10299,67,4,'1996-09-06',2); +INSERT INTO Orders VALUES(10300,49,2,'1996-09-09',2); +INSERT INTO Orders VALUES(10301,86,8,'1996-09-09',2); +INSERT INTO Orders VALUES(10302,76,4,'1996-09-10',2); +INSERT INTO Orders VALUES(10303,30,7,'1996-09-11',2); +INSERT INTO Orders VALUES(10304,80,1,'1996-09-12',2); +INSERT INTO Orders VALUES(10305,55,8,'1996-09-13',3); +INSERT INTO Orders VALUES(10306,69,1,'1996-09-16',3); +INSERT INTO Orders VALUES(10307,48,2,'1996-09-17',2); +INSERT INTO Orders VALUES(10308,2,7,'1996-09-18',3); +INSERT INTO Orders 
VALUES(10309,37,3,'1996-09-19',1); +INSERT INTO Orders VALUES(10310,77,8,'1996-09-20',2); +INSERT INTO Orders VALUES(10311,18,1,'1996-09-20',3); +INSERT INTO Orders VALUES(10312,86,2,'1996-09-23',2); +INSERT INTO Orders VALUES(10313,63,2,'1996-09-24',2); +INSERT INTO Orders VALUES(10314,65,1,'1996-09-25',2); +INSERT INTO Orders VALUES(10315,38,4,'1996-09-26',2); +INSERT INTO Orders VALUES(10316,65,1,'1996-09-27',3); +INSERT INTO Orders VALUES(10317,48,6,'1996-09-30',1); +INSERT INTO Orders VALUES(10318,38,8,'1996-10-01',2); +INSERT INTO Orders VALUES(10319,80,7,'1996-10-02',3); +INSERT INTO Orders VALUES(10320,87,5,'1996-10-03',3); +INSERT INTO Orders VALUES(10321,38,3,'1996-10-03',2); +INSERT INTO Orders VALUES(10322,58,7,'1996-10-04',3); +INSERT INTO Orders VALUES(10323,39,4,'1996-10-07',1); +INSERT INTO Orders VALUES(10324,71,9,'1996-10-08',1); +INSERT INTO Orders VALUES(10325,39,1,'1996-10-09',3); +INSERT INTO Orders VALUES(10326,8,4,'1996-10-10',2); +INSERT INTO Orders VALUES(10327,24,2,'1996-10-11',1); +INSERT INTO Orders VALUES(10328,28,4,'1996-10-14',3); +INSERT INTO Orders VALUES(10329,75,4,'1996-10-15',2); +INSERT INTO Orders VALUES(10330,46,3,'1996-10-16',1); +INSERT INTO Orders VALUES(10331,9,9,'1996-10-16',1); +INSERT INTO Orders VALUES(10332,51,3,'1996-10-17',2); +INSERT INTO Orders VALUES(10333,87,5,'1996-10-18',3); +INSERT INTO Orders VALUES(10334,84,8,'1996-10-21',2); +INSERT INTO Orders VALUES(10335,37,7,'1996-10-22',2); +INSERT INTO Orders VALUES(10336,60,7,'1996-10-23',2); +INSERT INTO Orders VALUES(10337,25,4,'1996-10-24',3); +INSERT INTO Orders VALUES(10338,55,4,'1996-10-25',3); +INSERT INTO Orders VALUES(10339,51,2,'1996-10-28',2); +INSERT INTO Orders VALUES(10340,9,1,'1996-10-29',3); +INSERT INTO Orders VALUES(10341,73,7,'1996-10-29',3); +INSERT INTO Orders VALUES(10342,25,4,'1996-10-30',2); +INSERT INTO Orders VALUES(10343,44,4,'1996-10-31',1); +INSERT INTO Orders VALUES(10344,89,4,'1996-11-01',2); +INSERT INTO Orders 
VALUES(10345,63,2,'1996-11-04',2); +INSERT INTO Orders VALUES(10346,65,3,'1996-11-05',3); +INSERT INTO Orders VALUES(10347,21,4,'1996-11-06',3); +INSERT INTO Orders VALUES(10348,86,4,'1996-11-07',2); +INSERT INTO Orders VALUES(10349,75,7,'1996-11-08',1); +INSERT INTO Orders VALUES(10350,41,6,'1996-11-11',2); +INSERT INTO Orders VALUES(10351,20,1,'1996-11-11',1); +INSERT INTO Orders VALUES(10352,28,3,'1996-11-12',3); +INSERT INTO Orders VALUES(10353,59,7,'1996-11-13',3); +INSERT INTO Orders VALUES(10354,58,8,'1996-11-14',3); +INSERT INTO Orders VALUES(10355,4,6,'1996-11-15',1); +INSERT INTO Orders VALUES(10356,86,6,'1996-11-18',2); +INSERT INTO Orders VALUES(10357,46,1,'1996-11-19',3); +INSERT INTO Orders VALUES(10358,41,5,'1996-11-20',1); +INSERT INTO Orders VALUES(10359,72,5,'1996-11-21',3); +INSERT INTO Orders VALUES(10360,7,4,'1996-11-22',3); +INSERT INTO Orders VALUES(10361,63,1,'1996-11-22',2); +INSERT INTO Orders VALUES(10362,9,3,'1996-11-25',1); +INSERT INTO Orders VALUES(10363,17,4,'1996-11-26',3); +INSERT INTO Orders VALUES(10364,19,1,'1996-11-26',1); +INSERT INTO Orders VALUES(10365,3,3,'1996-11-27',2); +INSERT INTO Orders VALUES(10366,29,8,'1996-11-28',2); +INSERT INTO Orders VALUES(10367,83,7,'1996-11-28',3); +INSERT INTO Orders VALUES(10368,20,2,'1996-11-29',2); +INSERT INTO Orders VALUES(10369,75,8,'1996-12-02',2); +INSERT INTO Orders VALUES(10370,14,6,'1996-12-03',2); +INSERT INTO Orders VALUES(10371,41,1,'1996-12-03',1); +INSERT INTO Orders VALUES(10372,62,5,'1996-12-04',2); +INSERT INTO Orders VALUES(10373,37,4,'1996-12-05',3); +INSERT INTO Orders VALUES(10374,91,1,'1996-12-05',3); +INSERT INTO Orders VALUES(10375,36,3,'1996-12-06',2); +INSERT INTO Orders VALUES(10376,51,1,'1996-12-09',2); +INSERT INTO Orders VALUES(10377,72,1,'1996-12-09',3); +INSERT INTO Orders VALUES(10378,24,5,'1996-12-10',3); +INSERT INTO Orders VALUES(10379,61,2,'1996-12-11',1); +INSERT INTO Orders VALUES(10380,37,8,'1996-12-12',3); +INSERT INTO Orders 
VALUES(10381,46,3,'1996-12-12',3); +INSERT INTO Orders VALUES(10382,20,4,'1996-12-13',1); +INSERT INTO Orders VALUES(10383,4,8,'1996-12-16',3); +INSERT INTO Orders VALUES(10384,5,3,'1996-12-16',3); +INSERT INTO Orders VALUES(10385,75,1,'1996-12-17',2); +INSERT INTO Orders VALUES(10386,21,9,'1996-12-18',3); +INSERT INTO Orders VALUES(10387,70,1,'1996-12-18',2); +INSERT INTO Orders VALUES(10388,72,2,'1996-12-19',1); +INSERT INTO Orders VALUES(10389,10,4,'1996-12-20',2); +INSERT INTO Orders VALUES(10390,20,6,'1996-12-23',1); +INSERT INTO Orders VALUES(10391,17,3,'1996-12-23',3); +INSERT INTO Orders VALUES(10392,59,2,'1996-12-24',3); +INSERT INTO Orders VALUES(10393,71,1,'1996-12-25',3); +INSERT INTO Orders VALUES(10394,36,1,'1996-12-25',3); +INSERT INTO Orders VALUES(10395,35,6,'1996-12-26',1); +INSERT INTO Orders VALUES(10396,25,1,'1996-12-27',3); +INSERT INTO Orders VALUES(10397,60,5,'1996-12-27',1); +INSERT INTO Orders VALUES(10398,71,2,'1996-12-30',3); +INSERT INTO Orders VALUES(10399,83,8,'1996-12-31',3); +INSERT INTO Orders VALUES(10400,19,1,'1997-01-01',3); +INSERT INTO Orders VALUES(10401,65,1,'1997-01-01',1); +INSERT INTO Orders VALUES(10402,20,8,'1997-01-02',2); +INSERT INTO Orders VALUES(10403,20,4,'1997-01-03',3); +INSERT INTO Orders VALUES(10404,49,2,'1997-01-03',1); +INSERT INTO Orders VALUES(10405,47,1,'1997-01-06',1); +INSERT INTO Orders VALUES(10406,62,7,'1997-01-07',1); +INSERT INTO Orders VALUES(10407,56,2,'1997-01-07',2); +INSERT INTO Orders VALUES(10408,23,8,'1997-01-08',1); +INSERT INTO Orders VALUES(10409,54,3,'1997-01-09',1); +INSERT INTO Orders VALUES(10410,10,3,'1997-01-10',3); +INSERT INTO Orders VALUES(10411,10,9,'1997-01-10',3); +INSERT INTO Orders VALUES(10412,87,8,'1997-01-13',2); +INSERT INTO Orders VALUES(10413,41,3,'1997-01-14',2); +INSERT INTO Orders VALUES(10414,21,2,'1997-01-14',3); +INSERT INTO Orders VALUES(10415,36,3,'1997-01-15',1); +INSERT INTO Orders VALUES(10416,87,8,'1997-01-16',3); +INSERT INTO Orders 
VALUES(10417,73,4,'1997-01-16',3); +INSERT INTO Orders VALUES(10418,63,4,'1997-01-17',1); +INSERT INTO Orders VALUES(10419,68,4,'1997-01-20',2); +INSERT INTO Orders VALUES(10420,88,3,'1997-01-21',1); +INSERT INTO Orders VALUES(10421,61,8,'1997-01-21',1); +INSERT INTO Orders VALUES(10422,27,2,'1997-01-22',1); +INSERT INTO Orders VALUES(10423,31,6,'1997-01-23',3); +INSERT INTO Orders VALUES(10424,51,7,'1997-01-23',2); +INSERT INTO Orders VALUES(10425,41,6,'1997-01-24',2); +INSERT INTO Orders VALUES(10426,29,4,'1997-01-27',1); +INSERT INTO Orders VALUES(10427,59,4,'1997-01-27',2); +INSERT INTO Orders VALUES(10428,66,7,'1997-01-28',1); +INSERT INTO Orders VALUES(10429,37,3,'1997-01-29',2); +INSERT INTO Orders VALUES(10430,20,4,'1997-01-30',1); +INSERT INTO Orders VALUES(10431,10,4,'1997-01-30',2); +INSERT INTO Orders VALUES(10432,75,3,'1997-01-31',2); +INSERT INTO Orders VALUES(10433,60,3,'1997-02-03',3); +INSERT INTO Orders VALUES(10434,24,3,'1997-02-03',2); +INSERT INTO Orders VALUES(10435,16,8,'1997-02-04',2); +INSERT INTO Orders VALUES(10436,7,3,'1997-02-05',2); +INSERT INTO Orders VALUES(10437,87,8,'1997-02-05',1); +INSERT INTO Orders VALUES(10438,79,3,'1997-02-06',2); +INSERT INTO Orders VALUES(10439,51,6,'1997-02-07',3); +INSERT INTO Orders VALUES(10440,71,4,'1997-02-10',2); +INSERT INTO Orders VALUES(10441,55,3,'1997-02-10',2); +INSERT INTO Orders VALUES(10442,20,3,'1997-02-11',2); +INSERT INTO Orders VALUES(10443,66,8,'1997-02-12',1); + +INSERT INTO OrderDetails VALUES(1,10248,11,12); +INSERT INTO OrderDetails VALUES(2,10248,42,10); +INSERT INTO OrderDetails VALUES(3,10248,72,5); +INSERT INTO OrderDetails VALUES(4,10249,14,9); +INSERT INTO OrderDetails VALUES(5,10249,51,40); +INSERT INTO OrderDetails VALUES(6,10250,41,10); +INSERT INTO OrderDetails VALUES(7,10250,51,35); +INSERT INTO OrderDetails VALUES(8,10250,65,15); +INSERT INTO OrderDetails VALUES(9,10251,22,6); +INSERT INTO OrderDetails VALUES(10,10251,57,15); +INSERT INTO OrderDetails 
VALUES(11,10251,65,20); +INSERT INTO OrderDetails VALUES(12,10252,20,40); +INSERT INTO OrderDetails VALUES(13,10252,33,25); +INSERT INTO OrderDetails VALUES(14,10252,60,40); +INSERT INTO OrderDetails VALUES(15,10253,31,20); +INSERT INTO OrderDetails VALUES(16,10253,39,42); +INSERT INTO OrderDetails VALUES(17,10253,49,40); +INSERT INTO OrderDetails VALUES(18,10254,24,15); +INSERT INTO OrderDetails VALUES(19,10254,55,21); +INSERT INTO OrderDetails VALUES(20,10254,74,21); +INSERT INTO OrderDetails VALUES(21,10255,2,20); +INSERT INTO OrderDetails VALUES(22,10255,16,35); +INSERT INTO OrderDetails VALUES(23,10255,36,25); +INSERT INTO OrderDetails VALUES(24,10255,59,30); +INSERT INTO OrderDetails VALUES(25,10256,53,15); +INSERT INTO OrderDetails VALUES(26,10256,77,12); +INSERT INTO OrderDetails VALUES(27,10257,27,25); +INSERT INTO OrderDetails VALUES(28,10257,39,6); +INSERT INTO OrderDetails VALUES(29,10257,77,15); +INSERT INTO OrderDetails VALUES(30,10258,2,50); +INSERT INTO OrderDetails VALUES(31,10258,5,65); +INSERT INTO OrderDetails VALUES(32,10258,32,6); +INSERT INTO OrderDetails VALUES(33,10259,21,10); +INSERT INTO OrderDetails VALUES(34,10259,37,1); +INSERT INTO OrderDetails VALUES(35,10260,41,16); +INSERT INTO OrderDetails VALUES(36,10260,57,50); +INSERT INTO OrderDetails VALUES(37,10260,62,15); +INSERT INTO OrderDetails VALUES(38,10260,70,21); +INSERT INTO OrderDetails VALUES(39,10261,21,20); +INSERT INTO OrderDetails VALUES(40,10261,35,20); +INSERT INTO OrderDetails VALUES(41,10262,5,12); +INSERT INTO OrderDetails VALUES(42,10262,7,15); +INSERT INTO OrderDetails VALUES(43,10262,56,2); +INSERT INTO OrderDetails VALUES(44,10263,16,60); +INSERT INTO OrderDetails VALUES(45,10263,24,28); +INSERT INTO OrderDetails VALUES(46,10263,30,60); +INSERT INTO OrderDetails VALUES(47,10263,74,36); +INSERT INTO OrderDetails VALUES(48,10264,2,35); +INSERT INTO OrderDetails VALUES(49,10264,41,25); +INSERT INTO OrderDetails VALUES(50,10265,17,30); +INSERT INTO OrderDetails 
VALUES(51,10265,70,20); +INSERT INTO OrderDetails VALUES(52,10266,12,12); +INSERT INTO OrderDetails VALUES(53,10267,40,50); +INSERT INTO OrderDetails VALUES(54,10267,59,70); +INSERT INTO OrderDetails VALUES(55,10267,76,15); +INSERT INTO OrderDetails VALUES(56,10268,29,10); +INSERT INTO OrderDetails VALUES(57,10268,72,4); +INSERT INTO OrderDetails VALUES(58,10269,33,60); +INSERT INTO OrderDetails VALUES(59,10269,72,20); +INSERT INTO OrderDetails VALUES(60,10270,36,30); +INSERT INTO OrderDetails VALUES(61,10270,43,25); +INSERT INTO OrderDetails VALUES(62,10271,33,24); +INSERT INTO OrderDetails VALUES(63,10272,20,6); +INSERT INTO OrderDetails VALUES(64,10272,31,40); +INSERT INTO OrderDetails VALUES(65,10272,72,24); +INSERT INTO OrderDetails VALUES(66,10273,10,24); +INSERT INTO OrderDetails VALUES(67,10273,31,15); +INSERT INTO OrderDetails VALUES(68,10273,33,20); +INSERT INTO OrderDetails VALUES(69,10273,40,60); +INSERT INTO OrderDetails VALUES(70,10273,76,33); +INSERT INTO OrderDetails VALUES(71,10274,71,20); +INSERT INTO OrderDetails VALUES(72,10274,72,7); +INSERT INTO OrderDetails VALUES(73,10275,24,12); +INSERT INTO OrderDetails VALUES(74,10275,59,6); +INSERT INTO OrderDetails VALUES(75,10276,10,15); +INSERT INTO OrderDetails VALUES(76,10276,13,10); +INSERT INTO OrderDetails VALUES(77,10277,28,20); +INSERT INTO OrderDetails VALUES(78,10277,62,12); +INSERT INTO OrderDetails VALUES(79,10278,44,16); +INSERT INTO OrderDetails VALUES(80,10278,59,15); +INSERT INTO OrderDetails VALUES(81,10278,63,8); +INSERT INTO OrderDetails VALUES(82,10278,73,25); +INSERT INTO OrderDetails VALUES(83,10279,17,15); +INSERT INTO OrderDetails VALUES(84,10280,24,12); +INSERT INTO OrderDetails VALUES(85,10280,55,20); +INSERT INTO OrderDetails VALUES(86,10280,75,30); +INSERT INTO OrderDetails VALUES(87,10281,19,1); +INSERT INTO OrderDetails VALUES(88,10281,24,6); +INSERT INTO OrderDetails VALUES(89,10281,35,4); +INSERT INTO OrderDetails VALUES(90,10282,30,6); +INSERT INTO OrderDetails 
VALUES(91,10282,57,2); +INSERT INTO OrderDetails VALUES(92,10283,15,20); +INSERT INTO OrderDetails VALUES(93,10283,19,18); +INSERT INTO OrderDetails VALUES(94,10283,60,35); +INSERT INTO OrderDetails VALUES(95,10283,72,3); +INSERT INTO OrderDetails VALUES(96,10284,27,15); +INSERT INTO OrderDetails VALUES(97,10284,44,21); +INSERT INTO OrderDetails VALUES(98,10284,60,20); +INSERT INTO OrderDetails VALUES(99,10284,67,5); +INSERT INTO OrderDetails VALUES(100,10285,1,45); +INSERT INTO OrderDetails VALUES(101,10285,40,40); +INSERT INTO OrderDetails VALUES(102,10285,53,36); +INSERT INTO OrderDetails VALUES(103,10286,35,100); +INSERT INTO OrderDetails VALUES(104,10286,62,40); +INSERT INTO OrderDetails VALUES(105,10287,16,40); +INSERT INTO OrderDetails VALUES(106,10287,34,20); +INSERT INTO OrderDetails VALUES(107,10287,46,15); +INSERT INTO OrderDetails VALUES(108,10288,54,10); +INSERT INTO OrderDetails VALUES(109,10288,68,3); +INSERT INTO OrderDetails VALUES(110,10289,3,30); +INSERT INTO OrderDetails VALUES(111,10289,64,9); +INSERT INTO OrderDetails VALUES(112,10290,5,20); +INSERT INTO OrderDetails VALUES(113,10290,29,15); +INSERT INTO OrderDetails VALUES(114,10290,49,15); +INSERT INTO OrderDetails VALUES(115,10290,77,10); +INSERT INTO OrderDetails VALUES(116,10291,13,20); +INSERT INTO OrderDetails VALUES(117,10291,44,24); +INSERT INTO OrderDetails VALUES(118,10291,51,2); +INSERT INTO OrderDetails VALUES(119,10292,20,20); +INSERT INTO OrderDetails VALUES(120,10293,18,12); +INSERT INTO OrderDetails VALUES(121,10293,24,10); +INSERT INTO OrderDetails VALUES(122,10293,63,5); +INSERT INTO OrderDetails VALUES(123,10293,75,6); +INSERT INTO OrderDetails VALUES(124,10294,1,18); +INSERT INTO OrderDetails VALUES(125,10294,17,15); +INSERT INTO OrderDetails VALUES(126,10294,43,15); +INSERT INTO OrderDetails VALUES(127,10294,60,21); +INSERT INTO OrderDetails VALUES(128,10294,75,6); +INSERT INTO OrderDetails VALUES(129,10295,56,4); +INSERT INTO OrderDetails VALUES(130,10296,11,12); +INSERT 
INTO OrderDetails VALUES(131,10296,16,30); +INSERT INTO OrderDetails VALUES(132,10296,69,15); +INSERT INTO OrderDetails VALUES(133,10297,39,60); +INSERT INTO OrderDetails VALUES(134,10297,72,20); +INSERT INTO OrderDetails VALUES(135,10298,2,40); +INSERT INTO OrderDetails VALUES(136,10298,36,40); +INSERT INTO OrderDetails VALUES(137,10298,59,30); +INSERT INTO OrderDetails VALUES(138,10298,62,15); +INSERT INTO OrderDetails VALUES(139,10299,19,15); +INSERT INTO OrderDetails VALUES(140,10299,70,20); +INSERT INTO OrderDetails VALUES(141,10300,66,30); +INSERT INTO OrderDetails VALUES(142,10300,68,20); +INSERT INTO OrderDetails VALUES(143,10301,40,10); +INSERT INTO OrderDetails VALUES(144,10301,56,20); +INSERT INTO OrderDetails VALUES(145,10302,17,40); +INSERT INTO OrderDetails VALUES(146,10302,28,28); +INSERT INTO OrderDetails VALUES(147,10302,43,12); +INSERT INTO OrderDetails VALUES(148,10303,40,40); +INSERT INTO OrderDetails VALUES(149,10303,65,30); +INSERT INTO OrderDetails VALUES(150,10303,68,15); +INSERT INTO OrderDetails VALUES(151,10304,49,30); +INSERT INTO OrderDetails VALUES(152,10304,59,10); +INSERT INTO OrderDetails VALUES(153,10304,71,2); +INSERT INTO OrderDetails VALUES(154,10305,18,25); +INSERT INTO OrderDetails VALUES(155,10305,29,25); +INSERT INTO OrderDetails VALUES(156,10305,39,30); +INSERT INTO OrderDetails VALUES(157,10306,30,10); +INSERT INTO OrderDetails VALUES(158,10306,53,10); +INSERT INTO OrderDetails VALUES(159,10306,54,5); +INSERT INTO OrderDetails VALUES(160,10307,62,10); +INSERT INTO OrderDetails VALUES(161,10307,68,3); +INSERT INTO OrderDetails VALUES(162,10308,69,1); +INSERT INTO OrderDetails VALUES(163,10308,70,5); +INSERT INTO OrderDetails VALUES(164,10309,4,20); +INSERT INTO OrderDetails VALUES(165,10309,6,30); +INSERT INTO OrderDetails VALUES(166,10309,42,2); +INSERT INTO OrderDetails VALUES(167,10309,43,20); +INSERT INTO OrderDetails VALUES(168,10309,71,3); +INSERT INTO OrderDetails VALUES(169,10310,16,10); +INSERT INTO OrderDetails 
VALUES(170,10310,62,5); +INSERT INTO OrderDetails VALUES(171,10311,42,6); +INSERT INTO OrderDetails VALUES(172,10311,69,7); +INSERT INTO OrderDetails VALUES(173,10312,28,4); +INSERT INTO OrderDetails VALUES(174,10312,43,24); +INSERT INTO OrderDetails VALUES(175,10312,53,20); +INSERT INTO OrderDetails VALUES(176,10312,75,10); +INSERT INTO OrderDetails VALUES(177,10313,36,12); +INSERT INTO OrderDetails VALUES(178,10314,32,40); +INSERT INTO OrderDetails VALUES(179,10314,58,30); +INSERT INTO OrderDetails VALUES(180,10314,62,25); +INSERT INTO OrderDetails VALUES(181,10315,34,14); +INSERT INTO OrderDetails VALUES(182,10315,70,30); +INSERT INTO OrderDetails VALUES(183,10316,41,10); +INSERT INTO OrderDetails VALUES(184,10316,62,70); +INSERT INTO OrderDetails VALUES(185,10317,1,20); +INSERT INTO OrderDetails VALUES(186,10318,41,20); +INSERT INTO OrderDetails VALUES(187,10318,76,6); +INSERT INTO OrderDetails VALUES(188,10319,17,8); +INSERT INTO OrderDetails VALUES(189,10319,28,14); +INSERT INTO OrderDetails VALUES(190,10319,76,30); +INSERT INTO OrderDetails VALUES(191,10320,71,30); +INSERT INTO OrderDetails VALUES(192,10321,35,10); +INSERT INTO OrderDetails VALUES(193,10322,52,20); +INSERT INTO OrderDetails VALUES(194,10323,15,5); +INSERT INTO OrderDetails VALUES(195,10323,25,4); +INSERT INTO OrderDetails VALUES(196,10323,39,4); +INSERT INTO OrderDetails VALUES(197,10324,16,21); +INSERT INTO OrderDetails VALUES(198,10324,35,70); +INSERT INTO OrderDetails VALUES(199,10324,46,30); +INSERT INTO OrderDetails VALUES(200,10324,59,40); +INSERT INTO OrderDetails VALUES(201,10324,63,80); +INSERT INTO OrderDetails VALUES(202,10325,6,6); +INSERT INTO OrderDetails VALUES(203,10325,13,12); +INSERT INTO OrderDetails VALUES(204,10325,14,9); +INSERT INTO OrderDetails VALUES(205,10325,31,4); +INSERT INTO OrderDetails VALUES(206,10325,72,40); +INSERT INTO OrderDetails VALUES(207,10326,4,24); +INSERT INTO OrderDetails VALUES(208,10326,57,16); +INSERT INTO OrderDetails VALUES(209,10326,75,50); 
+INSERT INTO OrderDetails VALUES(210,10327,2,25); +INSERT INTO OrderDetails VALUES(211,10327,11,50); +INSERT INTO OrderDetails VALUES(212,10327,30,35); +INSERT INTO OrderDetails VALUES(213,10327,58,30); +INSERT INTO OrderDetails VALUES(214,10328,59,9); +INSERT INTO OrderDetails VALUES(215,10328,65,40); +INSERT INTO OrderDetails VALUES(216,10328,68,10); +INSERT INTO OrderDetails VALUES(217,10329,19,10); +INSERT INTO OrderDetails VALUES(218,10329,30,8); +INSERT INTO OrderDetails VALUES(219,10329,38,20); +INSERT INTO OrderDetails VALUES(220,10329,56,12); +INSERT INTO OrderDetails VALUES(221,10330,26,50); +INSERT INTO OrderDetails VALUES(222,10330,72,25); +INSERT INTO OrderDetails VALUES(223,10331,54,15); +INSERT INTO OrderDetails VALUES(224,10332,18,40); +INSERT INTO OrderDetails VALUES(225,10332,42,10); +INSERT INTO OrderDetails VALUES(226,10332,47,16); +INSERT INTO OrderDetails VALUES(227,10333,14,10); +INSERT INTO OrderDetails VALUES(228,10333,21,10); +INSERT INTO OrderDetails VALUES(229,10333,71,40); +INSERT INTO OrderDetails VALUES(230,10334,52,8); +INSERT INTO OrderDetails VALUES(231,10334,68,10); +INSERT INTO OrderDetails VALUES(232,10335,2,7); +INSERT INTO OrderDetails VALUES(233,10335,31,25); +INSERT INTO OrderDetails VALUES(234,10335,32,6); +INSERT INTO OrderDetails VALUES(235,10335,51,48); +INSERT INTO OrderDetails VALUES(236,10336,4,18); +INSERT INTO OrderDetails VALUES(237,10337,23,40); +INSERT INTO OrderDetails VALUES(238,10337,26,24); +INSERT INTO OrderDetails VALUES(239,10337,36,20); +INSERT INTO OrderDetails VALUES(240,10337,37,28); +INSERT INTO OrderDetails VALUES(241,10337,72,25); +INSERT INTO OrderDetails VALUES(242,10338,17,20); +INSERT INTO OrderDetails VALUES(243,10338,30,15); +INSERT INTO OrderDetails VALUES(244,10339,4,10); +INSERT INTO OrderDetails VALUES(245,10339,17,70); +INSERT INTO OrderDetails VALUES(246,10339,62,28); +INSERT INTO OrderDetails VALUES(247,10340,18,20); +INSERT INTO OrderDetails VALUES(248,10340,41,12); +INSERT INTO 
OrderDetails VALUES(249,10340,43,40); +INSERT INTO OrderDetails VALUES(250,10341,33,8); +INSERT INTO OrderDetails VALUES(251,10341,59,9); +INSERT INTO OrderDetails VALUES(252,10342,2,24); +INSERT INTO OrderDetails VALUES(253,10342,31,56); +INSERT INTO OrderDetails VALUES(254,10342,36,40); +INSERT INTO OrderDetails VALUES(255,10342,55,40); +INSERT INTO OrderDetails VALUES(256,10343,64,50); +INSERT INTO OrderDetails VALUES(257,10343,68,4); +INSERT INTO OrderDetails VALUES(258,10343,76,15); +INSERT INTO OrderDetails VALUES(259,10344,4,35); +INSERT INTO OrderDetails VALUES(260,10344,8,70); +INSERT INTO OrderDetails VALUES(261,10345,8,70); +INSERT INTO OrderDetails VALUES(262,10345,19,80); +INSERT INTO OrderDetails VALUES(263,10345,42,9); +INSERT INTO OrderDetails VALUES(264,10346,17,36); +INSERT INTO OrderDetails VALUES(265,10346,56,20); +INSERT INTO OrderDetails VALUES(266,10347,25,10); +INSERT INTO OrderDetails VALUES(267,10347,39,50); +INSERT INTO OrderDetails VALUES(268,10347,40,4); +INSERT INTO OrderDetails VALUES(269,10347,75,6); +INSERT INTO OrderDetails VALUES(270,10348,1,15); +INSERT INTO OrderDetails VALUES(271,10348,23,25); +INSERT INTO OrderDetails VALUES(272,10349,54,24); +INSERT INTO OrderDetails VALUES(273,10350,50,15); +INSERT INTO OrderDetails VALUES(274,10350,69,18); +INSERT INTO OrderDetails VALUES(275,10351,38,20); +INSERT INTO OrderDetails VALUES(276,10351,41,13); +INSERT INTO OrderDetails VALUES(277,10351,44,77); +INSERT INTO OrderDetails VALUES(278,10351,65,10); +INSERT INTO OrderDetails VALUES(279,10352,24,10); +INSERT INTO OrderDetails VALUES(280,10352,54,20); +INSERT INTO OrderDetails VALUES(281,10353,11,12); +INSERT INTO OrderDetails VALUES(282,10353,38,50); +INSERT INTO OrderDetails VALUES(283,10354,1,12); +INSERT INTO OrderDetails VALUES(284,10354,29,4); +INSERT INTO OrderDetails VALUES(285,10355,24,25); +INSERT INTO OrderDetails VALUES(286,10355,57,25); +INSERT INTO OrderDetails VALUES(287,10356,31,30); +INSERT INTO OrderDetails 
VALUES(288,10356,55,12); +INSERT INTO OrderDetails VALUES(289,10356,69,20); +INSERT INTO OrderDetails VALUES(290,10357,10,30); +INSERT INTO OrderDetails VALUES(291,10357,26,16); +INSERT INTO OrderDetails VALUES(292,10357,60,8); +INSERT INTO OrderDetails VALUES(293,10358,24,10); +INSERT INTO OrderDetails VALUES(294,10358,34,10); +INSERT INTO OrderDetails VALUES(295,10358,36,20); +INSERT INTO OrderDetails VALUES(296,10359,16,56); +INSERT INTO OrderDetails VALUES(297,10359,31,70); +INSERT INTO OrderDetails VALUES(298,10359,60,80); +INSERT INTO OrderDetails VALUES(299,10360,28,30); +INSERT INTO OrderDetails VALUES(300,10360,29,35); +INSERT INTO OrderDetails VALUES(301,10360,38,10); +INSERT INTO OrderDetails VALUES(302,10360,49,35); +INSERT INTO OrderDetails VALUES(303,10360,54,28); +INSERT INTO OrderDetails VALUES(304,10361,39,54); +INSERT INTO OrderDetails VALUES(305,10361,60,55); +INSERT INTO OrderDetails VALUES(306,10362,25,50); +INSERT INTO OrderDetails VALUES(307,10362,51,20); +INSERT INTO OrderDetails VALUES(308,10362,54,24); +INSERT INTO OrderDetails VALUES(309,10363,31,20); +INSERT INTO OrderDetails VALUES(310,10363,75,12); +INSERT INTO OrderDetails VALUES(311,10363,76,12); +INSERT INTO OrderDetails VALUES(312,10364,69,30); +INSERT INTO OrderDetails VALUES(313,10364,71,5); +INSERT INTO OrderDetails VALUES(314,10365,11,24); +INSERT INTO OrderDetails VALUES(315,10366,65,5); +INSERT INTO OrderDetails VALUES(316,10366,77,5); +INSERT INTO OrderDetails VALUES(317,10367,34,36); +INSERT INTO OrderDetails VALUES(318,10367,54,18); +INSERT INTO OrderDetails VALUES(319,10367,65,15); +INSERT INTO OrderDetails VALUES(320,10367,77,7); +INSERT INTO OrderDetails VALUES(321,10368,21,5); +INSERT INTO OrderDetails VALUES(322,10368,28,13); +INSERT INTO OrderDetails VALUES(323,10368,57,25); +INSERT INTO OrderDetails VALUES(324,10368,64,35); +INSERT INTO OrderDetails VALUES(325,10369,29,20); +INSERT INTO OrderDetails VALUES(326,10369,56,18); +INSERT INTO OrderDetails 
VALUES(327,10370,1,15); +INSERT INTO OrderDetails VALUES(328,10370,64,30); +INSERT INTO OrderDetails VALUES(329,10370,74,20); +INSERT INTO OrderDetails VALUES(330,10371,36,6); +INSERT INTO OrderDetails VALUES(331,10372,20,12); +INSERT INTO OrderDetails VALUES(332,10372,38,40); +INSERT INTO OrderDetails VALUES(333,10372,60,70); +INSERT INTO OrderDetails VALUES(334,10372,72,42); +INSERT INTO OrderDetails VALUES(335,10373,58,80); +INSERT INTO OrderDetails VALUES(336,10373,71,50); +INSERT INTO OrderDetails VALUES(337,10374,31,30); +INSERT INTO OrderDetails VALUES(338,10374,58,15); +INSERT INTO OrderDetails VALUES(339,10375,14,15); +INSERT INTO OrderDetails VALUES(340,10375,54,10); +INSERT INTO OrderDetails VALUES(341,10376,31,42); +INSERT INTO OrderDetails VALUES(342,10377,28,20); +INSERT INTO OrderDetails VALUES(343,10377,39,20); +INSERT INTO OrderDetails VALUES(344,10378,71,6); +INSERT INTO OrderDetails VALUES(345,10379,41,8); +INSERT INTO OrderDetails VALUES(346,10379,63,16); +INSERT INTO OrderDetails VALUES(347,10379,65,20); +INSERT INTO OrderDetails VALUES(348,10380,30,18); +INSERT INTO OrderDetails VALUES(349,10380,53,20); +INSERT INTO OrderDetails VALUES(350,10380,60,6); +INSERT INTO OrderDetails VALUES(351,10380,70,30); +INSERT INTO OrderDetails VALUES(352,10381,74,14); +INSERT INTO OrderDetails VALUES(353,10382,5,32); +INSERT INTO OrderDetails VALUES(354,10382,18,9); +INSERT INTO OrderDetails VALUES(355,10382,29,14); +INSERT INTO OrderDetails VALUES(356,10382,33,60); +INSERT INTO OrderDetails VALUES(357,10382,74,50); +INSERT INTO OrderDetails VALUES(358,10383,13,20); +INSERT INTO OrderDetails VALUES(359,10383,50,15); +INSERT INTO OrderDetails VALUES(360,10383,56,20); +INSERT INTO OrderDetails VALUES(361,10384,20,28); +INSERT INTO OrderDetails VALUES(362,10384,60,15); +INSERT INTO OrderDetails VALUES(363,10385,7,10); +INSERT INTO OrderDetails VALUES(364,10385,60,20); +INSERT INTO OrderDetails VALUES(365,10385,68,8); +INSERT INTO OrderDetails 
VALUES(366,10386,24,15); +INSERT INTO OrderDetails VALUES(367,10386,34,10); +INSERT INTO OrderDetails VALUES(368,10387,24,15); +INSERT INTO OrderDetails VALUES(369,10387,28,6); +INSERT INTO OrderDetails VALUES(370,10387,59,12); +INSERT INTO OrderDetails VALUES(371,10387,71,15); +INSERT INTO OrderDetails VALUES(372,10388,45,15); +INSERT INTO OrderDetails VALUES(373,10388,52,20); +INSERT INTO OrderDetails VALUES(374,10388,53,40); +INSERT INTO OrderDetails VALUES(375,10389,10,16); +INSERT INTO OrderDetails VALUES(376,10389,55,15); +INSERT INTO OrderDetails VALUES(377,10389,62,20); +INSERT INTO OrderDetails VALUES(378,10389,70,30); +INSERT INTO OrderDetails VALUES(379,10390,31,60); +INSERT INTO OrderDetails VALUES(380,10390,35,40); +INSERT INTO OrderDetails VALUES(381,10390,46,45); +INSERT INTO OrderDetails VALUES(382,10390,72,24); +INSERT INTO OrderDetails VALUES(383,10391,13,18); +INSERT INTO OrderDetails VALUES(384,10392,69,50); +INSERT INTO OrderDetails VALUES(385,10393,2,25); +INSERT INTO OrderDetails VALUES(386,10393,14,42); +INSERT INTO OrderDetails VALUES(387,10393,25,7); +INSERT INTO OrderDetails VALUES(388,10393,26,70); +INSERT INTO OrderDetails VALUES(389,10393,31,32); +INSERT INTO OrderDetails VALUES(390,10394,13,10); +INSERT INTO OrderDetails VALUES(391,10394,62,10); +INSERT INTO OrderDetails VALUES(392,10395,46,28); +INSERT INTO OrderDetails VALUES(393,10395,53,70); +INSERT INTO OrderDetails VALUES(394,10395,69,8); +INSERT INTO OrderDetails VALUES(395,10396,23,40); +INSERT INTO OrderDetails VALUES(396,10396,71,60); +INSERT INTO OrderDetails VALUES(397,10396,72,21); +INSERT INTO OrderDetails VALUES(398,10397,21,10); +INSERT INTO OrderDetails VALUES(399,10397,51,18); +INSERT INTO OrderDetails VALUES(400,10398,35,30); +INSERT INTO OrderDetails VALUES(401,10398,55,120); +INSERT INTO OrderDetails VALUES(402,10399,68,60); +INSERT INTO OrderDetails VALUES(403,10399,71,30); +INSERT INTO OrderDetails VALUES(404,10399,76,35); +INSERT INTO OrderDetails 
VALUES(405,10399,77,14); +INSERT INTO OrderDetails VALUES(406,10400,29,21); +INSERT INTO OrderDetails VALUES(407,10400,35,35); +INSERT INTO OrderDetails VALUES(408,10400,49,30); +INSERT INTO OrderDetails VALUES(409,10401,30,18); +INSERT INTO OrderDetails VALUES(410,10401,56,70); +INSERT INTO OrderDetails VALUES(411,10401,65,20); +INSERT INTO OrderDetails VALUES(412,10401,71,60); +INSERT INTO OrderDetails VALUES(413,10402,23,60); +INSERT INTO OrderDetails VALUES(414,10402,63,65); +INSERT INTO OrderDetails VALUES(415,10403,16,21); +INSERT INTO OrderDetails VALUES(416,10403,48,70); +INSERT INTO OrderDetails VALUES(417,10404,26,30); +INSERT INTO OrderDetails VALUES(418,10404,42,40); +INSERT INTO OrderDetails VALUES(419,10404,49,30); +INSERT INTO OrderDetails VALUES(420,10405,3,50); +INSERT INTO OrderDetails VALUES(421,10406,1,10); +INSERT INTO OrderDetails VALUES(422,10406,21,30); +INSERT INTO OrderDetails VALUES(423,10406,28,42); +INSERT INTO OrderDetails VALUES(424,10406,36,5); +INSERT INTO OrderDetails VALUES(425,10406,40,2); +INSERT INTO OrderDetails VALUES(426,10407,11,30); +INSERT INTO OrderDetails VALUES(427,10407,69,15); +INSERT INTO OrderDetails VALUES(428,10407,71,15); +INSERT INTO OrderDetails VALUES(429,10408,37,10); +INSERT INTO OrderDetails VALUES(430,10408,54,6); +INSERT INTO OrderDetails VALUES(431,10408,62,35); +INSERT INTO OrderDetails VALUES(432,10409,14,12); +INSERT INTO OrderDetails VALUES(433,10409,21,12); +INSERT INTO OrderDetails VALUES(434,10410,33,49); +INSERT INTO OrderDetails VALUES(435,10410,59,16); +INSERT INTO OrderDetails VALUES(436,10411,41,25); +INSERT INTO OrderDetails VALUES(437,10411,44,40); +INSERT INTO OrderDetails VALUES(438,10411,59,9); +INSERT INTO OrderDetails VALUES(439,10412,14,20); +INSERT INTO OrderDetails VALUES(440,10413,1,24); +INSERT INTO OrderDetails VALUES(441,10413,62,40); +INSERT INTO OrderDetails VALUES(442,10413,76,14); +INSERT INTO OrderDetails VALUES(443,10414,19,18); +INSERT INTO OrderDetails 
VALUES(444,10414,33,50); +INSERT INTO OrderDetails VALUES(445,10415,17,2); +INSERT INTO OrderDetails VALUES(446,10415,33,20); +INSERT INTO OrderDetails VALUES(447,10416,19,20); +INSERT INTO OrderDetails VALUES(448,10416,53,10); +INSERT INTO OrderDetails VALUES(449,10416,57,20); +INSERT INTO OrderDetails VALUES(450,10417,38,50); +INSERT INTO OrderDetails VALUES(451,10417,46,2); +INSERT INTO OrderDetails VALUES(452,10417,68,36); +INSERT INTO OrderDetails VALUES(453,10417,77,35); +INSERT INTO OrderDetails VALUES(454,10418,2,60); +INSERT INTO OrderDetails VALUES(455,10418,47,55); +INSERT INTO OrderDetails VALUES(456,10418,61,16); +INSERT INTO OrderDetails VALUES(457,10418,74,15); +INSERT INTO OrderDetails VALUES(458,10419,60,60); +INSERT INTO OrderDetails VALUES(459,10419,69,20); +INSERT INTO OrderDetails VALUES(460,10420,9,20); +INSERT INTO OrderDetails VALUES(461,10420,13,2); +INSERT INTO OrderDetails VALUES(462,10420,70,8); +INSERT INTO OrderDetails VALUES(463,10420,73,20); +INSERT INTO OrderDetails VALUES(464,10421,19,4); +INSERT INTO OrderDetails VALUES(465,10421,26,30); +INSERT INTO OrderDetails VALUES(466,10421,53,15); +INSERT INTO OrderDetails VALUES(467,10421,77,10); +INSERT INTO OrderDetails VALUES(468,10422,26,2); +INSERT INTO OrderDetails VALUES(469,10423,31,14); +INSERT INTO OrderDetails VALUES(470,10423,59,20); +INSERT INTO OrderDetails VALUES(471,10424,35,60); +INSERT INTO OrderDetails VALUES(472,10424,38,49); +INSERT INTO OrderDetails VALUES(473,10424,68,30); +INSERT INTO OrderDetails VALUES(474,10425,55,10); +INSERT INTO OrderDetails VALUES(475,10425,76,20); +INSERT INTO OrderDetails VALUES(476,10426,56,5); +INSERT INTO OrderDetails VALUES(477,10426,64,7); +INSERT INTO OrderDetails VALUES(478,10427,14,35); +INSERT INTO OrderDetails VALUES(479,10428,46,20); +INSERT INTO OrderDetails VALUES(480,10429,50,40); +INSERT INTO OrderDetails VALUES(481,10429,63,35); +INSERT INTO OrderDetails VALUES(482,10430,17,45); +INSERT INTO OrderDetails 
VALUES(483,10430,21,50); +INSERT INTO OrderDetails VALUES(484,10430,56,30); +INSERT INTO OrderDetails VALUES(485,10430,59,70); +INSERT INTO OrderDetails VALUES(486,10431,17,50); +INSERT INTO OrderDetails VALUES(487,10431,40,50); +INSERT INTO OrderDetails VALUES(488,10431,47,30); +INSERT INTO OrderDetails VALUES(489,10432,26,10); +INSERT INTO OrderDetails VALUES(490,10432,54,40); +INSERT INTO OrderDetails VALUES(491,10433,56,28); +INSERT INTO OrderDetails VALUES(492,10434,11,6); +INSERT INTO OrderDetails VALUES(493,10434,76,18); +INSERT INTO OrderDetails VALUES(494,10435,2,10); +INSERT INTO OrderDetails VALUES(495,10435,22,12); +INSERT INTO OrderDetails VALUES(496,10435,72,10); +INSERT INTO OrderDetails VALUES(497,10436,46,5); +INSERT INTO OrderDetails VALUES(498,10436,56,40); +INSERT INTO OrderDetails VALUES(499,10436,64,30); +INSERT INTO OrderDetails VALUES(500,10436,75,24); +INSERT INTO OrderDetails VALUES(501,10437,53,15); +INSERT INTO OrderDetails VALUES(502,10438,19,15); +INSERT INTO OrderDetails VALUES(503,10438,34,20); +INSERT INTO OrderDetails VALUES(504,10438,57,15); +INSERT INTO OrderDetails VALUES(505,10439,12,15); +INSERT INTO OrderDetails VALUES(506,10439,16,16); +INSERT INTO OrderDetails VALUES(507,10439,64,6); +INSERT INTO OrderDetails VALUES(508,10439,74,30); +INSERT INTO OrderDetails VALUES(509,10440,2,45); +INSERT INTO OrderDetails VALUES(510,10440,16,49); +INSERT INTO OrderDetails VALUES(511,10440,29,24); +INSERT INTO OrderDetails VALUES(512,10440,61,90); +INSERT INTO OrderDetails VALUES(513,10441,27,50); +INSERT INTO OrderDetails VALUES(514,10442,11,30); +INSERT INTO OrderDetails VALUES(515,10442,54,80); +INSERT INTO OrderDetails VALUES(516,10442,66,60); +INSERT INTO OrderDetails VALUES(517,10443,11,6); +INSERT INTO OrderDetails VALUES(518,10443,28,12); \ No newline at end of file diff --git a/missions/W1/mtcars.csv b/missions/W1/mtcars.csv index 08529b5..a22b9c2 100644 --- a/missions/W1/mtcars.csv +++ b/missions/W1/mtcars.csv @@ -1,33 +1,33 @@ 
-"","mpg","cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb" -"Mazda RX4",21,6,160,110,3.9,2.62,16.46,0,1,4,4 -"Mazda RX4 Wag",21,6,160,110,3.9,2.875,17.02,0,1,4,4 -"Datsun 710",22.8,4,108,93,3.85,2.32,18.61,1,1,4,1 -"Hornet 4 Drive",21.4,6,258,110,3.08,3.215,19.44,1,0,3,1 -"Hornet Sportabout",18.7,8,360,175,3.15,3.44,17.02,0,0,3,2 -"Valiant",18.1,6,225,105,2.76,3.46,20.22,1,0,3,1 -"Duster 360",14.3,8,360,245,3.21,3.57,15.84,0,0,3,4 -"Merc 240D",24.4,4,146.7,62,3.69,3.19,20,1,0,4,2 -"Merc 230",22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2 -"Merc 280",19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4 -"Merc 280C",17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4 -"Merc 450SE",16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3 -"Merc 450SL",17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3 -"Merc 450SLC",15.2,8,275.8,180,3.07,3.78,18,0,0,3,3 -"Cadillac Fleetwood",10.4,8,472,205,2.93,5.25,17.98,0,0,3,4 -"Lincoln Continental",10.4,8,460,215,3,5.424,17.82,0,0,3,4 -"Chrysler Imperial",14.7,8,440,230,3.23,5.345,17.42,0,0,3,4 -"Fiat 128",32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1 -"Honda Civic",30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2 -"Toyota Corolla",33.9,4,71.1,65,4.22,1.835,19.9,1,1,4,1 -"Toyota Corona",21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1 -"Dodge Challenger",15.5,8,318,150,2.76,3.52,16.87,0,0,3,2 -"AMC Javelin",15.2,8,304,150,3.15,3.435,17.3,0,0,3,2 -"Camaro Z28",13.3,8,350,245,3.73,3.84,15.41,0,0,3,4 -"Pontiac Firebird",19.2,8,400,175,3.08,3.845,17.05,0,0,3,2 -"Fiat X1-9",27.3,4,79,66,4.08,1.935,18.9,1,1,4,1 -"Porsche 914-2",26,4,120.3,91,4.43,2.14,16.7,0,1,5,2 -"Lotus Europa",30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2 -"Ford Pantera L",15.8,8,351,264,4.22,3.17,14.5,0,1,5,4 -"Ferrari Dino",19.7,6,145,175,3.62,2.77,15.5,0,1,5,6 -"Maserati Bora",15,8,301,335,3.54,3.57,14.6,0,1,5,8 -"Volvo 142E",21.4,4,121,109,4.11,2.78,18.6,1,1,4,2 +"","mpg","cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb" +"Mazda RX4",21,6,160,110,3.9,2.62,16.46,0,1,4,4 +"Mazda RX4 Wag",21,6,160,110,3.9,2.875,17.02,0,1,4,4 +"Datsun 
710",22.8,4,108,93,3.85,2.32,18.61,1,1,4,1 +"Hornet 4 Drive",21.4,6,258,110,3.08,3.215,19.44,1,0,3,1 +"Hornet Sportabout",18.7,8,360,175,3.15,3.44,17.02,0,0,3,2 +"Valiant",18.1,6,225,105,2.76,3.46,20.22,1,0,3,1 +"Duster 360",14.3,8,360,245,3.21,3.57,15.84,0,0,3,4 +"Merc 240D",24.4,4,146.7,62,3.69,3.19,20,1,0,4,2 +"Merc 230",22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2 +"Merc 280",19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4 +"Merc 280C",17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4 +"Merc 450SE",16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3 +"Merc 450SL",17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3 +"Merc 450SLC",15.2,8,275.8,180,3.07,3.78,18,0,0,3,3 +"Cadillac Fleetwood",10.4,8,472,205,2.93,5.25,17.98,0,0,3,4 +"Lincoln Continental",10.4,8,460,215,3,5.424,17.82,0,0,3,4 +"Chrysler Imperial",14.7,8,440,230,3.23,5.345,17.42,0,0,3,4 +"Fiat 128",32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1 +"Honda Civic",30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2 +"Toyota Corolla",33.9,4,71.1,65,4.22,1.835,19.9,1,1,4,1 +"Toyota Corona",21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1 +"Dodge Challenger",15.5,8,318,150,2.76,3.52,16.87,0,0,3,2 +"AMC Javelin",15.2,8,304,150,3.15,3.435,17.3,0,0,3,2 +"Camaro Z28",13.3,8,350,245,3.73,3.84,15.41,0,0,3,4 +"Pontiac Firebird",19.2,8,400,175,3.08,3.845,17.05,0,0,3,2 +"Fiat X1-9",27.3,4,79,66,4.08,1.935,18.9,1,1,4,1 +"Porsche 914-2",26,4,120.3,91,4.43,2.14,16.7,0,1,5,2 +"Lotus Europa",30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2 +"Ford Pantera L",15.8,8,351,264,4.22,3.17,14.5,0,1,5,4 +"Ferrari Dino",19.7,6,145,175,3.62,2.77,15.5,0,1,5,6 +"Maserati Bora",15,8,301,335,3.54,3.57,14.6,0,1,5,8 +"Volvo 142E",21.4,4,121,109,4.11,2.78,18.6,1,1,4,2 From 6292397ce9aa954eac88694914769396ada5024d Mon Sep 17 00:00:00 2001 From: openkmj Date: Thu, 9 Jan 2025 18:45:43 +0900 Subject: [PATCH 2/4] fix --- .gitignore | 2 +- missions/W1/M3/README.md | 4 + missions/W1/M3/etl_project_gdp.py | 3 +- missions/W1/M3/etl_project_gdp_from_csv.py | 49 +++----- missions/W1/M3/etl_project_gdp_parallel.py | 117 ++++++++---------- 
missions/W1/M3/etl_project_gdp_with_sql.py | 10 +- missions/W1/M3/modules/logger.py | 19 +++ .../M3/utils/create_country_region_table.py | 6 + 8 files changed, 104 insertions(+), 106 deletions(-) diff --git a/.gitignore b/.gitignore index 3baab37..c0b44ba 100644 --- a/.gitignore +++ b/.gitignore @@ -168,4 +168,4 @@ large_data*.csv __pycache__ *.db *.json -*.log \ No newline at end of file +*log.txt \ No newline at end of file diff --git a/missions/W1/M3/README.md b/missions/W1/M3/README.md index d0113ef..5316ce6 100644 --- a/missions/W1/M3/README.md +++ b/missions/W1/M3/README.md @@ -231,6 +231,10 @@ df["GDP"] = (df["GDP"].str.replace(",", "").astype(float) / 1000).round(2) ## Parallel/Distributed Processing +Main idea: +- Split file and process each file in parallel +- Store data separately by region + See detail in `etl_project_gdp_parallel.py`. ### Steps diff --git a/missions/W1/M3/etl_project_gdp.py b/missions/W1/M3/etl_project_gdp.py index dd33214..863b6f9 100644 --- a/missions/W1/M3/etl_project_gdp.py +++ b/missions/W1/M3/etl_project_gdp.py @@ -14,8 +14,7 @@ def transform_df(df): Transformation function """ # Million -> Billion - df["GDP"] = df["GDP"].apply(lambda x: x.replace(",", "")) - df["GDP"] = df["GDP"].apply(lambda x: round(float(x) / 1000, 2)) + df["GDP"] = (df["GDP"].str.replace(",", "").astype(float) / 1000).round(2) # Sort by GDP df = df.sort_values(by="GDP", ascending=False) diff --git a/missions/W1/M3/etl_project_gdp_from_csv.py b/missions/W1/M3/etl_project_gdp_from_csv.py index 71d5a3c..9ae3090 100644 --- a/missions/W1/M3/etl_project_gdp_from_csv.py +++ b/missions/W1/M3/etl_project_gdp_from_csv.py @@ -1,15 +1,16 @@ import sqlite3 import time import pandas as pd -from modules.logger import logger, init_logger +from modules.logger import logger, init_logger, LogExecutionTime from modules.importer import CsvFileImporter from modules.exporter import SqliteExporter +from pathlib import Path -LOG_FILE_PATH = "etl_project_log.txt" -DB_PATH = 
"World_Economies.db" +HOME_DIR = Path(__file__).resolve().parent +LOG_FILE_PATH = HOME_DIR / "log/etl_project_log.txt" +DB_PATH = HOME_DIR / "data/World_Economies.db" +INPUT_FILE_PATH = HOME_DIR / "data/large_data.csv" TABLE_NAME = "Countries_by_GDP" -INPUT_FILE_PATH = "large_data_10M.csv" -# INPUT_FILE_PATH = "large_data.csv" QUERY_1 = """ SELECT Country, GDP_USD_billion @@ -36,7 +37,6 @@ def transfrom_df(df: pd.DataFrame) -> pd.DataFrame: """ Transformation function """ - time_start = time.time() # Million -> Billion # df["GDP"] = df["GDP"].apply(lambda x: x.replace(",", "")) # df["GDP"] = df["GDP"].apply(lambda x: round(float(x) / 1000, 2)) @@ -49,14 +49,9 @@ def transfrom_df(df: pd.DataFrame) -> pd.DataFrame: # .div(1000) # .round(2) # ) - time_end = time.time() - logger.info(f"Transform GDP: {time_end - time_start:.2f} seconds") # Sort by GDP - time_start = time.time() df = df.sort_values(by="GDP", ascending=False) - time_end = time.time() - logger.info(f"Sort by GDP: {time_end - time_start:.2f} seconds") # Rename GDP column to GDP_USD_billion df.rename(columns={"GDP": "GDP_USD_billion"}, inplace=True) @@ -70,48 +65,34 @@ def main(): logger.info("Starting the ETL process") # Extract - time_start = time.time() - csv_importer = CsvFileImporter(INPUT_FILE_PATH) - df = csv_importer.import_data() - time_end = time.time() - logger.info(f"Extract Time taken: {time_end - time_start:.2f} seconds") + with LogExecutionTime("Extract"): + csv_importer = CsvFileImporter(INPUT_FILE_PATH) + df = csv_importer.import_data() # Transform - time_start = time.time() - logger.info("Transforming data...") - df = transfrom_df(df) - time_end = time.time() - logger.info(f"Transform Time taken: {time_end - time_start:.2f} seconds") - - print(df.head(3)) + with LogExecutionTime("Transform"): + df = transfrom_df(df) # Load - time_start = time.time() - sqlite_exporter = SqliteExporter(DB_PATH, table_name=TABLE_NAME) - sqlite_exporter.export_data(df) - time_end = time.time() - 
logger.info(f"Load Time taken: {time_end - time_start:.2f} seconds") + with LogExecutionTime("Load"): + sqlite_exporter = SqliteExporter(DB_PATH, table_name=TABLE_NAME) + sqlite_exporter.export_data(df) logger.info("ETL process completed successfully") + # Query print("Top 5 Average GDP by Region:") - time_start = time.time() df_groupby_top5 = df.groupby("Region").head(5) avg_gdp = df_groupby_top5.groupby("Region")["GDP_USD_billion"].mean() for region, gdp in avg_gdp.items(): print(f"{region:<15} {gdp:.2f}") - time_end = time.time() - logger.info(f"Query with Dataframe : {time_end - time_start:.2f} seconds") conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - time_start = time.time() cursor.execute(QUERY_2) for row in cursor: print(f"{row[0]:<15} {row[1]:.2f}") - time_end = time.time() - logger.info(f"Query with SQLITE: {time_end - time_start:.2f} seconds") conn.close() diff --git a/missions/W1/M3/etl_project_gdp_parallel.py b/missions/W1/M3/etl_project_gdp_parallel.py index ae218fd..4aafcac 100644 --- a/missions/W1/M3/etl_project_gdp_parallel.py +++ b/missions/W1/M3/etl_project_gdp_parallel.py @@ -1,16 +1,16 @@ import sqlite3 import time import pandas as pd -from modules.logger import logger, init_logger +from pathlib import Path +from modules.logger import logger, init_logger, LogExecutionTime from multiprocessing import Pool from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor -LOG_FILE_PATH = "etl_project_log.txt" -# DB_PATH = "World_Economies_1B.db" -DB_NAME = "World_Economies_10M" +HOME_DIR = Path(__file__).resolve().parent +LOG_FILE_PATH = HOME_DIR / "log/etl_project_log.txt" +DB_NAME = "World_Economies" TABLE_NAME = "Countries_by_GDP" -# INPUT_FILE_PATH = "large_data_1B.csv" -INPUT_FILE_PATH = "large_data_10M.csv" +INPUT_FILE_PATH = HOME_DIR / "data/large_data.csv" DATA_SIZE = 10_000_000 # 10M rows CHUNK_SIZE = 1_000_000 # 100K rows per chunk NUM_CHUNKS = DATA_SIZE // CHUNK_SIZE # 100 chunks @@ -43,9 +43,6 @@ def transfrom_df(df: 
pd.DataFrame) -> pd.DataFrame: df["GDP"] = (df["GDP"].str.replace(",", "").astype(float) / 1000).round(2) - # Sort by GDP - # df = df.sort_values(by="GDP", ascending=False) - # Rename GDP column to GDP_USD_billion df.rename(columns={"GDP": "GDP_USD_billion"}, inplace=True) @@ -68,12 +65,12 @@ def process_chunk(index: int): skiprows=index * CHUNK_SIZE, nrows=CHUNK_SIZE, ) - df.to_csv(f"data/large_data_10M_{index}.csv", index=False) + df.to_csv(f"data/large_data_{index}.csv", index=False) def process_chunk2(index: int, chunk: pd.DataFrame): print(f"Processing chunk {index}") - chunk.to_csv(f"data/large_data_10M_{index}.csv", index=False) + chunk.to_csv(f"data/large_data_{index}.csv", index=False) def extract_data_from_source(): @@ -103,28 +100,28 @@ def transform_chunk(index: int): """ (Transform - Preprocess) Transform each small file """ - df = pd.read_csv(f"data/large_data_10M_{index}.csv", dtype=schema) - df = transfrom_df(df) # 기존 transform 함수 사용 - df.to_csv(f"data/large_data_10M_{index}_transformed.csv", index=False) + df = pd.read_csv(f"data/large_data_{index}.csv", dtype=schema) + df = transfrom_df(df) + df.to_csv(f"data/large_data_{index}_transformed.csv", index=False) def map_by_region(index: int): """ (Transform - Map) Separate each small file by region """ - df = pd.read_csv(f"data/large_data_10M_{index}_transformed.csv") + df = pd.read_csv(f"data/large_data_{index}_transformed.csv") regions = ["Asia", "Europe", "Africa", "North America", "South America", "Oceania"] for region in regions: region_df = df[df["Region"] == region] - region_df.to_csv(f"data/large_data_10M_{index}_{region}.csv", index=False) + region_df.to_csv(f"data/large_data_{index}_{region}.csv", index=False) def reduce_by_region(region: str): """ (Transform - Reduce) Merge all files for each region """ - all_files = [f"data/large_data_10M_{i}_{region}.csv" for i in range(NUM_CHUNKS)] + all_files = [f"data/large_data_{i}_{region}.csv" for i in range(NUM_CHUNKS)] dfs = [] for file in 
all_files: @@ -136,16 +133,16 @@ def reduce_by_region(region: str): if dfs: combined_df = pd.concat(dfs, ignore_index=True) - combined_df.to_csv(f"data/large_data_10M_{region}.csv", index=False) + combined_df.to_csv(f"data/large_data_{region}.csv", index=False) def sort_by_gdp(region: str): """ (Transform - Sort) Sort each region file by GDP """ - df = pd.read_csv(f"data/large_data_10M_{region}.csv") + df = pd.read_csv(f"data/large_data_{region}.csv") df = df.sort_values(by="GDP_USD_billion", ascending=False) - df.to_csv(f"data/large_data_10M_{region}_sorted.csv", index=False) + df.to_csv(f"data/large_data_{region}_sorted.csv", index=False) def load_to_database(region: str): @@ -153,7 +150,7 @@ def load_to_database(region: str): (Load) Export each region file to sqlite """ conn = sqlite3.connect(f"data/{DB_NAME}_{region}.db") - df = pd.read_csv(f"data/large_data_10M_{region}_sorted.csv") + df = pd.read_csv(f"data/large_data_{region}_sorted.csv") df.to_sql(TABLE_NAME, conn, if_exists="append", index=False) conn.close() @@ -174,62 +171,54 @@ def main(): logger.info("Starting the Parallel ETL process") # 1. Extract - time_start = time.time() - # extract_data_from_source() - with Pool() as pool: - pool.map(process_chunk, range(NUM_CHUNKS)) - # with ThreadPoolExecutor() as executor: - # executor.map(process_chunk, range(NUM_CHUNKS)) - # with ProcessPoolExecutor() as executor: - # executor.map(process_chunk, range(NUM_CHUNKS)) - time_end = time.time() - logger.info(f"Extract data: {time_end - time_start:.2f} seconds") + with LogExecutionTime("Extract data"): + # extract_data_from_source() + with Pool() as pool: + pool.map(process_chunk, range(NUM_CHUNKS)) + # with ThreadPoolExecutor() as executor: + # executor.map(process_chunk, range(NUM_CHUNKS)) + # with ProcessPoolExecutor() as executor: + # executor.map(process_chunk, range(NUM_CHUNKS)) # 2. 
Transform - Preprocess - time_start = time.time() - with Pool() as pool: - pool.map(transform_chunk, range(NUM_CHUNKS)) - time_end = time.time() - logger.info(f"Transform chunks: {time_end - time_start:.2f} seconds") + with LogExecutionTime("Transform chunks"): + with Pool() as pool: + pool.map(transform_chunk, range(NUM_CHUNKS)) # 3. Transform - Map - time_start = time.time() - with Pool() as pool: - pool.map(map_by_region, range(NUM_CHUNKS)) - time_end = time.time() - logger.info(f"Map by region: {time_end - time_start:.2f} seconds") + with LogExecutionTime("Map by region"): + with Pool() as pool: + pool.map(map_by_region, range(NUM_CHUNKS)) # 4. Transform - Reduce - time_start = time.time() - regions = ["Asia", "Europe", "Africa", "North America", "South America", "Oceania"] - with Pool() as pool: - pool.map(reduce_by_region, regions) - time_end = time.time() - logger.info(f"Reduce by region: {time_end - time_start:.2f} seconds") + with LogExecutionTime("Reduce by region"): + regions = [ + "Asia", + "Europe", + "Africa", + "North America", + "South America", + "Oceania", + ] + with Pool() as pool: + pool.map(reduce_by_region, regions) # 5. Sort - time_start = time.time() - with Pool() as pool: - pool.map(sort_by_gdp, regions) - time_end = time.time() - logger.info(f"Sort by GDP: {time_end - time_start:.2f} seconds") + with LogExecutionTime("Sort by GDP"): + with Pool() as pool: + pool.map(sort_by_gdp, regions) # 6. Load - time_start = time.time() - with Pool() as pool: - pool.map(load_to_database, regions) - time_end = time.time() - logger.info(f"Load to database: {time_end - time_start:.2f} seconds") + with LogExecutionTime("Load to database"): + with Pool() as pool: + pool.map(load_to_database, regions) # 7. 
Query - time_start = time.time() regions = ["Asia", "Europe", "Africa", "North America", "South America", "Oceania"] - with Pool() as pool: - results = pool.map(query_by_region, regions) - print(results) - - time_end = time.time() - logger.info(f"Query: {time_end - time_start:.2f} seconds") + with LogExecutionTime("Query by region"): + with Pool() as pool: + results = pool.map(query_by_region, regions) + print(results) if __name__ == "__main__": diff --git a/missions/W1/M3/etl_project_gdp_with_sql.py b/missions/W1/M3/etl_project_gdp_with_sql.py index 6106224..98f1642 100644 --- a/missions/W1/M3/etl_project_gdp_with_sql.py +++ b/missions/W1/M3/etl_project_gdp_with_sql.py @@ -1,11 +1,12 @@ import sqlite3 - +from pathlib import Path from modules.logger import logger, init_logger from modules.importer import WikiWebImporter from modules.exporter import SqliteExporter -LOG_FILE_PATH = "etl_project_log.txt" -DB_PATH = "World_Economies.db" +HOME_DIR = Path(__file__).resolve().parent +LOG_FILE_PATH = HOME_DIR / "log/etl_project_log.txt" +DB_PATH = HOME_DIR / "data/World_Economies.db" TABLE_NAME = "Countries_by_GDP" QUERY_1 = """ @@ -34,8 +35,7 @@ def transfrom_df(df): Transformation function """ # Million -> Billion - df["GDP"] = df["GDP"].apply(lambda x: x.replace(",", "")) - df["GDP"] = df["GDP"].apply(lambda x: round(float(x) / 1000, 2)) + df["GDP"] = (df["GDP"].str.replace(",", "").astype(float) / 1000).round(2) # Sort by GDP df = df.sort_values(by="GDP", ascending=False) diff --git a/missions/W1/M3/modules/logger.py b/missions/W1/M3/modules/logger.py index a8f1151..f3acffe 100644 --- a/missions/W1/M3/modules/logger.py +++ b/missions/W1/M3/modules/logger.py @@ -1,4 +1,5 @@ import datetime +import time from pathlib import Path DEFAULT_LOG_FILE_PATH = Path(__file__).resolve().parent / "../log/log.txt" @@ -36,3 +37,21 @@ def _log(self, type: str, message: str): def init_logger(log_file_path: str): logger.log_file_path = log_file_path + + +class LogExecutionTime: + """ + 
Execution time logger + """ + + def __init__(self, description: str): + self.description = description + + def __enter__(self): + self.start_time = time.time() + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.end_time = time.time() + self.execution_time = self.end_time - self.start_time + logger.info(f"{self.description} took {self.execution_time:.2f} seconds") diff --git a/missions/W1/M3/utils/create_country_region_table.py b/missions/W1/M3/utils/create_country_region_table.py index 5751562..1369871 100644 --- a/missions/W1/M3/utils/create_country_region_table.py +++ b/missions/W1/M3/utils/create_country_region_table.py @@ -41,6 +41,12 @@ def main(): country_region_table[countryName] = region country_region_table[countryNameAlias] = region + # manually add + country_region_table["DR Congo"] = "Africa" + country_region_table["Congo"] = "Africa" + country_region_table["Bahamas"] = "North America" + country_region_table["Gambia"] = "Africa" + save_to_json(country_region_table, OUTPUT_FILE_PATH) From ba9f7859e8a68f06ae93ce644a10242ec0d9d252 Mon Sep 17 00:00:00 2001 From: openkmj Date: Fri, 10 Jan 2025 18:52:19 +0900 Subject: [PATCH 3/4] fix --- missions/W1/M3/README.md | 37 +++++-- missions/W1/M3/config.py | 11 ++ missions/W1/M3/etl_project_gdp.py | 51 ++-------- missions/W1/M3/etl_project_gdp_from_csv.py | 88 ++-------------- missions/W1/M3/etl_project_gdp_parallel.py | 111 ++++++++------------- missions/W1/M3/etl_project_gdp_with_sql.py | 76 +++----------- missions/W1/M3/modules/importer.py | 12 ++- missions/W1/M3/modules/query_helper.py | 72 +++++++++++++ missions/W1/M3/modules/transformer.py | 20 ++++ 9 files changed, 216 insertions(+), 262 deletions(-) create mode 100644 missions/W1/M3/config.py create mode 100644 missions/W1/M3/modules/query_helper.py create mode 100644 missions/W1/M3/modules/transformer.py diff --git a/missions/W1/M3/README.md b/missions/W1/M3/README.md index 5316ce6..b742608 100644 --- a/missions/W1/M3/README.md +++ 
b/missions/W1/M3/README.md @@ -16,7 +16,7 @@ Common analysis use cases are as follows: - [GDP ETL Project](#gdp-etl-project) - [Business Requirements](#business-requirements) - [Contents](#contents) - - [Definition of ETL Process](#definition-of-etl-process) + - [ETL Process Overview](#etl-process-overview) - [1. Extract](#1-extract) - [2. Transform](#2-transform) - [3. Load](#3-load) @@ -24,8 +24,10 @@ Common analysis use cases are as follows: - [ETL Process](#etl-process) - [Modules](#modules) - [**`importer.py`**](#importerpy) + - [**`transformer.py`**](#transformerpy) - [**`exporter.py`**](#exporterpy) - [**`logger.py`**](#loggerpy) + - [**`query_helper.py`**](#query_helperpy) - [Utils](#utils) - [**`create_country_region_table.py`**](#create_country_region_tablepy) - [**`create_large_data_csv.py`**](#create_large_data_csvpy) @@ -35,11 +37,15 @@ Common analysis use cases are as follows: - [Parallel/Distributed Processing](#paralleldistributed-processing) - [Steps](#steps) -## Definition of ETL Process +## ETL Process Overview ### 1. Extract -- Parse html or read csv file. -- After extraction, the data should follow the format: +- Move data from external system to workspace +- This process will be abstracted in `importer.py` along with parsing process. + +### 2. Transform +- Prasing raw data to structured format + - After parsing, the data should follow the format: ```json [ { @@ -47,11 +53,10 @@ Common analysis use cases are as follows: "GDP": "30,337,162", "Region": "North America" }, - ... + // ... ] ``` - -### 2. Transform + - This process will be abstracted in `importer.py` along with extracting process. - Transform GDP value 1. Convert GDP value string to float 2. Convert GDP value to billion @@ -64,7 +69,7 @@ Common analysis use cases are as follows: "GDP":30337.16, "Region":"North America" }, - ... + // ... 
] ``` @@ -85,7 +90,7 @@ Common analysis use cases are as follows: ### Modules #### **`importer.py`** -Extracts data from Wikipedia and saves it to a JSON file. +Extracts data from data source and parse it to structured format. Supported Data Source: - Wikipedia @@ -99,6 +104,15 @@ Seperate Interface and Implementation to support multiple data source. - `FileImporterInterface` - `CsvFileImporter` +`ImporterInterface` defines the interface for all importers. +`WebImporterInterface` and `FileImporterInterface` defines how to extract data from data source. +`WikiWebImporter` and `CsvFileImporter` defines how to parse data from data source. + +--- + +#### **`transformer.py`** +Functions for transforming data. + --- #### **`exporter.py`** @@ -124,6 +138,11 @@ Supported Log Level: --- +#### **`query_helper.py`** +Functions for querying data. + +--- + ### Utils #### **`create_country_region_table.py`** diff --git a/missions/W1/M3/config.py b/missions/W1/M3/config.py new file mode 100644 index 0000000..c367da9 --- /dev/null +++ b/missions/W1/M3/config.py @@ -0,0 +1,11 @@ +from pathlib import Path + +HOME_DIR = Path(__file__).resolve().parent +LOG_FILE_PATH = HOME_DIR / "log/etl_project_log.txt" +RAW_DATA_FILE_PATH = HOME_DIR / "data/Countries_by_GDP.json" +OUTPUT_FILE_PATH = HOME_DIR / "data/Countries_by_GDP_Transformed.json" +DB_NAME = "World_Economies" +TABLE_NAME = "Countries_by_GDP" +DB_PATH = HOME_DIR / f"data/{DB_NAME}.db" +CSV_FILE_NAME = "large_data" +CSV_INPUT_FILE_PATH = HOME_DIR / f"data/{CSV_FILE_NAME}.csv" diff --git a/missions/W1/M3/etl_project_gdp.py b/missions/W1/M3/etl_project_gdp.py index 863b6f9..67d7958 100644 --- a/missions/W1/M3/etl_project_gdp.py +++ b/missions/W1/M3/etl_project_gdp.py @@ -1,25 +1,12 @@ +from config import LOG_FILE_PATH, RAW_DATA_FILE_PATH, OUTPUT_FILE_PATH from modules.logger import logger, init_logger from modules.importer import WikiWebImporter +from modules.transformer import transform_gdp from modules.exporter import 
JsonFileExporter -from pathlib import Path - -HOME_DIR = Path(__file__).resolve().parent -LOG_FILE_PATH = HOME_DIR / "log/etl_project_log.txt" -RAW_DATA_FILE_PATH = HOME_DIR / "data/Countries_by_GDP.json" -OUTPUT_FILE_PATH = HOME_DIR / "data/Countries_by_GDP_Transformed.json" - - -def transform_df(df): - """ - Transformation function - """ - # Million -> Billion - df["GDP"] = (df["GDP"].str.replace(",", "").astype(float) / 1000).round(2) - - # Sort by GDP - df = df.sort_values(by="GDP", ascending=False) - - return df +from modules.query_helper import ( + print_gdp_over_100_countries_df, + print_top5_avg_gdp_by_region_df, +) def main(): @@ -27,34 +14,18 @@ def main(): logger.print_separator() logger.info("Starting the ETL process") - # Extract - # parsing html and store to raw_data_file_path - wiki_importer = WikiWebImporter(raw_data_file_path=RAW_DATA_FILE_PATH) - df = wiki_importer.import_data() + importer = WikiWebImporter(raw_data_file_path=RAW_DATA_FILE_PATH) + df = importer.import_data() - # Transform - # transform GDP to billion and sort by GDP - logger.info("Transforming data...") - df = transform_df(df) + df = transform_gdp(df) - # Load - # export to output_file_path exporter = JsonFileExporter(OUTPUT_FILE_PATH) exporter.export_data(df) logger.info("ETL process completed successfully") - # Query - df_over_100 = df[df["GDP"] > 100] - print("Countries with GDP > 100B:") - for _, row in df_over_100.iterrows(): - print(f"{row['Country']:<20} {row['GDP']}") - - df_groupby_top5 = df.groupby("Region").head(5) - avg_gdp = df_groupby_top5.groupby("Region")["GDP"].mean() - print("Top 5 Average GDP by Region:") - for region, gdp in avg_gdp.items(): - print(f"{region:<15} {gdp:.2f}") + print_gdp_over_100_countries_df(df) + print_top5_avg_gdp_by_region_df(df) if __name__ == "__main__": diff --git a/missions/W1/M3/etl_project_gdp_from_csv.py b/missions/W1/M3/etl_project_gdp_from_csv.py index 9ae3090..ec8539d 100644 --- a/missions/W1/M3/etl_project_gdp_from_csv.py +++ 
b/missions/W1/M3/etl_project_gdp_from_csv.py @@ -1,62 +1,9 @@ -import sqlite3 -import time -import pandas as pd +from config import LOG_FILE_PATH, DB_PATH, TABLE_NAME, CSV_INPUT_FILE_PATH from modules.logger import logger, init_logger, LogExecutionTime from modules.importer import CsvFileImporter +from modules.transformer import transform_gdp, rename_columns from modules.exporter import SqliteExporter -from pathlib import Path - -HOME_DIR = Path(__file__).resolve().parent -LOG_FILE_PATH = HOME_DIR / "log/etl_project_log.txt" -DB_PATH = HOME_DIR / "data/World_Economies.db" -INPUT_FILE_PATH = HOME_DIR / "data/large_data.csv" -TABLE_NAME = "Countries_by_GDP" - -QUERY_1 = """ -SELECT Country, GDP_USD_billion -FROM Countries_by_GDP -WHERE GDP_USD_billion > 100 -ORDER BY GDP_USD_billion DESC -""" -QUERY_2 = """ -SELECT Region, AVG(GDP_USD_billion) FROM -( - SELECT - Country, - GDP_USD_billion, - Region, - ROW_NUMBER() OVER (PARTITION BY Region ORDER BY GDP_USD_billion DESC) AS row_num - FROM Countries_by_GDP -) -WHERE row_num <= 5 -GROUP BY Region -""" - - -def transfrom_df(df: pd.DataFrame) -> pd.DataFrame: - """ - Transformation function - """ - # Million -> Billion - # df["GDP"] = df["GDP"].apply(lambda x: x.replace(",", "")) - # df["GDP"] = df["GDP"].apply(lambda x: round(float(x) / 1000, 2)) - - # df["GDP"] = df["GDP"].apply(lambda x: round(float(x.replace(",", "")) / 1000, 2)) - # df["GDP"] = (df["GDP"].replace(",", "", regex=True).astype(float) / 1000).round(2) - df["GDP"] = (df["GDP"].str.replace(",", "").astype(float) / 1000).round(2) - # df["GDP"] = ( - # pd.to_numeric(df["GDP"].str.replace(",", ""), errors="coerce") - # .div(1000) - # .round(2) - # ) - - # Sort by GDP - df = df.sort_values(by="GDP", ascending=False) - - # Rename GDP column to GDP_USD_billion - df.rename(columns={"GDP": "GDP_USD_billion"}, inplace=True) - - return df +from modules.query_helper import print_top5_avg_gdp_by_region_sql def main(): @@ -64,36 +11,21 @@ def main(): 
logger.print_separator() logger.info("Starting the ETL process") - # Extract with LogExecutionTime("Extract"): - csv_importer = CsvFileImporter(INPUT_FILE_PATH) - df = csv_importer.import_data() + importer = CsvFileImporter(CSV_INPUT_FILE_PATH) + df = importer.import_data() - # Transform with LogExecutionTime("Transform"): - df = transfrom_df(df) + df = transform_gdp(df) + df = rename_columns(df, "GDP", "GDP_USD_billion") - # Load with LogExecutionTime("Load"): - sqlite_exporter = SqliteExporter(DB_PATH, table_name=TABLE_NAME) - sqlite_exporter.export_data(df) + exporter = SqliteExporter(DB_PATH, table_name=TABLE_NAME) + exporter.export_data(df) logger.info("ETL process completed successfully") - # Query - print("Top 5 Average GDP by Region:") - - df_groupby_top5 = df.groupby("Region").head(5) - avg_gdp = df_groupby_top5.groupby("Region")["GDP_USD_billion"].mean() - for region, gdp in avg_gdp.items(): - print(f"{region:<15} {gdp:.2f}") - - conn = sqlite3.connect(DB_PATH) - cursor = conn.cursor() - cursor.execute(QUERY_2) - for row in cursor: - print(f"{row[0]:<15} {row[1]:.2f}") - conn.close() + print_top5_avg_gdp_by_region_sql(DB_PATH, TABLE_NAME) if __name__ == "__main__": diff --git a/missions/W1/M3/etl_project_gdp_parallel.py b/missions/W1/M3/etl_project_gdp_parallel.py index 4aafcac..b4bffb0 100644 --- a/missions/W1/M3/etl_project_gdp_parallel.py +++ b/missions/W1/M3/etl_project_gdp_parallel.py @@ -1,39 +1,24 @@ +from multiprocessing import Pool +from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor import sqlite3 -import time import pandas as pd -from pathlib import Path from modules.logger import logger, init_logger, LogExecutionTime -from multiprocessing import Pool -from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor +from config import ( + LOG_FILE_PATH, + DB_NAME, + TABLE_NAME, + CSV_FILE_NAME, + CSV_INPUT_FILE_PATH, +) -HOME_DIR = Path(__file__).resolve().parent -LOG_FILE_PATH = HOME_DIR / "log/etl_project_log.txt" 
-DB_NAME = "World_Economies" -TABLE_NAME = "Countries_by_GDP" -INPUT_FILE_PATH = HOME_DIR / "data/large_data.csv" DATA_SIZE = 10_000_000 # 10M rows CHUNK_SIZE = 1_000_000 # 100K rows per chunk NUM_CHUNKS = DATA_SIZE // CHUNK_SIZE # 100 chunks - -QUERY_1 = """ -SELECT Country, GDP_USD_billion -FROM Countries_by_GDP -WHERE GDP_USD_billion > 100 -ORDER BY GDP_USD_billion DESC -""" -QUERY_2 = """ -SELECT Region, AVG(GDP_USD_billion) FROM -( - SELECT - Country, - GDP_USD_billion, - Region, - ROW_NUMBER() OVER (PARTITION BY Region ORDER BY GDP_USD_billion DESC) AS row_num - FROM Countries_by_GDP -) -WHERE row_num <= 5 -GROUP BY Region -""" +SCHEMA = { + "Country": str, + "GDP": str, + "Region": str, +} def transfrom_df(df: pd.DataFrame) -> pd.DataFrame: @@ -43,37 +28,29 @@ def transfrom_df(df: pd.DataFrame) -> pd.DataFrame: df["GDP"] = (df["GDP"].str.replace(",", "").astype(float) / 1000).round(2) - # Rename GDP column to GDP_USD_billion df.rename(columns={"GDP": "GDP_USD_billion"}, inplace=True) return df -schema = { - "Country": str, - "GDP": str, - "Region": str, -} - - -def process_chunk(index: int): +def read_and_save_chunk(index: int): df = pd.read_csv( - INPUT_FILE_PATH, - dtype=schema, + CSV_INPUT_FILE_PATH, + dtype=SCHEMA, header=None, - names=schema.keys(), + names=SCHEMA.keys(), skiprows=index * CHUNK_SIZE, nrows=CHUNK_SIZE, ) - df.to_csv(f"data/large_data_{index}.csv", index=False) + df.to_csv(f"data/{CSV_FILE_NAME}_{index}.csv", index=False) -def process_chunk2(index: int, chunk: pd.DataFrame): +def save_chunk(index: int, chunk: pd.DataFrame): print(f"Processing chunk {index}") - chunk.to_csv(f"data/large_data_{index}.csv", index=False) + chunk.to_csv(f"data/{CSV_FILE_NAME}_{index}.csv", index=False) -def extract_data_from_source(): +def sequential_split(): """ (Extract) Seperate one big file into multiple small files. 
@@ -82,15 +59,15 @@ def extract_data_from_source(): with Pool() as pool: with pd.read_csv( - INPUT_FILE_PATH, - dtype=schema, + CSV_INPUT_FILE_PATH, + dtype=SCHEMA, header=None, - names=schema.keys(), + names=SCHEMA.keys(), chunksize=CHUNK_SIZE, ) as reader: results = [] for i, chunk in enumerate(reader): - results.append(pool.apply_async(process_chunk2, args=(i, chunk))) + results.append(pool.apply_async(save_chunk, args=(i, chunk))) pool.close() pool.join() @@ -100,28 +77,28 @@ def transform_chunk(index: int): """ (Transform - Preprocess) Transform each small file """ - df = pd.read_csv(f"data/large_data_{index}.csv", dtype=schema) + df = pd.read_csv(f"data/{CSV_FILE_NAME}_{index}.csv", dtype=SCHEMA) df = transfrom_df(df) - df.to_csv(f"data/large_data_{index}_transformed.csv", index=False) + df.to_csv(f"data/{CSV_FILE_NAME}_{index}_transformed.csv", index=False) def map_by_region(index: int): """ (Transform - Map) Separate each small file by region """ - df = pd.read_csv(f"data/large_data_{index}_transformed.csv") + df = pd.read_csv(f"data/{CSV_FILE_NAME}_{index}_transformed.csv") regions = ["Asia", "Europe", "Africa", "North America", "South America", "Oceania"] for region in regions: region_df = df[df["Region"] == region] - region_df.to_csv(f"data/large_data_{index}_{region}.csv", index=False) + region_df.to_csv(f"data/{CSV_FILE_NAME}_{index}_{region}.csv", index=False) def reduce_by_region(region: str): """ (Transform - Reduce) Merge all files for each region """ - all_files = [f"data/large_data_{i}_{region}.csv" for i in range(NUM_CHUNKS)] + all_files = [f"data/{CSV_FILE_NAME}_{i}_{region}.csv" for i in range(NUM_CHUNKS)] dfs = [] for file in all_files: @@ -133,16 +110,16 @@ def reduce_by_region(region: str): if dfs: combined_df = pd.concat(dfs, ignore_index=True) - combined_df.to_csv(f"data/large_data_{region}.csv", index=False) + combined_df.to_csv(f"data/{CSV_FILE_NAME}_{region}.csv", index=False) def sort_by_gdp(region: str): """ (Transform - Sort) Sort 
each region file by GDP """ - df = pd.read_csv(f"data/large_data_{region}.csv") + df = pd.read_csv(f"data/{CSV_FILE_NAME}_{region}.csv") df = df.sort_values(by="GDP_USD_billion", ascending=False) - df.to_csv(f"data/large_data_{region}_sorted.csv", index=False) + df.to_csv(f"data/{CSV_FILE_NAME}_{region}_sorted.csv", index=False) def load_to_database(region: str): @@ -150,7 +127,7 @@ def load_to_database(region: str): (Load) Export each region file to sqlite """ conn = sqlite3.connect(f"data/{DB_NAME}_{region}.db") - df = pd.read_csv(f"data/large_data_{region}_sorted.csv") + df = pd.read_csv(f"data/{CSV_FILE_NAME}_{region}_sorted.csv") df.to_sql(TABLE_NAME, conn, if_exists="append", index=False) conn.close() @@ -170,27 +147,27 @@ def main(): logger.print_separator() logger.info("Starting the Parallel ETL process") - # 1. Extract - with LogExecutionTime("Extract data"): - # extract_data_from_source() + # 1. Split + with LogExecutionTime("Split data"): + # sequential_split() with Pool() as pool: - pool.map(process_chunk, range(NUM_CHUNKS)) + pool.map(read_and_save_chunk, range(NUM_CHUNKS)) # with ThreadPoolExecutor() as executor: - # executor.map(process_chunk, range(NUM_CHUNKS)) + # executor.map(read_and_save_chunk, range(NUM_CHUNKS)) # with ProcessPoolExecutor() as executor: - # executor.map(process_chunk, range(NUM_CHUNKS)) + # executor.map(read_and_save_chunk, range(NUM_CHUNKS)) - # 2. Transform - Preprocess + # 2. Preprocess with LogExecutionTime("Transform chunks"): with Pool() as pool: pool.map(transform_chunk, range(NUM_CHUNKS)) - # 3. Transform - Map + # 3. Map with LogExecutionTime("Map by region"): with Pool() as pool: pool.map(map_by_region, range(NUM_CHUNKS)) - # 4. Transform - Reduce + # 4. 
Reduce with LogExecutionTime("Reduce by region"): regions = [ "Asia", diff --git a/missions/W1/M3/etl_project_gdp_with_sql.py b/missions/W1/M3/etl_project_gdp_with_sql.py index 98f1642..30faeb2 100644 --- a/missions/W1/M3/etl_project_gdp_with_sql.py +++ b/missions/W1/M3/etl_project_gdp_with_sql.py @@ -1,49 +1,12 @@ -import sqlite3 -from pathlib import Path +from config import LOG_FILE_PATH, DB_PATH, TABLE_NAME from modules.logger import logger, init_logger from modules.importer import WikiWebImporter +from modules.transformer import transform_gdp, rename_columns from modules.exporter import SqliteExporter - -HOME_DIR = Path(__file__).resolve().parent -LOG_FILE_PATH = HOME_DIR / "log/etl_project_log.txt" -DB_PATH = HOME_DIR / "data/World_Economies.db" -TABLE_NAME = "Countries_by_GDP" - -QUERY_1 = """ -SELECT Country, GDP_USD_billion -FROM Countries_by_GDP -WHERE GDP_USD_billion > 100 -ORDER BY GDP_USD_billion DESC -""" -QUERY_2 = """ -SELECT Region, AVG(GDP_USD_billion) FROM -( - SELECT - Country, - GDP_USD_billion, - Region, - ROW_NUMBER() OVER (PARTITION BY Region ORDER BY GDP_USD_billion DESC) AS row_num - FROM Countries_by_GDP +from modules.query_helper import ( + print_gdp_over_100_countries_sql, + print_top5_avg_gdp_by_region_sql, ) -WHERE row_num <= 5 -GROUP BY Region -""" - - -def transfrom_df(df): - """ - Transformation function - """ - # Million -> Billion - df["GDP"] = (df["GDP"].str.replace(",", "").astype(float) / 1000).round(2) - - # Sort by GDP - df = df.sort_values(by="GDP", ascending=False) - - # Rename GDP column to GDP_USD_billion - df.rename(columns={"GDP": "GDP_USD_billion"}, inplace=True) - - return df def main(): @@ -51,32 +14,19 @@ def main(): logger.print_separator() logger.info("Starting the ETL process") - # Extract - wiki_importer = WikiWebImporter() - df = wiki_importer.import_data() + importer = WikiWebImporter() + df = importer.import_data() - # Transform - logger.info("Transforming data...") - df = transfrom_df(df) + df = 
transform_gdp(df) + df = rename_columns(df, "GDP", "GDP_USD_billion") - # Load - sqlite_exporter = SqliteExporter(DB_PATH, table_name=TABLE_NAME) - sqlite_exporter.export_data(df) + exporter = SqliteExporter(DB_PATH, table_name=TABLE_NAME) + exporter.export_data(df) logger.info("ETL process completed successfully") - # Query - conn = sqlite3.connect(DB_PATH) - cursor = conn.cursor() - cursor.execute(QUERY_1) - print("Countries with GDP > 100B:") - for row in cursor: - print(f"{row[0]:<20} {row[1]}") - cursor.execute(QUERY_2) - print("Top 5 Average GDP by Region:") - for row in cursor: - print(f"{row[0]:<15} {row[1]:.2f}") - conn.close() + print_gdp_over_100_countries_sql(DB_PATH, TABLE_NAME) + print_top5_avg_gdp_by_region_sql(DB_PATH, TABLE_NAME) if __name__ == "__main__": diff --git a/missions/W1/M3/modules/importer.py b/missions/W1/M3/modules/importer.py index 4c81118..69fe21b 100644 --- a/missions/W1/M3/modules/importer.py +++ b/missions/W1/M3/modules/importer.py @@ -10,7 +10,9 @@ class ImporterInterface(ABC): """ General Data importer interface. - Importer Rule: import data from source and return dataframe. The dataframe should have the following columns: + Importer Rule: import data from source and parse it to dataframe. + + The dataframe should have the following columns: - Country - GDP - Region @@ -22,16 +24,15 @@ def __init__(self, source: str): @abstractmethod def import_data(self) -> pd.DataFrame: """ - Import raw data from the source + Import raw data from the source and parse it to dataframe. """ pass class WebImporterInterface(ImporterInterface): """ - Web Crawler interface. request -> parse -> return + Web Crawler interface. request(extract) -> parse(transform) -> return Subclass should implement _parse_html method. - Web importer는 HTML을 파싱하여 중간 데이터를 만들기 때문에(일종의 Tranform 작업) 중간 데이터를 저장할 수 있는 옵션을 둔다. 
""" def __init__(self, source: str, raw_data_file_path: str = None): @@ -144,7 +145,8 @@ def parse_json(file_path): class FileImporter(ImporterInterface): """ File data importer - File importer는 별도의 trasnform 작업이 없으니 중간 데이터가 없다. + + Read file and parse it to dataframe. """ def import_data(self) -> pd.DataFrame: diff --git a/missions/W1/M3/modules/query_helper.py b/missions/W1/M3/modules/query_helper.py new file mode 100644 index 0000000..b5f758e --- /dev/null +++ b/missions/W1/M3/modules/query_helper.py @@ -0,0 +1,72 @@ +import pandas as pd +import sqlite3 + + +def print_gdp_over_100_countries_df(df: pd.DataFrame) -> pd.DataFrame: + """ + (Pandas) Print countries with GDP > 100B. + """ + df_over_100 = df[df["GDP"] > 100] + print("Countries with GDP > 100B:") + for _, row in df_over_100.iterrows(): + print(f"{row['Country']:<20} {row['GDP']}") + + +def print_gdp_over_100_countries_sql(db_path: str, table_name: str): + """ + (SQLite) Print countries with GDP > 100B. + """ + + QUERY_1 = f""" +SELECT Country, GDP_USD_billion +FROM {table_name} +WHERE GDP_USD_billion > 100 +ORDER BY GDP_USD_billion DESC +""" + + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + cursor.execute(QUERY_1) + print("Countries with GDP > 100B:") + for row in cursor: + print(f"{row[0]:<20} {row[1]}") + conn.close() + + +def print_top5_avg_gdp_by_region_df(df: pd.DataFrame) -> pd.DataFrame: + """ + Print top 5 average GDP by region. + """ + df_groupby_top5 = df.groupby("Region").head(5) + avg_gdp = df_groupby_top5.groupby("Region")["GDP"].mean() + print("Top 5 Average GDP by Region:") + for region, gdp in avg_gdp.items(): + print(f"{region:<15} {gdp:.2f}") + + +def print_top5_avg_gdp_by_region_sql(db_path: str, table_name: str): + """ + (SQLite) Print top 5 average GDP by region. 
+ """ + + QUERY_2 = f""" +SELECT Region, AVG(GDP_USD_billion) FROM +( + SELECT + Country, + GDP_USD_billion, + Region, + ROW_NUMBER() OVER (PARTITION BY Region ORDER BY GDP_USD_billion DESC) AS row_num + FROM {table_name} +) +WHERE row_num <= 5 +GROUP BY Region +""" + + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + cursor.execute(QUERY_2) + print("Top 5 Average GDP by Region:") + for row in cursor: + print(f"{row[0]:<15} {row[1]:.2f}") + conn.close() diff --git a/missions/W1/M3/modules/transformer.py b/missions/W1/M3/modules/transformer.py new file mode 100644 index 0000000..9684661 --- /dev/null +++ b/missions/W1/M3/modules/transformer.py @@ -0,0 +1,20 @@ +import pandas as pd + + +def transform_gdp(df: pd.DataFrame) -> pd.DataFrame: + """ + Convert GDP to billion and sort by GDP. + """ + + df["GDP"] = (df["GDP"].str.replace(",", "").astype(float) / 1000).round(2) + df = df.sort_values(by="GDP", ascending=False) + + return df + + +def rename_columns(df: pd.DataFrame, from_column: str, to_column: str) -> pd.DataFrame: + """ + Rename columns. + """ + df.rename(columns={from_column: to_column}, inplace=True) + return df From 6f47f94183f7dc548a06ac32562ee6273726d52b Mon Sep 17 00:00:00 2001 From: openkmj Date: Fri, 17 Jan 2025 18:57:43 +0900 Subject: [PATCH 4/4] fix --- .gitignore | 9 +++++---- missions/W1/M3/config.py | 3 ++- missions/W1/M3/etl_project_gdp_from_csv.py | 3 +-- missions/W1/M3/etl_project_gdp_parallel.py | 1 + missions/W1/M3/etl_project_gdp_with_sql.py | 3 +-- missions/W1/M3/modules/importer.py | 23 ++++++++++------------ missions/W1/M3/modules/query_helper.py | 6 +++--- missions/W1/M3/modules/transformer.py | 9 +-------- 8 files changed, 24 insertions(+), 33 deletions(-) diff --git a/.gitignore b/.gitignore index c0b44ba..c594f41 100644 --- a/.gitignore +++ b/.gitignore @@ -159,13 +159,14 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#.idea/ -*.db -large_data*.csv .DS_Store .venv .ipynb_checkpoints -large_data*.csv __pycache__ *.db *.json -*log.txt \ No newline at end of file +*.txt +*.html +*.csv +*.json +*.sql \ No newline at end of file diff --git a/missions/W1/M3/config.py b/missions/W1/M3/config.py index c367da9..e46a048 100644 --- a/missions/W1/M3/config.py +++ b/missions/W1/M3/config.py @@ -2,7 +2,8 @@ HOME_DIR = Path(__file__).resolve().parent LOG_FILE_PATH = HOME_DIR / "log/etl_project_log.txt" -RAW_DATA_FILE_PATH = HOME_DIR / "data/Countries_by_GDP.json" +# RAW_DATA_FILE_PATH = HOME_DIR / "data/Countries_by_GDP.json" +RAW_DATA_FILE_PATH = HOME_DIR / "data/Countries_by_GDP.html" OUTPUT_FILE_PATH = HOME_DIR / "data/Countries_by_GDP_Transformed.json" DB_NAME = "World_Economies" TABLE_NAME = "Countries_by_GDP" diff --git a/missions/W1/M3/etl_project_gdp_from_csv.py b/missions/W1/M3/etl_project_gdp_from_csv.py index ec8539d..5d9a7d5 100644 --- a/missions/W1/M3/etl_project_gdp_from_csv.py +++ b/missions/W1/M3/etl_project_gdp_from_csv.py @@ -1,7 +1,7 @@ from config import LOG_FILE_PATH, DB_PATH, TABLE_NAME, CSV_INPUT_FILE_PATH from modules.logger import logger, init_logger, LogExecutionTime from modules.importer import CsvFileImporter -from modules.transformer import transform_gdp, rename_columns +from modules.transformer import transform_gdp from modules.exporter import SqliteExporter from modules.query_helper import print_top5_avg_gdp_by_region_sql @@ -17,7 +17,6 @@ def main(): with LogExecutionTime("Transform"): df = transform_gdp(df) - df = rename_columns(df, "GDP", "GDP_USD_billion") with LogExecutionTime("Load"): exporter = SqliteExporter(DB_PATH, table_name=TABLE_NAME) diff --git a/missions/W1/M3/etl_project_gdp_parallel.py b/missions/W1/M3/etl_project_gdp_parallel.py index b4bffb0..46659eb 100644 --- a/missions/W1/M3/etl_project_gdp_parallel.py +++ b/missions/W1/M3/etl_project_gdp_parallel.py @@ -11,6 +11,7 @@ CSV_INPUT_FILE_PATH, ) +# TODO: 동적으로 가져와야한다. 
DATA_SIZE = 10_000_000 # 10M rows CHUNK_SIZE = 1_000_000 # 100K rows per chunk NUM_CHUNKS = DATA_SIZE // CHUNK_SIZE # 100 chunks diff --git a/missions/W1/M3/etl_project_gdp_with_sql.py b/missions/W1/M3/etl_project_gdp_with_sql.py index 30faeb2..1b67903 100644 --- a/missions/W1/M3/etl_project_gdp_with_sql.py +++ b/missions/W1/M3/etl_project_gdp_with_sql.py @@ -1,7 +1,7 @@ from config import LOG_FILE_PATH, DB_PATH, TABLE_NAME from modules.logger import logger, init_logger from modules.importer import WikiWebImporter -from modules.transformer import transform_gdp, rename_columns +from modules.transformer import transform_gdp from modules.exporter import SqliteExporter from modules.query_helper import ( print_gdp_over_100_countries_sql, @@ -18,7 +18,6 @@ def main(): df = importer.import_data() df = transform_gdp(df) - df = rename_columns(df, "GDP", "GDP_USD_billion") exporter = SqliteExporter(DB_PATH, table_name=TABLE_NAME) exporter.export_data(df) diff --git a/missions/W1/M3/modules/importer.py b/missions/W1/M3/modules/importer.py index 69fe21b..2b53684 100644 --- a/missions/W1/M3/modules/importer.py +++ b/missions/W1/M3/modules/importer.py @@ -42,9 +42,9 @@ def __init__(self, source: str, raw_data_file_path: str = None): def import_data(self) -> pd.DataFrame: logger.info(f"Importing data from {self.source}...") html = self._get_html() - df = self._parse_html(html) if self.raw_data_file_path: - self._store_raw_data(self.raw_data_file_path, df) + self._store_raw_data(self.raw_data_file_path, html) + df = self._parse_html(html) logger.info(f"Data imported successfully") return df @@ -68,11 +68,12 @@ def _parse_html(self, html: str) -> pd.DataFrame: """ pass - def _store_raw_data(self, path: str, df: pd.DataFrame): + def _store_raw_data(self, path: str, data: str): """ Store raw data to the given file """ - df.to_json(path, orient="records", indent=2) + with open(path, "w") as file: + file.write(data) class WikiWebImporter(WebImporterInterface): @@ -129,16 +130,12 @@ def 
_map_region(self, df: pd.DataFrame) -> pd.DataFrame: Map region to the given country """ - def parse_json(file_path): - """ - Read JSON file - """ - with open(file_path, "r") as file: - data = json.load(file) - return data + country_region_df = pd.read_json(self.COUNTRY_REGION_TABLE_PATH, orient="index") + country_region_df = country_region_df.reset_index() + country_region_df.columns = ["Country", "Region"] + + df = df.merge(country_region_df, on="Country", how="left") - country_region_table = parse_json(self.COUNTRY_REGION_TABLE_PATH) - df["Region"] = df["Country"].map(country_region_table) return df diff --git a/missions/W1/M3/modules/query_helper.py b/missions/W1/M3/modules/query_helper.py index b5f758e..2904e55 100644 --- a/missions/W1/M3/modules/query_helper.py +++ b/missions/W1/M3/modules/query_helper.py @@ -6,10 +6,10 @@ def print_gdp_over_100_countries_df(df: pd.DataFrame) -> pd.DataFrame: """ (Pandas) Print countries with GDP > 100B. """ - df_over_100 = df[df["GDP"] > 100] + df_over_100 = df[df["GDP_USD_billion"] > 100] print("Countries with GDP > 100B:") for _, row in df_over_100.iterrows(): - print(f"{row['Country']:<20} {row['GDP']}") + print(f"{row['Country']:<20} {row['GDP_USD_billion']}") def print_gdp_over_100_countries_sql(db_path: str, table_name: str): @@ -38,7 +38,7 @@ def print_top5_avg_gdp_by_region_df(df: pd.DataFrame) -> pd.DataFrame: Print top 5 average GDP by region. 
""" df_groupby_top5 = df.groupby("Region").head(5) - avg_gdp = df_groupby_top5.groupby("Region")["GDP"].mean() + avg_gdp = df_groupby_top5.groupby("Region")["GDP_USD_billion"].mean() print("Top 5 Average GDP by Region:") for region, gdp in avg_gdp.items(): print(f"{region:<15} {gdp:.2f}") diff --git a/missions/W1/M3/modules/transformer.py b/missions/W1/M3/modules/transformer.py index 9684661..94edcc4 100644 --- a/missions/W1/M3/modules/transformer.py +++ b/missions/W1/M3/modules/transformer.py @@ -8,13 +8,6 @@ def transform_gdp(df: pd.DataFrame) -> pd.DataFrame: df["GDP"] = (df["GDP"].str.replace(",", "").astype(float) / 1000).round(2) df = df.sort_values(by="GDP", ascending=False) + df.rename(columns={"GDP": "GDP_USD_billion"}, inplace=True) return df - - -def rename_columns(df: pd.DataFrame, from_column: str, to_column: str) -> pd.DataFrame: - """ - Rename columns. - """ - df.rename(columns={from_column: to_column}, inplace=True) - return df