From 3d4e69833efb8824af9ad61d0d27e5ef0bf85273 Mon Sep 17 00:00:00 2001 From: silil Date: Sun, 30 Jun 2019 23:18:24 +0100 Subject: [PATCH] cyclical features --- ...feature_engineering_cyclic_variables.ipynb | 440 ++++++++++++++++++ 1 file changed, 440 insertions(+) create mode 100644 sources/curriculum/3_modeling_and_machine_learning/machine-learning/feature_engineering_cyclic_variables.ipynb diff --git a/sources/curriculum/3_modeling_and_machine_learning/machine-learning/feature_engineering_cyclic_variables.ipynb b/sources/curriculum/3_modeling_and_machine_learning/machine-learning/feature_engineering_cyclic_variables.ipynb new file mode 100644 index 00000000..d37eab08 --- /dev/null +++ b/sources/curriculum/3_modeling_and_machine_learning/machine-learning/feature_engineering_cyclic_variables.ipynb @@ -0,0 +1,440 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Feature engineering \n", + "\n", + "#### Cyclical features\n", + "\n", + "Variables that are normally treated as ordinal but that actually represent a cycle:\n", + "\n", + "+ Time: minutes, hours, seconds\n", + "+ Day of the week\n", + "+ Month of the year\n", + "+ Week of the month\n", + "+ Week of the year\n", + "+ Season\n", + "\n", + "Most of the time, when we want to use this kind of feature in an algorithm, we use its ordinal representation. By doing this, we fail to capture the fact that the feature is part of a cycle, and therefore that the first value of a cyclical feature is **near** the last value of the same variable. For example: \n", + "\n", + "- December is near January\n", + "- 23:00 is near 3:00 \n", + "- Sunday is near Monday\n", + "\n", + "The best way to deal with this kind of variable is to apply a transformation that preserves its cyclical nature, using the sine and cosine wave functions. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-30T21:58:25.138148Z", + "start_time": "2019-06-30T21:58:25.121908Z" + } + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-30T22:11:04.541748Z", + "start_time": "2019-06-30T22:11:04.009077Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5,0,'observation number')" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# generate some random hours\n", + "hours = list(np.random.choice(a=24, size=15, replace=False))\n", + "df = pd.DataFrame({'hour': hours})\n", + "# to visualize it better\n", + "df = df.sort_values('hour').reset_index(drop=True)\n", + "\n", + "df.plot()\n", + "plt.ylabel('hour')\n", + "plt.xlabel('observation number')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's get the sine and cosine for each hour." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-30T21:58:26.205447Z", + "start_time": "2019-06-30T21:58:26.149449Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hoursin_hrcos_hr
000.000000e+001.000000
125.000000e-010.866025
248.660254e-010.500000
359.659258e-010.258819
479.659258e-01-0.258819
588.660254e-01-0.500000
697.071068e-01-0.707107
7105.000000e-01-0.866025
8112.588190e-01-0.965926
9121.224647e-16-1.000000
1014-5.000000e-01-0.866025
1119-9.659258e-010.258819
1220-8.660254e-010.500000
1321-7.071068e-010.707107
1422-5.000000e-010.866025
\n", + "
" + ], + "text/plain": [ + " hour sin_hr cos_hr\n", + "0 0 0.000000e+00 1.000000\n", + "1 2 5.000000e-01 0.866025\n", + "2 4 8.660254e-01 0.500000\n", + "3 5 9.659258e-01 0.258819\n", + "4 7 9.659258e-01 -0.258819\n", + "5 8 8.660254e-01 -0.500000\n", + "6 9 7.071068e-01 -0.707107\n", + "7 10 5.000000e-01 -0.866025\n", + "8 11 2.588190e-01 -0.965926\n", + "9 12 1.224647e-16 -1.000000\n", + "10 14 -5.000000e-01 -0.866025\n", + "11 19 -9.659258e-01 0.258819\n", + "12 20 -8.660254e-01 0.500000\n", + "13 21 -7.071068e-01 0.707107\n", + "14 22 -5.000000e-01 0.866025" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HOURS = 24\n", + "\n", + "df['sin_hr'] = np.sin(2*np.pi*df.hour/HOURS)\n", + "df['cos_hr'] = np.cos(2*np.pi*df.hour/HOURS)\n", + "\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you only use the sine transformation, two different values of your original cyclical feature end up with the same transformed value. For example, the sine transformation of the 11:00 hour is approximately 0.26, but that value appears at two different points on the sine wave (if you use all 24 values of the hour, your sine wave will look prettier)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-30T21:59:10.538782Z", + "start_time": "2019-06-30T21:59:09.957189Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5,1,'sine transformation')" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.sin_hr.plot()\n", + "plt.hlines(0.25, 0, 14, colors='r')\n", + "plt.ylabel('sine')\n", + "plt.xlabel('hour of the day')\n", + "plt.title('sine transformation')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We need each hour to map to a unique value, which is why we also need the cosine transformation. " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-30T21:59:16.582546Z", + "start_time": "2019-06-30T21:59:15.974686Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5,1,'cosine transformation')" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.cos_hr.plot()\n", + "plt.hlines(-0.96,0,14, color='r')\n", + "plt.ylabel('cosine')\n", + "plt.xlabel('hour of the day')\n", + "plt.title('cosine transformation')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that the cosine value of the 11:00 hour also appears at two different points on the cosine wave.\n", + "\n", + "But when we use sine and cosine together as a pair of coordinates, each hour maps to a unique point, and hours that should be close end up near each other, e.g. 11:00 and 12:00." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-30T22:09:55.562464Z", + "start_time": "2019-06-30T22:09:54.959341Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5,1,'sine-cosine transformation')" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR0AAAEXCAYAAACQ8ZAdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAHR1JREFUeJzt3X2YXWV57/Hvb5JJwiFoJgm1gUAiYhUqEDAF2ijNQVT09CTYYI0ehXiglrbW9ngqAbnaUgqWoC1qq/UFEaktiHApsWoRCTmcowZJaggQVJKgTSIvMS9ATDJMMvf5Yz0bF8Psmb0ne6+1Zs/vc137yt7r9V4vc+dZa+393IoIzMyK0lV2AGY2tjjpmFmhnHTMrFBOOmZWKCcdMyuUk46ZFcpJpySSPijpurLjaISkT0n6i7LjaDVJb5G0WdJuSSeXHc9AHbvf/T0dK4ukJcCFEfGakta/EXh/RNxexvoHxLKEEvdFkdzSsUqTNK6Ni58FPDSSGdscV2eLCL/a+AKWAluBZ4AfAa9Lwy8HvpjezwYCOB/4T+DnwGW5ZXQBlwAbge3ALcDUIdY5Ffg88DNgJ/DV3LjfBzYAO4DlwBFpuIBrgSeBp4EHgFelcTcAV6b384EtwP9O0z4GvDu3/InAR9J2PAF8CjhkkBiPA/YBB4DdwK7cuv4J+AbwC+As4L8BP0hxbQYuzy1nuH13KrA6zfsE8Pcpxt1pvl8AG3MxrQR2kSWjBbnlDBbXDcAngW+m5X0H+FXgo2m//xA4ObeM2jF8BlgPvKWBfXHlcMcujQvgIuCRFP8nSFcyVXuVHkAnv4BXpD+S2h/2bOBl6f3lvDDpfBY4BDgJ6AWOS+P/FFgFzEx/MJ8GbhpivV8HvgT0AN3Ab6fhZ6Y/ylPScv4BuCeNeyOwBphCloCOA2akcc+d/GRJZz9wRVr2m4E9QE8af236g5gKHAZ8DfjbOnEuAf7fgGE3AE8B88iS7aS0zhPS5xPJksc5De677wHvSu8nA6fn1hXAsel9d/qD/iAwIe2rZ4BXDBHXDWl/vjp9XgE8CpwHjAOuBO7Ore+twBFp/reRJa8Zw+yLK4c7drlt+bd0/I4GtgFnl/03MOhxLzuATn4Bx5K1Bs4CugeMu5wXJp2ZufHfBxan9w+TWkjp8wygDxg/yDpnAP21JDBg3OeAa3KfJ6flzE4n9Y+B04GuAfPlT/75wN78utM2nk6WrH5BSqxp3G8Cj9bZP/X+0G4cZr9+FLi2wX13D/DXwPRBlpNPOq8FHs9vO3ATqVU1WFxp2Gdzn/8EeDj3+QRSq6XOdqwFFg6zL2r7ve6xy23La3LjbwEuKftvYLCX7+m0UURsAP6MLME8KelmSUcMMcvjufd7yE4syO49fEXSLkm7yJLQAeAl6QnH7vT6IHAUsCMidg6y/COAn+bi2012uXZkRKwA/pGsWf6kpM9IelGdOLdHxP5BYj0c+C/Amlys/56GN2Nz/oOk0yTdLWmbpKfILiOmD5in3r67APg14IeS7pP0O3XWeQSwOSL6c8N+ChxZL67kidz7vYN8rsWBpPM
krc3tm1cNsh311D12uWnq7YNKcdJps4j418ieSMwi+99o2QgWsxl4U0RMyb0mRcTWiLgoIian14fStFMlTRlkOT9LcQAg6VBgGtk9JyLi4xHxauB4sj/UDzQZ58/J/tB+PRfniyOi3slf79HpwOH/SnbJdlREvJjsPpEaCSgiHomItwO/Qrbvb03bPdDPgKMk5f8mjibtm2HiHZakWWSXgO8FpkXEFOBBfrkdwy17yGM3mjjptJGkV0g6U9JEshuFe8kufZr1KeCqdOIi6XBJCwebMCIeI7ux+UlJPZK6JZ2RRt8EvFvSnBTTh4B7I+Inkn4jtSi6yS6R9jUba2olfBa4VtKvpFiPlPTGOrM8AcyUNGGYRR9G1nrbJ+lU4B2NxiTpnZIOT7HtSoMH2657yVoHF6d9Nh/478DNja5rGIeSJZZtKa53k7V0aobbF3WPXYviK4yTTntNBK4mawE8Tva/7aUjWM7HyP6n/5akZ8huKp82xPTvIrve/yHZ/ZY/A4iIbwN/AdxG9tTpZcDiNM+LyBLGTrJm/HbgwyOIdSnZDdlVkp4Gvk12Q30wK8ieEj0u6edDLPOPgCvStv8l2f2KRp0NPCRpN9l+XBwRewdOFBHPkiWZN5Edr08C50XED5tYV10RsR74O7Ib20+Q3e/5Tm6SIffFMMduVPGXA82sUG7pmFmhnHTMrFBOOmZWKCcdMyvU+LIDKNr06dNj9uzZZYdh1nHWrFnz84gY9ougYy7pzJ49m9WrV5cdhlnHkfTT4afy5ZWZFcxJx8wK5aRjZoVy0jGzQjnpmFmhnHTMrFClJx1J10t6UtKDdcZL0sclbZC0TtIpuXHnS3okvc4vLmprt+27e7l/8y627+4tOxRrsSp8T+cGsh7rbqwz/k3Ay9PrNLLOsU+TNBX4K2AuWT8layQtr9Njno0it6/dytLb1tHd1UVffz/XLDqRBXOOHH5GGxVKb+lExD1kvdvXs5Csb9qIiFXAFEkzyDoSvzMial1z3knWd4qNYtt397L0tnXs6+vnmd797Ovr5+Lb1rnF00FKTzoNOJLn9027JQ2rN/wFJL1H0mpJq7dt29a2QO3gbdm5l+6uLh6fcAmPT7gEgO6uLrbsfEG/Wwdv/vzsZYUaDUnnoEXEZyJibkTMPfzwZvsItyLN7DmEvv7n9yba19/PzJ5DSorIWm00JJ2tZBUOamamYfWG2yg2bfJErll0Il1dYlyXmNTdxTWLTmTa5Illh2YtMhqSznLgvPQU63TgqdT5+B3AG1Ln4z3AG9IwG+UWzDmSk4+awnEzXsR3lp7pm8gdpvSnV5JuIivgNl3SFrInUt0AEfEpsjKubybr7HsP8O40boekvwHuS4u6IiKGuiFto0j3uC66x3W5hdOBSk86qSbRUOMD+OM6464Hrm9HXGbWHqPh8srMOoiTjpkVyknHzArlpGNmhXLS6UD+sWQ1+DgMrvSnV9Za/rFkNfg41OeWTgfxjyWrwcdhaE46HaT2Y8m8tv1Y0urycRiak04H8Y8lq8HHYWhOOh2k9mPJSd1dHDZxvH8sWRIfh6H5RnKHWTDnSOYdO50tO/cys+cQn+gl8XGoz0mnA02bPNEneQX4OAzOl1dmVignHTMrlJOOmRXKScfMClV60pF0tqQfpWJ6lwwy/lpJa9Prx5J25cYdyI1bXmzkZjYSpT69kjQO+ATwerISMvelgnnra9NExP/KTf8nwMm5ReyNiDlFxWtmB6/sls6pwIaI2BQRzwI3kxXXq+ftwE2FRGZmbVF20mmmYN4s4KXAitzgSamI3ipJ59RbiYvtmVVH2UmnGYuBWyPiQG7YrIiYC7wD+Kiklw02Y9WL7bnfFRtLyv5GcjMF8xYzoCpERGxN/26StJLsfs/G1ofZPu53xcaasls69wEvl/RSSRPIEssLnkJJeiXQA3wvN6xH0sT0fjowD1g/cN4qc78rNhaVmnQiYj/wXrLKnA8Dt0TEQ5KukLQgN+l
i4OZUA6vmOGC1pPuBu4Gr80+9RgP3u2JjUdmXV0TEN8iqeOaH/eWAz5cPMt93gRPaGlybud8VG4vKvrwa09zvio1Fpbd0xjr3u2JjjZNOBbjfFRtLfHllZoVy0jGzQjnpmFmhnHTMrFBOOmZWKCcdMyuUk46ZFcpJx8wK5aRjZoVy0jGzQjnpmFmhnHTMrFClJ50G6l4tkbQtV9/qwty48yU9kl7nFxv587mfY6u6qpyjla97lXwpIt47YN6pwF8Bc4EA1qR5dxYQ+vO4n2Oruiqdo2W3dJqte5X3RuDOiNiREs2dwNltirMu93NsVVe1c7TspNNo3atFktZJulVSrXpEMzWz2lb3yv0cW9VV7RwtO+k04mvA7Ig4kaw184VmF9DOulfu59iqrmrnaNlJZ9i6VxGxPSJq7cDrgFc3Om8R3M+xVV3VztGyuyt9ru4VWcJYTFat8zmSZkTEY+njArJSNZCVrfmQpJ70+Q3Ape0P+YXcz7FVXZXO0VKTTkTsl1SrezUOuL5W9wpYHRHLgfelGlj7gR3AkjTvDkl/Q5a4AK6IiB2Fb0Tifo6t6qpyjpbd0hm27lVEXEqdFkxEXA9c39YAzaylyr6nY2ZjjJOOmRXKScfMCuWkY2aFctIxs0I56ZhZoZx0zKxQTjpmVignHTMrlJOOmRXKScfMCuWkY2aFctIxs0I56ZhZoZx0zKxQpSedBupevV/S+tQx+12SZuXGHcjVw1rejviqUivIrCjtPudHQ92rHwBzI2KPpD8ErgHelsbtjYg57YqvSrWCzIpQxDlfdktn2LpXEXF3ROxJH1eRdcDedlWrFWTWbkWd82UnnYZrVyUXAN/MfZ6U6lmtknROKwOrWq0gs3Yr6pwvvY/kRkl6J1kJ4d/ODZ4VEVslHQOskPRARGwcZN73AO8BOProoxtaX9VqBZm1W1HnfNktnYZqV0k6C7gMWJCrgUVEbE3/bgJWAicPtpKRFNurWq0gs3Yr6pwvu6XTSN2rk4FPA2dHxJO54T3AnojolTQdmEd2k7llqlQryKwIRZzzo6Hu1YeBycCXJQH8Z0QsAI4DPi2pn6zFdvWAp14tUZVaQWZFafc5X3ZLp5G6V2fVme+7wAntjc7MWq3sezpmNsY46ZhZoZx0zKxQTjpmVignHTMrVENJR1KXpN9qdzBm1vkaSjoR0U/2a3Azs4PSzOXVXZIWKX1Dz8xsJJpJOn8AfBnolfS0pGckPd2muMysQzX8jeSIOKydgZjZ2NDUzyAkHQnMys8XEfe0Oigz61wNJx1Jy8i6CV0PHEiDA3DSMbOGNdPSOQd4Rb4/GzOzZjVzI3kT0N2uQMxsbBi2pSPpH8guo/YAayXdBeR773tf+8Izs07TyOXV6vTvGqAttaXMbOwYNulExBcaWZCk2yJiUbMBSDob+BhZz4HXRcTVA8ZPBG4EXg1sB94WET9J4y4lqxBxAHhfRNzR7PrNrFit/MHnMc3OkCu29ybgeODtko4fMNkFwM6IOBa4FliW5j2erE/lXwfOBj6ZlmdmFdbKpBMjmGfYYnvpc621dSvwuvRTjIXAzRHRGxGPAhvS8syswsru2qKRYnvPTRMR+4GngGkNzgtkda9SUb7V27Zta1HoZjYSrUw6lf0h6EjqXplZe4wo6UjqkXTigMFLR7CoRortPTeNpPHAi8luKDdUqM/MqqXhpCNppaQXSZoK/AfwWUl/XxsfEd8awfqfK7YnaQLZjeGBj+WXA+en9+cCKyIi0vDFkiamYn0vB74/ghjMrEDN/AzixRHxtKQLgRsj4q8krTuYlTdYbO9zwD9L2gDsIEtMpOluIfst2H7gjyPiwKArMrPKaCbpjJc0A/g9srriLdFAsb19wFvrzHsVcFWrYjGz9mvmns4VZC2SjRFxn6RjgEfaE5aZdapmOvH6MlnPgbXPm4Cmv4FsZmNbMzeSZ0r6iqQn0+s2STPbGZyZdZ5mLq8+T/bE6Ij0+loaZmbWsGa
SzuER8fmI2J9eNwD+pp2ZNaWZpLNd0jsljUuvd5J9Sc/MrGHNJJ3/Sfa4/HHgMbIv6i1pQ0xm1sGa+Z7OFcD5EbETIH0z+SNkycjMrCHNtHROrCUcgIjYAZzc+pDMrJM1k3S6JPXUPqSWTlN1s8zMmkkafwd8T1LtC4JvxT9BMLMmNfON5BslrQbOTIN+NyLWtycsM+tUTV0epSTjRGNmI1Z2d6VmNsY46ZhZoUpLOpKmSrpT0iPp355Bppkj6XuSHpK0TtLbcuNukPSopLXpNafYLTCzkSizpXMJcFdEvBy4K30eaA9wXkTUalt9VNKU3PgPRMSc9Frb/pDNOt/23b3cv3kX23f3Dj/xCJT5PZuFwPz0/gvASgZ07h4RP869/5mkJ8l+ZLqrmBDNxpbb125l6W3r6O7qoq+/n2sWnciCOYNWdhqxMls6L4mIx9L7x4GXDDWxpFOBCcDG3OCr0mXXtan8sJmN0PbdvSy9bR37+vp5pnc/+/r6ufi2dS1v8bQ16Uj6tqQHB3k9r4pnqu5Qt0Jo6pv5n4F3R0R/Gnwp8ErgN4CpDFECx8X2zIa3ZedeuruenxK6u7rYsnNvS9fT1suriDir3jhJT0iaERGPpaTyZJ3pXgR8HbgsIlblll1rJfVK+jzw50PE8RngMwBz584dSfljs443s+cQ+vr7nzesr7+fmT2HtHQ9ZV5e5etZnQ/cPnCCVAvrK2Qlb24dMG5G+lfAOcCDbY3WrMNNmzyRaxadyKTuLg6bOJ5J3V1cs+hEpk1u7Z2LMm8kXw3cIukC4KdkffUgaS5wUURcmIadAUyTtCTNtyQ9qfoXSYeTlTNeC1xUcPxmHWfBnCOZd+x0tuzcy8yeQ1qecKDEpBMR24HXDTJ8NXBhev9F4It15j9zsOFmdnCmTZ7YlmRT428km1mhnHTMrFBOOmZWKCcdMyuUk46ZFcpJx8wK5aRjZoVy0jGzQjnpmFmhnHTMrFBOOmZWKCcdMyuUk46ZFcpJx8wK5aRjZoVy0jGzQlW62F6a7kCuoN7y3PCXSrpX0gZJX0pdm5pZHe2uZ9WoqhfbA9ibK6i3IDd8GXBtRBwL7AQuaG+4ZqPX7Wu3Mm/ZCt553b3MW7aC5Wu3lhZLmUlnIVmRPdK/5zQ6Y+qM/Uyg1ll7U/ObjSVF1bNq1Ggotjcp1axaJamWWKYBuyJif/q8BahbhtB1r2wsK6qeVaPa2jG7pG8DvzrIqMvyHyIiJNWrRzUrIrZKOgZYIekB4Klm4nDdKxvLiqpn1ajKF9uLiK3p302SVgInA7cBUySNT62dmUB5F6lmFVarZ3XxgBrl7az4MJQy617Viu1dTf1iez3AnojolTQdmAdck1pGdwPnAjfXm9/MMkXUs2pUmfd0rgZeL+kR4Kz0GUlzJV2XpjkOWC3pfuBu4OqIWJ/GLQXeL2kD2T2ezxUavdkoM23yRE46akqpCQeqX2zvu8AJdebfBJzazhjNrPX8jWQzK5STjpkVyknHzArlpGNmhXLSMbNCOemYWaGcdMysUE46ZlYoJx0zK5STjpkVyknHzArlpGM2ilSln+ODUWbXFmbWhNvXbmXpgD5xFsyp22FmZbmlYzYKVK2f44PhpGM2ClStn+ODUem6V5L+a67m1VpJ+2qds0u6QdKjuXFzit8Ks2JUrZ/jg1HpulcRcXet5hVZyZk9wLdyk3wgVxNrbSFRm5Wg1s/xpO4uDps4nkndXaX2c3wwyryRvBCYn95/AVhJ1gVpPecC34yIPe0Ny6yaqtTP8cEYDXWvahYDNw0YdpWkdZKulTQ6j4BZE6rSz/HBGA11r0glak4A7sgNvpQsWU0gq2m1FLiizvzvAd4DcPTRRzexBWbWapWve5X8HvCViOjLLbvWSuqV9Hngz4eIw8X2zCqizMurWt0rGL5u1dsZcGmVElWtrvk5wINtiNHMWqzqda+QNBs4Cvg/A+b/l1Ri+AFgOnBlATGb2UGqdN2r9PknwAu
+6x0RZ7YzPjNrD38j2cwK5aRjZoVy0jGzQjnpdKBO6HOlE/g4DM796XSYTulzZbTzcajPLZ0O0kl9roxmPg5Dc9LpIJ3U58po5uMwNCedDtJJfa6MZj4OQ3PS6SCd1OfKaObjMDTfSO4wndLnymjn41Cfk04HmjZ5ok/yCvBxGJwvr8ysUE46ZlYoJx0b0/oO9PtbwwXzPZ165s8vO4KxbU4q7tHG49C35j/Y29fPs689g40BcfihTPc9mBdaubKli3NLx8akvgP97O3rB4ID/UF/BJu2/YK+A/3DzmsHp7SWjqS3ApcDxwGnps67BpvubOBjwDjguoio9TD4UuBmYBqwBnhXRDzbsgBbnN2tOX2fO4Pe/f1sv/KOtjwBWr95F8++9gwO9AeL33E1AIdNHM8XLzyNk46a0vL12S+V2dJ5EPhd4J56E0gaB3wCeBNwPPB2Scen0cuAayPiWGAncEF7w7Wi3L52Kz/YvIuHH3uaectWsHzt1pavY2bPIcSALvr9reFilNld6cMAWb/qdZ0KbIiITWnam4GFkh4mq/j5jjTdF8haTf/Uqvjm3zC/VYuyJvQd6OcHm3exj40A/EQXs/ir4uQ1U+ge19r/I/sW/IR9ff1sm3QpAbxs+qEsunVZS9fRCVYuWdnS5VX9RvKRwObc5y3AaWSXVLsiYn9ueN1+A1z3avTo3d+PgAn9xzw3TGl4q5NO96tPgQP9HLe/n4nju1q+fBtcacX2ImKokjMtNZK6V63O7taY7bt7mbdsBfv6fnlDd1J3F3f8jzP97d4OUVqxvQZtJSs/UzMzDdsOTJE0PrV2asNtlKv9WPLiAR1gOeF0jqpfXt0HvDw9qdpKVs/8HakM8d3AuWRPsIYr1mejiH8s2dlKu4iV9BZJW4DfBL4u6Y40/AhJ3wBIrZj3ktUwfxi4JSIeSotYCrxf0gayezyfK3obrH2mTZ7ISUdNccLpQIqBzw073Ny5c2P16kG/EmRmB0HSmoiYO9x0vl1vZoVy0jGzQjnpmFmhnHTMrFBOOmZWqDH39ErSNuCnDUw6Hfh5m8Npp9EeP3gbqqLRbZgVEYcPN9GYSzqNkrS6kcd/VTXa4wdvQ1W0eht8eWVmhXLSMbNCOenU95myAzhIoz1+8DZURUu3wfd0zKxQbumYWaGcdMysUE46ZJUpJD0kqV9S3UeDks6W9CNJGyRdUmSMw5E0VdKdkh5J//bUme6ApLXptbzoOAcz3H6VNFHSl9L4eyXNLj7KoTWwDUskbcvt+wvLiLMeSddLelLSg3XGS9LH0/atk3TKiFcWEWP+RVYG5xXASmBunWnGARuBY4AJwP3A8WXHnovvGuCS9P4SYFmd6XaXHWuz+xX4I+BT6f1i4Etlxz2CbVgC/GPZsQ6xDWcApwAP1hn/ZuCbZF1Wnw7cO9J1uaVDVpkiIn40zGTPVaaIrL7WzcDC9kfXsIVkVTFI/55TYizNaGS/5rftVuB1GqaMSMGqfm4MKyLuAXYMMclC4MbIrCLrLnjGSNblpNO4wSpT1K1AUYKXRMRj6f3jwEvqTDdJ0mpJqyRVITE1sl+fmyay3iSfIustsioaPTcWpUuTWyUdNcj4KmvZ+V/1PpJbpiqVKQ7GUNuQ/xARIanedyFmRcRWSccAKyQ9EBEbWx2rvcDXgJsiolfSH5C13M4sOaZSjJmkE+2rTFGYobZB0hOSZkTEY6nZ+2SdZWxN/26StBI4GSgz6TSyX2vTbJE0HngxWUWQqhh2GyIiH+91ZPfgRpOWnf++vGrcc5UpJE0gu6FZiac/yXKyqhhQpzqGpB5JE9P76cA8YH1hEQ6ukf2a37ZzgRWR7m5WxLDbMOD+xwKyQgOjyXLgvPQU63TgqdzlfHPKvmtehRfwFrJr1F7gCeCONPwI4BsD7uD/mKxlcFnZcQ/YhmnAXcAjwLeBqWn4XOC69P63gAfInq48AFxQdtz19itwBbAgvZ8EfBnYAHwfOKbsmEewDX8
LPJT2/d3AK8uOeUD8NwGPAX3pb+EC4CLgojRewCfS9j1Anae8jbz8MwgzK5Qvr8ysUE46ZlYoJx0zK5STjpkVyknHzArlpGNmhXLSsVJIuk7S8SOY7wZJ57YjJivGmPkZhFVLRLStPxlJ4yP7YahVkFs61naSDpX0dUn3S3pQ0tskrax1mCZpt6Sr0vhVkur9Qr7mDEnflbSp1uqRNF/S/00dk5X90w4bgpOOFeFs4GcRcVJEvAr49wHjDwVWRcRJwD3A7w+zvBnAa4DfAa7ODT8F+NOI+LXWhG3t4KRjRXgAeL2kZZJeGxFPDRj/LPBv6f0aYPYwy/tqRPRHxHqe32/Q9yPi0ZZEbG3jezrWdhHx49Sn7puBKyXdNWCSvvjljwAPMPx52Zt7n+9B8BcHF6kVwUnH2k7SEcCOiPiipF1ApTolt2I56VgRTgA+LKmfrOuEPwQ+Um5IVhZ3bWFmhfKNZDMrlC+vrJIkXQa8dcDgL0fEVWXEY63jyyszK5Qvr8ysUE46ZlYoJx0zK5STjpkV6v8DJ+JnpPBM6FYAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.plot.scatter('sin_hr', 'cos_hr').set_aspect('equal')\n", + "plt.hlines(-0.96, -1, 1, color='r') # hour 11\n", + "plt.vlines(0.25, -1, 1, color='r') # hour 11 \n", + "plt.hlines(-1, -1, 1, color='g') # hour 12\n", + "plt.vlines(0, -1, 1, color='g') # hour 12\n", + "plt.title(\"sine-cosine transformation\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The same transformation can be used for all the cyclical features mentioned before. In this way you will preserve important information about these features in your algorithms." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}