diff --git a/README.md b/README.md index ff01b4c..fc9d95b 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,9 @@ A flask skeleton with the app factory pattern and route blueprints for clean int ## [Flask Kafka Streaming](https://github.com/marksibrahim/recipes/tree/master/flask-kafka-streaming) (via Sockets) A flask app for streaming from Kafka via web sockets using socketio. +## [Modern Pandas](https://github.com/marksibrahim/recipes/tree/master/modern-pandas) +Pandas recipes and best practices + ## [Open a New Chrome Window on Mac](https://github.com/marksibrahim/recipes/tree/master/mac-new-chrome-window) An AppleScript to open a new chrome window in the current desktop. diff --git a/modern-pandas/data/gdp.csv b/modern-pandas/data/gdp.csv new file mode 100644 index 0000000..f347bb9 --- /dev/null +++ b/modern-pandas/data/gdp.csv @@ -0,0 +1,270 @@ +DATE,GDP +1947-01-01,243.1 +1947-04-01,246.3 +1947-07-01,250.1 +1947-10-01,260.3 +1948-01-01,266.2 +1948-04-01,272.9 +1948-07-01,279.5 +1948-10-01,280.7 +1949-01-01,275.4 +1949-04-01,271.7 +1949-07-01,273.3 +1949-10-01,271.0 +1950-01-01,281.2 +1950-04-01,290.7 +1950-07-01,308.5 +1950-10-01,320.3 +1951-01-01,336.4 +1951-04-01,344.5 +1951-07-01,351.8 +1951-10-01,356.6 +1952-01-01,360.2 +1952-04-01,361.4 +1952-07-01,368.1 +1952-10-01,381.2 +1953-01-01,388.5 +1953-04-01,392.3 +1953-07-01,391.7 +1953-10-01,386.5 +1954-01-01,385.9 +1954-04-01,386.7 +1954-07-01,391.6 +1954-10-01,400.3 +1955-01-01,413.8 +1955-04-01,422.2 +1955-07-01,430.9 +1955-10-01,437.8 +1956-01-01,440.5 +1956-04-01,446.8 +1956-07-01,452.0 +1956-10-01,461.3 +1957-01-01,470.6 +1957-04-01,472.8 +1957-07-01,480.3 +1957-10-01,475.7 +1958-01-01,468.4 +1958-04-01,472.8 +1958-07-01,486.7 +1958-10-01,500.4 +1959-01-01,511.1 +1959-04-01,524.2 +1959-07-01,525.2 +1959-10-01,529.3 +1960-01-01,543.3 +1960-04-01,542.7 +1960-07-01,546.0 +1960-10-01,541.1 +1961-01-01,545.9 +1961-04-01,557.4 +1961-07-01,568.2 +1961-10-01,581.6 +1962-01-01,595.2 +1962-04-01,602.6 
+1962-07-01,609.6 +1962-10-01,613.1 +1963-01-01,622.7 +1963-04-01,631.8 +1963-07-01,645.0 +1963-10-01,654.8 +1964-01-01,671.1 +1964-04-01,680.8 +1964-07-01,692.8 +1964-10-01,698.4 +1965-01-01,719.2 +1965-04-01,732.4 +1965-07-01,750.2 +1965-10-01,773.1 +1966-01-01,797.3 +1966-04-01,807.2 +1966-07-01,820.8 +1966-10-01,834.9 +1967-01-01,846.0 +1967-04-01,851.1 +1967-07-01,866.6 +1967-10-01,883.2 +1968-01-01,911.1 +1968-04-01,936.3 +1968-07-01,952.3 +1968-10-01,970.1 +1969-01-01,995.4 +1969-04-01,1011.4 +1969-07-01,1032.0 +1969-10-01,1040.7 +1970-01-01,1053.5 +1970-04-01,1070.1 +1970-07-01,1088.5 +1970-10-01,1091.5 +1971-01-01,1137.8 +1971-04-01,1159.4 +1971-07-01,1180.3 +1971-10-01,1193.6 +1972-01-01,1233.8 +1972-04-01,1270.1 +1972-07-01,1293.8 +1972-10-01,1332.0 +1973-01-01,1380.7 +1973-04-01,1417.6 +1973-07-01,1436.8 +1973-10-01,1479.1 +1974-01-01,1494.7 +1974-04-01,1534.2 +1974-07-01,1563.4 +1974-10-01,1603.0 +1975-01-01,1619.6 +1975-04-01,1656.4 +1975-07-01,1713.8 +1975-10-01,1765.9 +1976-01-01,1824.5 +1976-04-01,1856.9 +1976-07-01,1890.5 +1976-10-01,1938.4 +1977-01-01,1992.5 +1977-04-01,2060.2 +1977-07-01,2122.4 +1977-10-01,2168.7 +1978-01-01,2208.7 +1978-04-01,2336.6 +1978-07-01,2398.9 +1978-10-01,2482.2 +1979-01-01,2531.6 +1979-04-01,2595.9 +1979-07-01,2670.4 +1979-10-01,2730.7 +1980-01-01,2796.5 +1980-04-01,2799.9 +1980-07-01,2860.0 +1980-10-01,2993.5 +1981-01-01,3131.8 +1981-04-01,3167.3 +1981-07-01,3261.2 +1981-10-01,3283.5 +1982-01-01,3273.8 +1982-04-01,3331.3 +1982-07-01,3367.1 +1982-10-01,3407.8 +1983-01-01,3480.3 +1983-04-01,3583.8 +1983-07-01,3692.3 +1983-10-01,3796.1 +1984-01-01,3912.8 +1984-04-01,4015.0 +1984-07-01,4087.4 +1984-10-01,4147.6 +1985-01-01,4237.0 +1985-04-01,4302.3 +1985-07-01,4394.6 +1985-10-01,4453.1 +1986-01-01,4516.3 +1986-04-01,4555.2 +1986-07-01,4619.6 +1986-10-01,4669.4 +1987-01-01,4736.2 +1987-04-01,4821.5 +1987-07-01,4900.5 +1987-10-01,5022.7 +1988-01-01,5090.6 +1988-04-01,5207.7 +1988-07-01,5299.5 +1988-10-01,5412.7 
+1989-01-01,5527.4 +1989-04-01,5628.4 +1989-07-01,5711.6 +1989-10-01,5763.4 +1990-01-01,5890.8 +1990-04-01,5974.7 +1990-07-01,6029.5 +1990-10-01,6023.3 +1991-01-01,6054.9 +1991-04-01,6143.6 +1991-07-01,6218.4 +1991-10-01,6279.3 +1992-01-01,6380.8 +1992-04-01,6492.3 +1992-07-01,6586.5 +1992-10-01,6697.6 +1993-01-01,6748.2 +1993-04-01,6829.6 +1993-07-01,6904.2 +1993-10-01,7032.8 +1994-01-01,7136.3 +1994-04-01,7269.8 +1994-07-01,7352.3 +1994-10-01,7476.7 +1995-01-01,7545.3 +1995-04-01,7604.9 +1995-07-01,7706.5 +1995-10-01,7799.5 +1996-01-01,7893.1 +1996-04-01,8061.5 +1996-07-01,8159.0 +1996-10-01,8287.1 +1997-01-01,8402.1 +1997-04-01,8551.9 +1997-07-01,8691.8 +1997-10-01,8788.3 +1998-01-01,8889.7 +1998-04-01,8994.7 +1998-07-01,9146.5 +1998-10-01,9325.7 +1999-01-01,9447.1 +1999-04-01,9557.0 +1999-07-01,9712.3 +1999-10-01,9926.1 +2000-01-01,10031.0 +2000-04-01,10278.3 +2000-07-01,10357.4 +2000-10-01,10472.3 +2001-01-01,10508.1 +2001-04-01,10638.4 +2001-07-01,10639.5 +2001-10-01,10701.3 +2002-01-01,10834.4 +2002-04-01,10934.8 +2002-07-01,11037.1 +2002-10-01,11103.8 +2003-01-01,11230.1 +2003-04-01,11370.7 +2003-07-01,11625.1 +2003-10-01,11816.8 +2004-01-01,11988.4 +2004-04-01,12181.4 +2004-07-01,12367.7 +2004-10-01,12562.2 +2005-01-01,12813.7 +2005-04-01,12974.1 +2005-07-01,13205.4 +2005-10-01,13381.6 +2006-01-01,13648.9 +2006-04-01,13799.8 +2006-07-01,13908.5 +2006-10-01,14066.4 +2007-01-01,14233.2 +2007-04-01,14422.3 +2007-07-01,14569.7 +2007-10-01,14685.3 +2008-01-01,14668.4 +2008-04-01,14813.0 +2008-07-01,14843.0 +2008-10-01,14549.9 +2009-01-01,14383.9 +2009-04-01,14340.4 +2009-07-01,14384.1 +2009-10-01,14566.5 +2010-01-01,14681.1 +2010-04-01,14888.6 +2010-07-01,15057.7 +2010-10-01,15230.2 +2011-01-01,15238.4 +2011-04-01,15460.9 +2011-07-01,15587.1 +2011-10-01,15785.3 +2012-01-01,15973.9 +2012-04-01,16121.9 +2012-07-01,16227.9 +2012-10-01,16297.3 +2013-01-01,16475.4 +2013-04-01,16541.4 +2013-07-01,16749.3 +2013-10-01,16999.9 +2014-01-01,17025.2 diff --git 
a/modern-pandas/modern-pandas.ipynb b/modern-pandas/modern-pandas.ipynb index 10cbdd5..ad1b9df 100644 --- a/modern-pandas/modern-pandas.ipynb +++ b/modern-pandas/modern-pandas.ipynb @@ -35,7 +35,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Reading Data" + "Resources\n", + "* [Cookbook](https://github.com/jvns/pandas-cookbook) by Julia Evans\n", + "* [Cheat Sheet](https://github.com/brandon-rhodes/pycon-pandas-tutorial/blob/master/cheat-sheet.txt) by Brandon Rhodes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1 - Reading Data" ] }, { @@ -90,15 +106,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## df.loc and Indicies" + "## Indices" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "`df.loc`: allows you to use the column, row indecies\n", - " `df.loc[row_labels, column_labels]`" + "Indices are names for columns and rows, stored as a native Pandas data type.\n", + "\n", + "*Why bother with indices?*\n", + "\n", + "Indices can make lookups and sorting tremendously faster. Brandon Rhodes shows an example that speeds up an operation by 400x. 
" ] }, { @@ -146,6 +165,21 @@ "flights.index" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Search by Indices: df.loc" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`df.loc`: lets you select data by row and column indices (labels)\n", + " `df.loc[row_labels, column_labels]`" + ] + }, { "cell_type": "code", "execution_count": 26, @@ -206,6 +240,20 @@ "flights.loc[[25, 303], [\"fl_date\", \"origin\"]]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2 - Filtering Data" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -222,7 +270,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 61, "metadata": {}, "outputs": [ { @@ -298,54 +346,102 @@ " ...\n", " \n", " \n", - " 611\n", + " 555\n", " 2014-01-01\n", - " WN\n", - " 19393\n", - " N792SW\n", - " 1880\n", - " LGA\n", - " STL\n", - " 1245.0\n", - " 0.0\n", - " 1419.0\n", - " -16.0\n", + " VX\n", + " 21171\n", + " N840VA\n", + " 415\n", + " JFK\n", + " LAX\n", + " 2015.0\n", + " 15.0\n", + " 2338.0\n", + " 13.0\n", " 0.0\n", - " 2014-01-01 14:19:00\n", - " 2014-01-01 12:45:00\n", + " 2014-01-01 23:38:00\n", + " 2014-01-01 20:15:00\n", " \n", " \n", "\n", - "

480 rows × 14 columns

\n", + "

258 rows × 14 columns

\n", "" ], "text/plain": [ " fl_date unique_carrier airline_id tail_num fl_num origin dest \\\n", "0 2014-01-01 AA 19805 N338AA 1 JFK LAX \n", ".. ... ... ... ... ... ... ... \n", - "611 2014-01-01 WN 19393 N792SW 1880 LGA STL \n", + "555 2014-01-01 VX 21171 N840VA 415 JFK LAX \n", "\n", " dep_time dep_delay arr_time arr_delay cancelled arr \\\n", "0 914.0 14.0 1238.0 13.0 0.0 2014-01-01 12:38:00 \n", ".. ... ... ... ... ... ... \n", - "611 1245.0 0.0 1419.0 -16.0 0.0 2014-01-01 14:19:00 \n", + "555 2015.0 15.0 2338.0 13.0 0.0 2014-01-01 23:38:00 \n", "\n", " dep \n", "0 2014-01-01 09:14:00 \n", ".. ... \n", - "611 2014-01-01 12:45:00 \n", + "555 2014-01-01 20:15:00 \n", "\n", - "[480 rows x 14 columns]" + "[258 rows x 14 columns]" ] }, - "execution_count": 43, + "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# flights on 2014-01-01 leaving from JFK\n", - "flights[(flights[\"fl_date\"] == \"2014-01-01\") & (flights[\"origin\"].isin([\"JFK\", \"LGA\"]))]" + "flights[(flights[\"fl_date\"] == \"2014-01-01\") & (flights[\"origin\"] == \"JFK\")]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## df.isin" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 True\n", + " ... 
\n", + "20816 False\n", + "Name: origin, Length: 20817, dtype: bool" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "flights[\"origin\"].isin([\"JFK\", \"LGA\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Str Methods" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`.str` is an attribute on a Series that exposes useful string methods:\n", + "* series.str.startswith(\"Ham\")\n", + "* series.str.contains(\"Hamlet\")\n", + "* series.str.extract(regex pattern)\n", + "* series.str.len()" ] }, { @@ -473,26 +569,865 @@ "# use axis=0 to drop rows" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3 - Combining Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Tip: use row indices!**\n", + "\n", + "If you have two dataframes, Pandas will rely on row labels (indices) to automatically join them!" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set Index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`df.set_index(\"DATE\")` or you can directly read a column in as an index:" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "gdp = pd.read_csv('data/gdp.csv', index_col='DATE')" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 54, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# remove index\n", + "gdp = gdp.reset_index()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Handling Missing Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Drop" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GDP
DATE
1947-01-01243.1
......
2014-01-0117025.2
\n", + "

269 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " GDP\n", + "DATE \n", + "1947-01-01 243.1\n", + "... ...\n", + "2014-01-01 17025.2\n", + "\n", + "[269 rows x 1 columns]" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# returns a new object with na values dropped\n", + "gdp.dropna()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fill" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`ffill`: fills missing values with the previous available value" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GDP
DATE
1947-01-01243.1
......
2014-01-0117025.2
\n", + "

269 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " GDP\n", + "DATE \n", + "1947-01-01 243.1\n", + "... ...\n", + "2014-01-01 17025.2\n", + "\n", + "[269 rows x 1 columns]" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdp.fillna(method='ffill')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Merging Datasets " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are two ways to combine datasets:\n", + "\n", + "1. `pd.concat`: Tom recommends this for joining on **Indices**\n", + "2. `pd.merge`: Tom recommends this for merging on **Columns**" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DATEGDPfl_dateunique_carrierairline_idtail_numfl_numorigindestdep_timedep_delayarr_timearr_delaycancelledarrdep
01947-01-01243.1NaTNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaTNaT
...................................................
21085NaNNaN2014-01-31UA19977.0N372931456.0LGAIAH719.0-6.01006.0-20.00.02014-01-31 10:06:002014-01-31 07:19:00
\n", + "

21086 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " DATE GDP fl_date unique_carrier airline_id tail_num \\\n", + "0 1947-01-01 243.1 NaT NaN NaN NaN \n", + "... ... ... ... ... ... ... \n", + "21085 NaN NaN 2014-01-31 UA 19977.0 N37293 \n", + "\n", + " fl_num origin dest dep_time dep_delay arr_time arr_delay \\\n", + "0 NaN NaN NaN NaN NaN NaN NaN \n", + "... ... ... ... ... ... ... ... \n", + "21085 1456.0 LGA IAH 719.0 -6.0 1006.0 -20.0 \n", + "\n", + " cancelled arr dep \n", + "0 NaN NaT NaT \n", + "... ... ... ... \n", + "21085 0.0 2014-01-31 10:06:00 2014-01-31 07:19:00 \n", + "\n", + "[21086 rows x 16 columns]" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.merge(gdp, flights, left_on=\"DATE\", right_on=\"dep\", how=\"outer\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 4 - Sorting and Grouping Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sorting" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
fl_dateunique_carrierairline_idtail_numfl_numorigindestdep_timedep_delayarr_timearr_delaycancelledarrdep
107622014-01-16EV20366N125633805ALBEWR639.0-16.0810.010.00.02014-01-16 08:10:002014-01-16 06:39:00
.............................................
207022014-01-31MQ20398N672MQ3075SYRORDNaNNaNNaNNaN1.0NaTNaT
\n", + "

20817 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " fl_date unique_carrier airline_id tail_num fl_num origin dest \\\n", + "10762 2014-01-16 EV 20366 N12563 3805 ALB EWR \n", + "... ... ... ... ... ... ... ... \n", + "20702 2014-01-31 MQ 20398 N672MQ 3075 SYR ORD \n", + "\n", + " dep_time dep_delay arr_time arr_delay cancelled \\\n", + "10762 639.0 -16.0 810.0 10.0 0.0 \n", + "... ... ... ... ... ... \n", + "20702 NaN NaN NaN NaN 1.0 \n", + "\n", + " arr dep \n", + "10762 2014-01-16 08:10:00 2014-01-16 06:39:00 \n", + "... ... ... \n", + "20702 NaT NaT \n", + "\n", + "[20817 rows x 14 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "flights.sort_values(by=[\"origin\", \"dep_delay\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sorting Index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "this can make lookups much faster" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DATEGDP
01947-01-01243.1
.........
2682014-01-0117025.2
\n", + "

269 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " DATE GDP\n", + "0 1947-01-01 243.1\n", + ".. ... ...\n", + "268 2014-01-01 17025.2\n", + "\n", + "[269 rows x 2 columns]" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdp.sort_index()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Grouping" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pandas can perform `groupby` similar to SQL. \n", + "\n", + "CAREFUL: applying groupby returns a **groupby object**, not a dataframe!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You apply aggregator functions on these groups:\n", + "* `.sum()`, `.size()`, `.mean()`, `.min()`, `.max()`" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dep_delay
origin
ALB20.848429
......
SYR28.597826
\n", + "

11 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " dep_delay\n", + "origin \n", + "ALB 20.848429\n", + "... ...\n", + "SYR 28.597826\n", + "\n", + "[11 rows x 1 columns]" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "flights[[\"dep_delay\", \"origin\"]].groupby(\"origin\").mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can apply multiple aggregators" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 5 - Manipulating Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Series.apply(function)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 486.2\n", + " ... \n", + "268 34050.4\n", + "Name: GDP, Length: 269, dtype: float64" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdp[\"GDP\"].apply(lambda x: 2*x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Apply a function to the index" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,\n", + " ...\n", + " 258, 259, 260, 261, 262, 263, 264, 265, 266, 267],\n", + " dtype='int64', length=269)" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdp.index.map(lambda x: x - 1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 6 - Tips" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plotting" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xl8VPW9//HXJ3tIgAQIWwABQURQ\nAgTBqmhrq2gXtLdV3KCKoq12sfX26vXeSrW9P7XXWr1SWqwoWhVxp9YNca2VJZE1rGGThCUJIQkQ\nss7398ec2BEICckks+T9fDzmMWc+55yZz9fB+eR8v99zjjnnEBERCRQT6gRERCT8qDiIiMhRVBxE\nROQoKg4iInIUFQcRETmKioOIiBxFxUFERI6i4iAiIkdRcRARkaPEhTqBlurRo4cbOHBgqNMQEYko\nubm5Jc65jKa2a7I4mNlc4FtAkXNupBd7HhjmbZIGlDnnssxsILAe2OitW+Kcu9nbZyzwJJAMvAH8\n1DnnzKwb8DwwENgOXO6c299UXgMHDiQnJ6epzUREJICZ7WjOds3pVnoSmBQYcM5d4ZzLcs5lAS8B\nLwes3tKwrqEweGYDNwJDvUfDe94BLHbODQUWe69FRCSEmiwOzrmPgNJjrTMzAy4Hnjvee5hZH6CL\nc26J81/p7yngUm/1ZGCetzwvIC4iIiHS2gHpc4G9zrnNAbFBZrbCzD40s3O9WCZQELBNgRcD6OWc\n2+0t7wF6tTInERFppdYOSF/Jl48adgMDnHP7vDGGV81sRHPfzBuDaPQa4mY2A5gBMGDAgKPW19bW\nUlBQQFVVVXM/MqIkJSXRr18/4uPjQ52KiES5FhcHM4sDvguMbYg556qBam8518y2AKcAhUC/gN37\neTGAvWbWxzm32+t+KmrsM51zc4A5ANnZ2UcVkYKCAjp37szAgQPx93hFD+cc+/bto6CggEGDBoU6\nHRGJcq3pVvo6sME590V3kZllmFmstzwY/8DzVq/bqMLMJnjjFFOB17zdFgLTvOVpAfETVlVVRffu\n3aOuMACYGd27d4/aoyIRCS9NFgczew74FBhmZgVmNt1bNYWjB6InAqvNbCXwInCzc65hMPtHwF+A\nfGAL8KYXvw/4hpltxl9w7mtFe6KyMDSI5raJSHhpslvJOXdlI/EfHCP2Ev6prcfaPgcYeYz4PuCC\npvIQEZGWcc6xZGspS7bua/Y+unxGkO3du5errrqKwYMHM3bsWM466yxeeeUVPvjgA7p27cro0aMZ\nNmwYEydO5PXXX/9iv5kzZ5KZmUlWVhYjR45k4cKFIWyFiESTZ5d9zpWPLeGR9zY3vbFHxSGInHNc\neumlTJw4ka1bt5Kbm8v8+fMpKPAPy5x77rmsWLGCjRs38sgjj3DrrbeyePHiL/a/7bbbWLlyJS+8\n8ALXX389Pp8vVE0RkSjh8zke/3gbZ/Tryqq7L2z2fioOQfTee++RkJDAzTf/68Twk046iR//+MdH\nbZuVlcWvfvUrHn300aPWDR8+nLi4OEpKSto0XxGJfp9sKWFrySGuO3sgXZKaPw0+Yi+815Rf/y2P\ndbsqgvqep/Xtwt3fbvy0jby8PMaMGdPs9xszZgy/+93vjoovXbqUmJgYMjKavDaWiMhxzfloKz1S\nE7jk9D4ntJ+OHNrQLbfcwqhRoxg3btwx1/uvJPIvDz30EFlZWdx+++08//zzmp0kIq2yfHspH28u\nYcbEwSTGxZ7QvlF75HC8v/DbyogRI3jppX9N1po1axYlJSVkZ2cfc/sVK1YwfP
jwL17fdttt3H77\n7W2ep4hEv52llcxcmEeP1ESunTDwhPfXkUMQfe1rX6OqqorZs2d/EausrDzmtqtXr+bee+/llltu\naa/0RKSD2FZyiEl/+IjtJYf4zaUjSE44saMGiOIjh1AwM1599VVuu+02HnjgATIyMkhJSeH+++8H\n4OOPP2b06NFUVlbSs2dPHnnkES64QKd4iEjwOOeYuTAPM+PNn55L/26dWvQ+Kg5B1qdPH+bPn3/M\ndeXl5Y3uN3PmzDbKSEQ6iuq6eh5+dzMfbirmv745vMWFAVQcRESiwqHqOq57cjnLtpXyb2P6Me0r\nA1v1fioOIiIRrt7nuPGpHHK2l/KHK7K4dHRm0zs1IeoGpI+cHhpNorltItJys97P559b9nHfd88I\nSmGAKCsOSUlJ7Nu3Lyp/RBvu55CUlBTqVEQkjGzcc4CHF29mclZfLh/XP2jvG1XdSv369aOgoIDi\n4uJQp9ImGu4EJyLS4L4319MpIZaZQT63K6qKQ3x8vO6SJiIdxj/zS3h/YzF3Xnwq6SkJQX3vqOpW\nEhHpKHw+x/97cwOZacmtnpl0LCoOIiIR6LVVhawpLOf2i04hKf7Ez4BuioqDiEiE2bjnAP/9ah6j\n+qcxeVRwZicdScVBRCSC1NX7+OEzuXRKiOXP14wlJqZtrt4cVQPSIiLR7o21e9hafIg/XTOG3l3b\nbmp7k0cOZjbXzIrMbG1AbKaZFZrZSu9xScC6O80s38w2mtlFAfFJXizfzO4IiA8ys6Ve/HkzC+6Q\nu4hIFKip8/HaykIefncTQ3qmcuFpvdv085rTrfQkMOkY8Yecc1ne4w0AMzsNmAKM8Pb5o5nFmlks\nMAu4GDgNuNLbFuB+772GAPuB6a1pkIhINPqfN9bz0/kr2bGvktsvHNZm3UkNmiwOzrmPgNJmvt9k\nYL5zrto5tw3IB870HvnOua3OuRpgPjDZ/Lc6+xrworf/PODSE2yDiEhUW11QxrxPt3P1+AGsuvtC\nJo1s26MGaN2A9K1mttrrdkr3YpnAzoBtCrxYY/HuQJlzru6IuIiI4B+A/s9X1pCRmsh/XHwqKYnt\nM1Tc0uIwGzgZyAJ2Aw8GLaPjMLMZZpZjZjnReokMEZFAT326g7WFFfzq26fRJSm+3T63RcXBObfX\nOVfvnPMBj+HvNgIoBAKv/NTPizUW3wekmVncEfHGPneOcy7bOZedkZHRktRFRMLCx5uL+fFzK5i5\nMK/Rbcora3no3U2cd0oG3zy9Tztm18LiYGaBWV4GNMxkWghMMbNEMxsEDAWWAcuBod7MpAT8g9YL\nnf/yqe8D3/P2nwa81pKcREQixcqdZVz3xHLeydvDk//czvaSQ8fc7rGPt3Kgqo47Lj4V/xBt+2nO\nVNbngE+BYWZWYGbTgQfMbI2ZrQa+CtwG4JzLAxYA64C3gFu8I4w64FbgbWA9sMDbFuA/gJ+bWT7+\nMYjHg9pCEZEw8drKQs6+7z2ufXwpvboksfDWczCDV1Yc3WGytfggT3yyjW+e0Yfhfbq0e64Wqfc+\nyM7Odjk5OaFOQ0SkWT7cVMz0J5cztFdnenVJ5GdfP4Ws/mlc9dgSCssO88Ht539xdFBUUcVlf/wn\nVbX1vPKjsxnQveX3gj6SmeU657Kb2k6XzxARaWN7yqv46fwVDOmZyoKbJvDkdWeS1T8NgO+N7ceO\nfZUsXLXri+3veX0dJQermXf9mUEtDCdCxUFEpA055/j3F1dRXetj1tVj6HzEjKPJWZmM6teVe/62\njp2llby3YS+vr97NzeedzMjMriHKWtdWEhFpU4vXF/Hx5hJmfvs0Ts5IPWp9bIxx37+dwaWzPuHc\nB94HYEC3Ttx83sntneqXqDiIiLSRmjof//PmegZnpHD1hJMa3W54ny4s/sV5vPxZIV2T47lsTCbJ\nCcG/R8OJUHEQEWkjf/wgn63Fh3jiB+OIjz
1+L36/9E785IKh7ZRZ0zTmICLSBjbuOcCs9/OZnNWX\nr57aM9TpnDAVBxGRIKur9/HLF1fRJSmeu789ItTptIi6lUREguzR9/NZVVDOo1eNpltKZN6iRkcO\nIiJB9MQn2/jDu5v57ujMdr8eUjCpOIiIBEnujv3c+/o6LjytFw9874x2vx5SMKk4iIgEQVVtPbc9\nv5K+ack8ePko4pqYnRTuNOYgIhIEb+ft4fPSSp64btxRZ0FHosgubSIiYeLZpZ9zUvdOnDc0Ou41\no+IgItJKW4oPsnRbKVeM609MTOSOMwRScRARaaX739xAcnws3x/bv+mNI4SKg4hIK7y/sYh31u3l\nxxcMIaNzYqjTCRoVBxGRFnLO8eA7GzmpeyduOGdwqNMJKhUHEZEW+nTLPtYWVnDzeSeTEBddP6fR\n1RoRkXbinGPWB/n0SE3kstGZoU4n6FQcRERa4NWVhXySv48fnX8ySfGhvfdCW1BxEBE5QeWHa/n1\n39YxZkAa074yMNTptIkmi4OZzTWzIjNbGxD7nZltMLPVZvaKmaV58YFmdtjMVnqPPwXsM9bM1phZ\nvpk9Yt5FR8ysm5ktMrPN3nN6WzRURCRYXswtoKyyll9/ZySxUXJew5Gac+TwJDDpiNgiYKRz7gxg\nE3BnwLotzrks73FzQHw2cCMw1Hs0vOcdwGLn3FBgsfdaRCQs+XyOpz/dztiT0jm9X9dQp9NmmiwO\nzrmPgNIjYu845+q8l0uAfsd7DzPrA3Rxzi1xzjngKeBSb/VkYJ63PC8gLiISdt7bUMT2fZVMPavx\ne0JHg2CMOVwPvBnwepCZrTCzD83sXC+WCRQEbFPgxQB6Oed2e8t7gF5ByElEJOjq6n3c/9YGBnbv\nxMUjI/deDc3RqquymtldQB3wjBfaDQxwzu0zs7HAq2bW7HvkOeecmbnjfN4MYAbAgAEDWp64iEgL\nvJBbwOaig/zpmjFRd17DkVrcOjP7AfAt4GqvqwjnXLVzbp+3nAtsAU4BCvly11M/Lwaw1+t2auh+\nKmrsM51zc5xz2c657IyM6LjyoYhEhkPVdTz4ziayT0rnohG9Q51Om2tRcTCzScAvge845yoD4hlm\nFustD8Y/8LzV6zaqMLMJ3iylqcBr3m4LgWne8rSAuIhI2PjzR1spOVjNXd8cHtF3eGuuJruVzOw5\n4Hygh5kVAHfjn52UCCzy/iMt8WYmTQTuMbNawAfc7JxrGMz+Ef6ZT8n4xygaxinuAxaY2XRgB3B5\nUFomIhIkeyuqeOyjrXzrjD6MHtAxZts3WRycc1ceI/x4I9u+BLzUyLocYOQx4vuAC5rKQ0QkVH7/\nzibqfY7/mHRqqFNpN9E9oiIi0kprCspZkLuTqWedRP9unUKdTrtRcRARaUS9z3HXq2vokZrIT74+\nNNTptCsVBxGRRsxf/jmrC8r5r28Op0tSfKjTaVcqDiIix3Cwuo6HFm1i3MB0vjOqb6jTaXcqDiIi\nx/D4x9soOVjDXd88rUNMXT2SioOIyBFq6nw8vWQHF5zak6z+aaFOJyRUHEREjvDm2t2UHKzm2ii/\nuN7xqDiIiASoqfPx2MdbGdi9ExOHdtzL9Kg4iIgE+O3f17G2sILbLxpGTJTeyKc5VBxERDwv5hYw\n79Md3HDOIL51RseboRSoVZfsFhGJBs453lizh/98ZQ1nDe7OHRd3nMtkNEbFQUQ6tLp6H3e+vIYX\ncgsYmdmFR68aTVysOlVUHESkw6qp83Hb8yv5+5rd/PhrQ/jpBUNVGDwqDiLSIdXW+/jhX3NZvKGI\n//rmcG44d3CoUworKg4i0iH99u/rWbyhiHsvHcm1Ezru+QyNUXEQkQ5jd/lhNuw+wJtrd7Mgp4Dp\n5wxSYWiEioOIdAjbSg4x+dF/UFFVR1yMMWPiYH550bBQpxW2VBxEJKrV1Pl4+bMCZn+4hdgY45kb\nxnNyRi
q9uyaFOrWwpuIgIlGr3ue+mI00qEcKc6ZmM25gt1CnFRFUHEQkKlVU1fIfL67mzbV7uOuS\n4dxw7qAOeentlmrWhF4zm2tmRWa2NiDWzcwWmdlm7zndi5uZPWJm+Wa22szGBOwzzdt+s5lNC4iP\nNbM13j6PmL5BEWkF5xw/mLuMd9bt5a5LhnPjxMEqDCeouWd7PAlMOiJ2B7DYOTcUWOy9BrgYGOo9\nZgCzwV9MgLuB8cCZwN0NBcXb5saA/Y78LBGRZlu+fT+ffV7GzG+fxo0Tdf5CSzSrODjnPgJKjwhP\nBuZ5y/OASwPiTzm/JUCamfUBLgIWOedKnXP7gUXAJG9dF+fcEuecA54KeC8RkRP25D+30TU5nu+N\n7R/qVCJWa84T7+Wc2+0t7wF6ecuZwM6A7Qq82PHiBceIi4icsEXr9vLW2j1MGdef5ITYUKcTsYJy\nERHvL34XjPc6HjObYWY5ZpZTXFzc1h8nIhHms8/386Nncjk9syu3fG1IqNOJaK0pDnu9LiG85yIv\nXggEHsv182LHi/c7Rvwozrk5zrls51x2RkbHvUOTiBzb797aSNfkBJ6aPp4uSfGhTieitaY4LAQa\nZhxNA14LiE/1Zi1NAMq97qe3gQvNLN0biL4QeNtbV2FmE7xZSlMD3ktEpFmWbSvl0637uPm8wXRN\nVmForWad52BmzwHnAz3MrAD/rKP7gAVmNh3YAVzubf4GcAmQD1QC1wE450rN7F5gubfdPc65hkHu\nH+GfEZUMvOk9RESapabOx6//lkeP1ESuHq9rJQVDs4qDc+7KRlZdcIxtHXBLI+8zF5h7jHgOMLI5\nuYiIHOmRxZvJ21XBn68dq0HoINFdLUQkor2Qs5NH38/n+2P7cdGI3qFOJ2qoOIhIxFq0bi93vLyG\nc4b04DeXqfMhmFQcRCQirS0s59ZnP2Nk3y78+dqxJMapOymYVBxEJOL4fI67Xl1L56R4nrjuTFIS\ndQ3RYFNxEJGI80LuTlbtLOM/LzmVbikJoU4nKqk4iEhE2Vlayb2vr+fMQd24bLSutNNWVBxEJGIc\nrK7jx8+tAODB74/SZbjbkDrqRCQilB+u5fonl7OmsJxZV42hf7dOoU4pqqk4iEjYKzlYzdTHl7G5\n6ACPXjmaSSN1PkNbU3EQkbC2q+ww1/xlKbvKD/PY1GzOH9Yz1Cl1CCoOIhK2qmrrmT4vh+ID1Tw9\nfTzjBnYLdUodhoqDiIStmQvzWL+7gid+ME6FoZ1ptpKIhKW/LtnB/OU7ueWrJ/PVU9WV1N5UHEQk\n7Czduo+ZC/P46rAMfv6NYaFOp0NScRCRsFJYdpgfPfMZA7p14uErRxMbo3MZQkHFQUTCxuGaemY8\nlUNNnY85U7N1q88Q0oC0iISFwzX1/PCZXNbtruAvU7MZ0jM11Cl1aCoOIhJyPp9jxtM5/CO/hN9e\nejoXDO8V6pQ6PHUriUjIvZC7k483l3DP5JFcNX5AqNMRVBxEJMRWF5Tx27/7r7J6jQpD2FBxEJGQ\n+XBTMVPmLKFzUjz/+z1dZTWctLg4mNkwM1sZ8Kgws5+Z2UwzKwyIXxKwz51mlm9mG83sooD4JC+W\nb2Z3tLZRIhL+XltZyA3zljOwewqv3PIVBnTXVVbDSYsHpJ1zG4EsADOLBQqBV4DrgIecc/8buL2Z\nnQZMAUYAfYF3zewUb/Us4BtAAbDczBY659a1NDcRCV/1PsdfPt7KfW9t4MyB3XhsmqashqNgzVa6\nANjinNtxnMPCycB851w1sM3M8oEzvXX5zrmtAGY239tWxUEkynz2+X5+9dpa1hZWMGlEb/4wJYuk\n+NhQpyXHEKwxhynAcwGvbzWz1WY218zSvVgmsDNgmwIv1lj8KGY2w8xyzCynuLg4SKmLSHt4MbeA\n7/7xnxQfqOb/rhzN7GvGqDCEsVYXBzNLAL4DvOCFZgMn4+9y2g082NrP
aOCcm+Ocy3bOZWdkZATr\nbUWkjX24qZg7X17N2UO6s/gX5/PtUX01+BzmgtGtdDHwmXNuL0DDM4CZPQa87r0sBPoH7NfPi3Gc\nuIhEuCc+2ca9r6/jlF6d+ePVY0lN1Lm3kSAY3UpXEtClZGZ9AtZdBqz1lhcCU8ws0cwGAUOBZcBy\nYKiZDfKOQqZ424pIhHs7bw+//ts6vj68Fy/98Ct0TdbAc6RoVQk3sxT8s4xuCgg/YGZZgAO2N6xz\nzuWZ2QL8A811wC3OuXrvfW4F3gZigbnOubzW5CUioZe7Yz+/WLCKUf268siVozW+EGHMORfqHFok\nOzvb5eTkhDoNETmGVTvLuOqxJfTsksRzN06gd9ekUKckHjPLdc5lN7WdOv9EJKj2lFdx41M5pKck\n8PyMCfTsosIQiXT5DBEJmk+37GPyrH9wqLqOx6eNU2GIYDpyEJFWO1Rdx+wPtjDrg3wGdU9h7g/G\nMax351CnJa2g4iAirfLRpmJ+Mn8FZZW1fHdMJvdOHkmKpqtGPH2DItJi728s4qancxncI4UnfjCO\n0QPSm95JIoKKg4i0yOL1e/nhXz/jlN6p/HX6eNI6JYQ6JQkiDUiLyAl7J28PN/81l1P7dOaZ6RNU\nGKKQioOInJC31u7hR898xml9u/L09PF07aSznqORupVEpFkqa/wzkmZ/sIXT+3Vl3vVn6j4MUUzF\nQUSatGpnGT+dv4Lt+yr5zqi+/PaykXRWYYhqKg4i0qiaOh+vrCjgv1/NI6NzIs/dOIGzTu4e6rSk\nHag4iMiXrC0s5/63NrB0WyngLxATBndj9tVjSU/RwHNHoeIgIl/465Id/PpveXROiueqMwcQH2uM\nH9Sd84dlEBer+SsdiYqDiADwQs5O/uvVtXx1WAZ/uGK0ZiF1cCoOIh3cweo6nvxkGw8v3szZQ7oz\nZ2o28TpK6PBUHEQ6sE17D3DT07lsKznE14f35H+/P0qFQQAVB5EOK29XOVc9tpT42BjNQpKjqDiI\ndECb9h7g2seXkZIQy/M3nUX/bp1CnZKEGR0/inQwW4sPctVjS4mLMZ65cYIKgxyTioNIB/L5vkqu\n/stSnHM8e+N4BvVICXVKEqZaXRzMbLuZrTGzlWaW48W6mdkiM9vsPad7cTOzR8ws38xWm9mYgPeZ\n5m2/2cymtTYvEfmyVTvL+O7sTzhcW89fbxjPkJ66U5s0LlhHDl91zmU557K913cAi51zQ4HF3muA\ni4Gh3mMGMBv8xQS4GxgPnAnc3VBQRKT11u+u4Jq/LCUpPpYXbz6L4X26hDolCXNt1a00GZjnLc8D\nLg2IP+X8lgBpZtYHuAhY5Jwrdc7tBxYBk9ooN5EO5d11e7n28aWkJsWx4KazdMQgzRKM4uCAd8ws\n18xmeLFezrnd3vIeoJe3nAnsDNi3wIs1FheRVnj8H9u44akceqQm8vT08fRNSw51ShIhgjGV9Rzn\nXKGZ9QQWmdmGwJXOOWdmLgifg1d8ZgAMGDAgGG8pErVeyi3gN39fx6QRvfm/q0br5DY5Ia3+1+Kc\nK/Sei4BX8I8Z7PW6i/Cei7zNC4H+Abv382KNxY/8rDnOuWznXHZGRkZrUxeJSjV1Pu5+bS2/eGEV\n4wd14w9TslQY5IS16l+MmaWYWeeGZeBCYC2wEGiYcTQNeM1bXghM9WYtTQDKve6nt4ELzSzdG4i+\n0IuJyAnILzrA5X/+lHmf7uDGcwfx9PTxJMXHhjotiUCt7VbqBbxiZg3v9axz7i0zWw4sMLPpwA7g\ncm/7N4BLgHygErgOwDlXamb3Asu97e5xzpW2MjeRDqNgfyWz3s/nxdwCUhLj+OPVY7jk9D6hTksi\nmDkXlOGAdpedne1ycnJCnYZISG0vOcSCnJ3M/WQbPgdXZPfnZ18fSvfUxFCnJmHKzHIDTjtolK6t\nJBKB9h2s5veLNvHcss8BuHhkH+76
5nDNRpKgUXEQiTCvrCjgV6/lUVlTz9SzBnLzeSfTu2tSqNOS\nKKPiIBIhqmrreXjxZmZ/sIUzB3bjf747Uie0SZtRcRCJAPlFB5nxdA5biw9xRXZ/7r10JAlxmp4q\nbUfFQSSM1fscL+Ts5LdvrCcxLoZ515/JeafoHB9peyoOImFqW8khfr5gJSs+LyP7pHQeuiJL916Q\ndqPiIBKGVu0sY+rcZQD84YosJmf1xTufSKRdqDiIhJm8XeVc85elpKXE8+wNulObhIaKg0gY2VZy\niGlzl5OaFMfzM87SeQsSMpruIBImthYfZMqcT/E5x1PXn6nCICGlIweRMLCl+CBXzllCvc/x3I0T\nGNpL5y9IaKk4iIRYftFBrnxsCc45npsxgVNUGCQMqDiIhNB7G/byyxdXA6YjBgkrKg4i7az4QDWv\nrijklRWFrNtdwam9O/PoVWMY0jM11KmJfEHFQaSNFVVU8fKKQvKLDgLw+updVNX6yOqfxj2TR3DF\nuP4kxumGPBJeVBxE2tDKnWXc/HQueyqq6Nk5karaei48rTc/uWCojhQkrKk4iLSB8spafvDkMlZ8\nXkavLon8/SfnMKJv11CnJdJsKg4iQebzOX6+YCVrC8v572+dxmWjM+mWkhDqtEROiIqDSJDNej+f\nxRuKuGfyCKaeNTDU6Yi0iIqDSJD4fI4FOTv5/bubuGx0JtdOOCnUKYm0mIqDSCu9v7GI+9/cQMH+\nwxysrmPcwHT+57LTdRVViWgtvraSmfU3s/fNbJ2Z5ZnZT734TDMrNLOV3uOSgH3uNLN8M9toZhcF\nxCd5sXwzu6N1TRJpH0UVVdzy7Gdc98Ryaup9fG9sPx6eksXzM84iOUFTUyWytebIoQ74hXPuMzPr\nDOSa2SJv3UPOuf8N3NjMTgOmACOAvsC7ZnaKt3oW8A2gAFhuZgudc+takZtIm/H5HM8s+5wH3txA\ndb2PX3zjFGacN1jnKkhUaXFxcM7tBnZ7ywfMbD2QeZxdJgPznXPVwDYzywfO9NblO+e2ApjZfG9b\nFQcJK8453t9YxMOL81m1s4yzh3TnN5eezqAeKaFOTSTogjLmYGYDgdHAUuBs4FYzmwrk4D+62I+/\ncCwJ2K2AfxWTnUfExzfyOTOAGQADBgwIRuoizVJZU8edL6/htZW7yExL5veXj+Ky0ZkaV5Co1er7\nOZhZKvAS8DPnXAUwGzgZyMJ/ZPFgaz+jgXNujnMu2zmXnZGhm6xL+3h/YxFff/BDFq7axS++cQof\n/Pv5fHdMPxUGiWqtOnIws3j8heEZ59zLAM65vQHrHwNe914WAv0Ddu/nxThOXCSkcneUctNTuQzq\nkcLDV45m3MBuoU5JpF20uDiY/8+mx4H1zrnfB8T7eOMRAJcBa73lhcCzZvZ7/APSQ4FlgAFDzWwQ\n/qIwBbiqpXmJBMO2kkM8u3QH85ftpG9aEs/fNIG0TjrLWTqO1hw5nA1cC6wxs5Ve7D+BK80sC3DA\nduAmAOdcnpktwD/QXAfc4pxrVS/bAAAKC0lEQVSrBzCzW4G3gVhgrnMurxV5ibRYUUUVv/n7ehau\n2kVcjHHRiN78ctIwFQbpcMw5F+ocWiQ7O9vl5OSEOg2JElW19Tyz9HMeWrSJmnofM84dzNSzTqJn\nl6RQpyYSVGaW65zLbmo7nSEtHVp5ZS0LcnYy5+OtFB+o5tyhPbhn8khNT5UOT8VBOpyq2npe+qyA\nN9bsZsnWUup9jrOHdOfhKVmcNbi7ZiGJoOIgHUhdvY+XPyvkD+9uYld5FSdnpHDTxMFccnofRmbq\nXgsigVQcJOodrK7j7bV7+OMH+WwpPsSofl353fdHcfaQHqFOTSRsqThI1Kn3OdYUlvPW2j28tXY3\nO0orcQ6G9EzlT9eM4aIRvdV1JNIEFQeJCvsOVrNo3V427DnAG2t2U3SgmtgYY+LQHkzOyuTcoT0Y\n
MyCdmBgVBZHmUHGQiLVuVwWvrSxk+fZS1hSWU1vvSIyL4dyhGXx7VB/OHZqh23OKtJCKg0SUssoa\nPtxUzPxlO/l06z7iYowxA9K5/uxBXDYmk1N6dtbRgUgQqDhIWDtcU8/qgjKWby/lzbV7yNtVAUBm\nWjJ3Xnwql2f3J11HByJBp+IgYaWu3sd7G4r4dOs+Ptuxn7xdFdT5/GfxZ/VP4/YLT2H84O6M1fiB\nSJtScZB2V1Pn4/PSSooqqthTUcW2kkNs2nuAwrLD7CmvpuRgNUnxMYzql8aMiYMZe1I6oweka/xA\npB2pOEibcc5RcrCG/KKD5BcdIG9XBWt3lbNxzwFq6/91Ta/YGGNg907079aJwT1S+dYZffjqqT2J\nj2317UZEpIVUHCQonHPsr6xla/FBNu49wLJtpSzZuo+9FdVfbJPWKZ7TM7ty/TmDOLV3Z3p3SaZX\nl0T6pXciIU6FQCScqDhIs+2tqGLF52UU7K+k6EA1eyuq2FtRRVGFf/lQTf0X22Z0TmTC4O5k9U9j\naM9UhvRMpU/XJJ18JhIhVBzkS8oP17KztNL/2F/J56WV7Cw9TH7RQQrLDn+xXWJcDL26JNGrSyLD\n+3bh/GE96ZuWxMkZqZyckUr/bskqBCIRTMWhA2jo8ik5WE3F4VoqqmopOVDj/8v/QBV7yqspLDtM\n4f5KKqrqvrRv1+R4+ndLZvSANK4/ZxBjBqQxuEcqXZLj9OMvEsVUHCJAw8Bu+eFaaup8HK6tZ3f5\nYSpr6sGBzzkc/ueyytp/dfUc8D8XHaj60gBwoPRO8fTsnERmejLjBqbTLz2Z/un+weH+3TrRNTm+\nfRsrImFBxaGNOec4VFPP/kM1lFXWsr+yhv2V/uWyylqq6uqp9znq6h0+50hNjMPnHKWHary/5g9T\nWHaY6jpfsz+zS1Kc1+WTxPhBKfT0un96pCbSNTmezklx9EhNJKNzIknxsW3YehGJVCoOHuccB6vr\nqKyp9z9X11NZU0dVnY+DVXUUHaii+EA1VbU+6n0+fA4S4mJIiIvB53NU1/moqq2nrLKW0soayipr\n2F9ZS3llLTX1jf+wJ8TGEBtjxMUYGByqrsPMSO8UT2ZaMqf26cwFw3uSmZZMekoCiXExJMbF0ict\niZSEOGJiDAPMIMaMrsnx+sEXkVaL+OLgnP+HueEH/VBNHYeq6zhcW0+sGfFxMcTHxhAfaxhG8cFq\ndpUdZleZ/y/y3WVV7Co/zO7yKmqa+Os8NsboFB/r/0E2qK3zUVPvI8aMBO9HO71TPOmdEhjUI4Ux\nnRJI65TwRSytUzzpKQmke7GuyfHEHTGXv+Ge3urPF5FQsoYfo1Azs0nAw0As8Bfn3H3H2z613zA3\nZMb/cbCq7ovLK5yIGINeXZLo0zWJvmnJZKYl0yM1kZTEOFISY+mUEEdKQiyJ8bGkJMaSkZpIeqcE\nXbJBRCKameU657Kb2i4sjhzMLBaYBXwDKACWm9lC59y6xvZJSYxl8qi+pCbFkZIYR2piHCkJ//ph\nT06Ixedz1NY7auv9f+H7fI4enRPpm5ZMr86JR/3VLiIifmFRHIAzgXzn3FYAM5sPTAYaLQ790zvx\n68kj2yk9EZGOJVz+dM4Edga8LvBiIiISAuFSHJrFzGaYWY6Z5RQXF4c6HRGRqBUuxaEQ6B/wup8X\n+xLn3BznXLZzLjsjI6PdkhMR6WjCpTgsB4aa2SAzSwCmAAtDnJOISIcVFgPSzrk6M7sVeBv/VNa5\nzrm8EKclItJhhUVxAHDOvQG8Eeo8REQkfLqVREQkjKg4iIjIUcLm8hknyswOABtDnUcI9ABKQp1E\niHTUtnfUdkPHbXtbtvsk51yT0z3DZsyhBTY25/og0cbMcjpiu6Hjtr2jths6btvDod3qVhIRkaOo\nOIiIyFEiuTjMCXUCIdJR2w0dt+0dtd3Qcdse8nZH7IC0iIi0nU
g+chARkTYSccXBzCaZ2UYzyzez\nO0KdT1szs+1mtsbMVppZjhfrZmaLzGyz95we6jxby8zmmlmRma0NiB2zneb3iPdvYLWZjQld5q3X\nSNtnmlmh972vNLNLAtbd6bV9o5ldFJqsW8/M+pvZ+2a2zszyzOynXjyqv/fjtDu8vnPnXMQ88F93\naQswGEgAVgGnhTqvNm7zdqDHEbEHgDu85TuA+0OdZxDaOREYA6xtqp3AJcCbgAETgKWhzr8N2j4T\nuP0Y257m/btPBAZ5/z/EhroNLWx3H2CMt9wZ2OS1L6q/9+O0O6y+80g7cvjijnHOuRqg4Y5xHc1k\nYJ63PA+4NIS5BIVz7iOg9IhwY+2cDDzl/JYAaWbWp30yDb5G2t6YycB851y1c24bkI///4uI45zb\n7Zz7zFs+AKzHf5OvqP7ej9PuxoTkO4+04tAR7xjngHfMLNfMZnixXs653d7yHqBXaFJrc421s6P8\nO7jV6z6ZG9B1GJVtN7OBwGhgKR3oez+i3RBG33mkFYeO6Bzn3BjgYuAWM5sYuNL5jzujfspZR2ln\ngNnAyUAWsBt4MLTptB0zSwVeAn7mnKsIXBfN3/sx2h1W33mkFYdm3TEumjjnCr3nIuAV/IeTexsO\np73notBl2KYaa2fU/ztwzu11ztU753zAY/yrGyGq2m5m8fh/IJ9xzr3shaP+ez9Wu8PtO4+04tCh\n7hhnZilm1rlhGbgQWIu/zdO8zaYBr4UmwzbXWDsXAlO92SsTgPKAboiocERf+mX4v3fwt32KmSWa\n2SBgKLCsvfMLBjMz4HFgvXPu9wGrovp7b6zdYfedh3rkvgUj/ZfgH93fAtwV6nzauK2D8c9SWAXk\nNbQX6A4sBjYD7wLdQp1rENr6HP5D6Vr8farTG2sn/tkqs7x/A2uA7FDn3wZtf9pr22r8Pw59Ara/\ny2v7RuDiUOffinafg7/LaDWw0ntcEu3f+3HaHVbfuc6QFhGRo0Rat5KIiLQDFQcRETmKioOIiBxF\nxUFERI6i4iAiIkdRcRARkaOoOIiIyFFUHERE5Cj/H32D6t3fpfOrAAAAAElFTkSuQmCC\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "gdp.plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Paste Dataframe into Excel\n", + "`to_clipboard()`" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "gdp.to_clipboard()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Streaming with Iterators!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[Dask](https://dask.pydata.org/en/latest/): is an option to consider for distributing Pandas-like operations. It lazily defines a directed acyclic graph for computations. The API is very similar to that of Pandas." + ] } ], "metadata": {