diff --git a/README.md b/README.md
index ff01b4c..fc9d95b 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,9 @@ A flask skeleton with the app factory pattern and route blueprints for clean int
## [Flask Kafka Streaming](https://github.com/marksibrahim/recipes/tree/master/flask-kafka-streaming) (via Sockets)
A flask app for streaming from Kafka via web sockets using socketio.
+## [Modern Pandas](https://github.com/marksibrahim/recipes/tree/master/modern-pandas)
+Pandas recipes and best practices.
+
## [Open a New Chrome Window on Mac](https://github.com/marksibrahim/recipes/tree/master/mac-new-chrome-window)
An AppleScript to open a new chrome window in the current desktop.
diff --git a/modern-pandas/data/gdp.csv b/modern-pandas/data/gdp.csv
new file mode 100644
index 0000000..f347bb9
--- /dev/null
+++ b/modern-pandas/data/gdp.csv
@@ -0,0 +1,270 @@
+DATE,GDP
+1947-01-01,243.1
+1947-04-01,246.3
+1947-07-01,250.1
+1947-10-01,260.3
+1948-01-01,266.2
+1948-04-01,272.9
+1948-07-01,279.5
+1948-10-01,280.7
+1949-01-01,275.4
+1949-04-01,271.7
+1949-07-01,273.3
+1949-10-01,271.0
+1950-01-01,281.2
+1950-04-01,290.7
+1950-07-01,308.5
+1950-10-01,320.3
+1951-01-01,336.4
+1951-04-01,344.5
+1951-07-01,351.8
+1951-10-01,356.6
+1952-01-01,360.2
+1952-04-01,361.4
+1952-07-01,368.1
+1952-10-01,381.2
+1953-01-01,388.5
+1953-04-01,392.3
+1953-07-01,391.7
+1953-10-01,386.5
+1954-01-01,385.9
+1954-04-01,386.7
+1954-07-01,391.6
+1954-10-01,400.3
+1955-01-01,413.8
+1955-04-01,422.2
+1955-07-01,430.9
+1955-10-01,437.8
+1956-01-01,440.5
+1956-04-01,446.8
+1956-07-01,452.0
+1956-10-01,461.3
+1957-01-01,470.6
+1957-04-01,472.8
+1957-07-01,480.3
+1957-10-01,475.7
+1958-01-01,468.4
+1958-04-01,472.8
+1958-07-01,486.7
+1958-10-01,500.4
+1959-01-01,511.1
+1959-04-01,524.2
+1959-07-01,525.2
+1959-10-01,529.3
+1960-01-01,543.3
+1960-04-01,542.7
+1960-07-01,546.0
+1960-10-01,541.1
+1961-01-01,545.9
+1961-04-01,557.4
+1961-07-01,568.2
+1961-10-01,581.6
+1962-01-01,595.2
+1962-04-01,602.6
+1962-07-01,609.6
+1962-10-01,613.1
+1963-01-01,622.7
+1963-04-01,631.8
+1963-07-01,645.0
+1963-10-01,654.8
+1964-01-01,671.1
+1964-04-01,680.8
+1964-07-01,692.8
+1964-10-01,698.4
+1965-01-01,719.2
+1965-04-01,732.4
+1965-07-01,750.2
+1965-10-01,773.1
+1966-01-01,797.3
+1966-04-01,807.2
+1966-07-01,820.8
+1966-10-01,834.9
+1967-01-01,846.0
+1967-04-01,851.1
+1967-07-01,866.6
+1967-10-01,883.2
+1968-01-01,911.1
+1968-04-01,936.3
+1968-07-01,952.3
+1968-10-01,970.1
+1969-01-01,995.4
+1969-04-01,1011.4
+1969-07-01,1032.0
+1969-10-01,1040.7
+1970-01-01,1053.5
+1970-04-01,1070.1
+1970-07-01,1088.5
+1970-10-01,1091.5
+1971-01-01,1137.8
+1971-04-01,1159.4
+1971-07-01,1180.3
+1971-10-01,1193.6
+1972-01-01,1233.8
+1972-04-01,1270.1
+1972-07-01,1293.8
+1972-10-01,1332.0
+1973-01-01,1380.7
+1973-04-01,1417.6
+1973-07-01,1436.8
+1973-10-01,1479.1
+1974-01-01,1494.7
+1974-04-01,1534.2
+1974-07-01,1563.4
+1974-10-01,1603.0
+1975-01-01,1619.6
+1975-04-01,1656.4
+1975-07-01,1713.8
+1975-10-01,1765.9
+1976-01-01,1824.5
+1976-04-01,1856.9
+1976-07-01,1890.5
+1976-10-01,1938.4
+1977-01-01,1992.5
+1977-04-01,2060.2
+1977-07-01,2122.4
+1977-10-01,2168.7
+1978-01-01,2208.7
+1978-04-01,2336.6
+1978-07-01,2398.9
+1978-10-01,2482.2
+1979-01-01,2531.6
+1979-04-01,2595.9
+1979-07-01,2670.4
+1979-10-01,2730.7
+1980-01-01,2796.5
+1980-04-01,2799.9
+1980-07-01,2860.0
+1980-10-01,2993.5
+1981-01-01,3131.8
+1981-04-01,3167.3
+1981-07-01,3261.2
+1981-10-01,3283.5
+1982-01-01,3273.8
+1982-04-01,3331.3
+1982-07-01,3367.1
+1982-10-01,3407.8
+1983-01-01,3480.3
+1983-04-01,3583.8
+1983-07-01,3692.3
+1983-10-01,3796.1
+1984-01-01,3912.8
+1984-04-01,4015.0
+1984-07-01,4087.4
+1984-10-01,4147.6
+1985-01-01,4237.0
+1985-04-01,4302.3
+1985-07-01,4394.6
+1985-10-01,4453.1
+1986-01-01,4516.3
+1986-04-01,4555.2
+1986-07-01,4619.6
+1986-10-01,4669.4
+1987-01-01,4736.2
+1987-04-01,4821.5
+1987-07-01,4900.5
+1987-10-01,5022.7
+1988-01-01,5090.6
+1988-04-01,5207.7
+1988-07-01,5299.5
+1988-10-01,5412.7
+1989-01-01,5527.4
+1989-04-01,5628.4
+1989-07-01,5711.6
+1989-10-01,5763.4
+1990-01-01,5890.8
+1990-04-01,5974.7
+1990-07-01,6029.5
+1990-10-01,6023.3
+1991-01-01,6054.9
+1991-04-01,6143.6
+1991-07-01,6218.4
+1991-10-01,6279.3
+1992-01-01,6380.8
+1992-04-01,6492.3
+1992-07-01,6586.5
+1992-10-01,6697.6
+1993-01-01,6748.2
+1993-04-01,6829.6
+1993-07-01,6904.2
+1993-10-01,7032.8
+1994-01-01,7136.3
+1994-04-01,7269.8
+1994-07-01,7352.3
+1994-10-01,7476.7
+1995-01-01,7545.3
+1995-04-01,7604.9
+1995-07-01,7706.5
+1995-10-01,7799.5
+1996-01-01,7893.1
+1996-04-01,8061.5
+1996-07-01,8159.0
+1996-10-01,8287.1
+1997-01-01,8402.1
+1997-04-01,8551.9
+1997-07-01,8691.8
+1997-10-01,8788.3
+1998-01-01,8889.7
+1998-04-01,8994.7
+1998-07-01,9146.5
+1998-10-01,9325.7
+1999-01-01,9447.1
+1999-04-01,9557.0
+1999-07-01,9712.3
+1999-10-01,9926.1
+2000-01-01,10031.0
+2000-04-01,10278.3
+2000-07-01,10357.4
+2000-10-01,10472.3
+2001-01-01,10508.1
+2001-04-01,10638.4
+2001-07-01,10639.5
+2001-10-01,10701.3
+2002-01-01,10834.4
+2002-04-01,10934.8
+2002-07-01,11037.1
+2002-10-01,11103.8
+2003-01-01,11230.1
+2003-04-01,11370.7
+2003-07-01,11625.1
+2003-10-01,11816.8
+2004-01-01,11988.4
+2004-04-01,12181.4
+2004-07-01,12367.7
+2004-10-01,12562.2
+2005-01-01,12813.7
+2005-04-01,12974.1
+2005-07-01,13205.4
+2005-10-01,13381.6
+2006-01-01,13648.9
+2006-04-01,13799.8
+2006-07-01,13908.5
+2006-10-01,14066.4
+2007-01-01,14233.2
+2007-04-01,14422.3
+2007-07-01,14569.7
+2007-10-01,14685.3
+2008-01-01,14668.4
+2008-04-01,14813.0
+2008-07-01,14843.0
+2008-10-01,14549.9
+2009-01-01,14383.9
+2009-04-01,14340.4
+2009-07-01,14384.1
+2009-10-01,14566.5
+2010-01-01,14681.1
+2010-04-01,14888.6
+2010-07-01,15057.7
+2010-10-01,15230.2
+2011-01-01,15238.4
+2011-04-01,15460.9
+2011-07-01,15587.1
+2011-10-01,15785.3
+2012-01-01,15973.9
+2012-04-01,16121.9
+2012-07-01,16227.9
+2012-10-01,16297.3
+2013-01-01,16475.4
+2013-04-01,16541.4
+2013-07-01,16749.3
+2013-10-01,16999.9
+2014-01-01,17025.2
diff --git a/modern-pandas/modern-pandas.ipynb b/modern-pandas/modern-pandas.ipynb
index 10cbdd5..ad1b9df 100644
--- a/modern-pandas/modern-pandas.ipynb
+++ b/modern-pandas/modern-pandas.ipynb
@@ -35,7 +35,23 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Reading Data"
+ "Resources\n",
+ "* [Cookbook](https://github.com/jvns/pandas-cookbook) by Julia Evans\n",
+ "* [Cheat Sheet](https://github.com/brandon-rhodes/pycon-pandas-tutorial/blob/master/cheat-sheet.txt) by Brandon Rhodes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 1 - Reading Data"
]
},
{
@@ -90,15 +106,18 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## df.loc and Indicies"
+ "## Indices"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "`df.loc`: allows you to use the column, row indecies\n",
- " `df.loc[row_labels, column_labels]`"
+ "Indices are names for columns and rows, stored as a native Pandas data type.\n",
+ "\n",
+ "*Why bother with indices?*\n",
+ "\n",
+ "Indices can make lookups and sorting tremendously faster. Brandon Rhodes shows an example that speeds up an operation by 400x. "
]
},
{
@@ -146,6 +165,21 @@
"flights.index"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Search by Indices: df.loc"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`df.loc`: allows you to select data by row and column indices (labels):\n",
+ " `df.loc[row_labels, column_labels]`"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 26,
@@ -206,6 +240,20 @@
"flights.loc[[25, 303], [\"fl_date\", \"origin\"]]"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 2 - Filtering Data"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -222,7 +270,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 61,
"metadata": {},
"outputs": [
{
@@ -298,54 +346,102 @@
"
... | \n",
" \n",
" \n",
- " 611 | \n",
+ " 555 | \n",
" 2014-01-01 | \n",
- " WN | \n",
- " 19393 | \n",
- " N792SW | \n",
- " 1880 | \n",
- " LGA | \n",
- " STL | \n",
- " 1245.0 | \n",
- " 0.0 | \n",
- " 1419.0 | \n",
- " -16.0 | \n",
+ " VX | \n",
+ " 21171 | \n",
+ " N840VA | \n",
+ " 415 | \n",
+ " JFK | \n",
+ " LAX | \n",
+ " 2015.0 | \n",
+ " 15.0 | \n",
+ " 2338.0 | \n",
+ " 13.0 | \n",
" 0.0 | \n",
- " 2014-01-01 14:19:00 | \n",
- " 2014-01-01 12:45:00 | \n",
+ " 2014-01-01 23:38:00 | \n",
+ " 2014-01-01 20:15:00 | \n",
"
\n",
" \n",
"\n",
- "480 rows × 14 columns
\n",
+ "258 rows × 14 columns
\n",
""
],
"text/plain": [
" fl_date unique_carrier airline_id tail_num fl_num origin dest \\\n",
"0 2014-01-01 AA 19805 N338AA 1 JFK LAX \n",
".. ... ... ... ... ... ... ... \n",
- "611 2014-01-01 WN 19393 N792SW 1880 LGA STL \n",
+ "555 2014-01-01 VX 21171 N840VA 415 JFK LAX \n",
"\n",
" dep_time dep_delay arr_time arr_delay cancelled arr \\\n",
"0 914.0 14.0 1238.0 13.0 0.0 2014-01-01 12:38:00 \n",
".. ... ... ... ... ... ... \n",
- "611 1245.0 0.0 1419.0 -16.0 0.0 2014-01-01 14:19:00 \n",
+ "555 2015.0 15.0 2338.0 13.0 0.0 2014-01-01 23:38:00 \n",
"\n",
" dep \n",
"0 2014-01-01 09:14:00 \n",
".. ... \n",
- "611 2014-01-01 12:45:00 \n",
+ "555 2014-01-01 20:15:00 \n",
"\n",
- "[480 rows x 14 columns]"
+ "[258 rows x 14 columns]"
]
},
- "execution_count": 43,
+ "execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# flights on 2014-01-01 leaving from JFK\n",
- "flights[(flights[\"fl_date\"] == \"2014-01-01\") & (flights[\"origin\"].isin([\"JFK\", \"LGA\"]))]"
+ "flights[(flights[\"fl_date\"] == \"2014-01-01\") & (flights[\"origin\"] == \"JFK\")]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## df.isin"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 True\n",
+ " ... \n",
+ "20816 False\n",
+ "Name: origin, Length: 20817, dtype: bool"
+ ]
+ },
+ "execution_count": 63,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "flights[\"origin\"].isin([\"JFK\", \"LGA\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Str Methods"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`.str` is an attribute on a Series that provides useful string methods:\n",
+ "* series.str.startswith(\"Ham\")\n",
+ "* series.str.contains(\"Hamlet\")\n",
+ "* series.str.extract(regex pattern)\n",
+ "* series.str.len()"
]
},
{
@@ -473,26 +569,865 @@
"# use axis=0 to drop rows"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 3 - Combining Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Tip: use row indices!**\n",
+ "\n",
+ "If you have two dataframes, Pandas will rely on row labels (indices) to automatically join them!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Set Index"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Use `df.set_index(\"DATE\")`, or read a column in directly as the index:"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 45,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "gdp = pd.read_csv('data/gdp.csv', index_col='DATE')"
+ ]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 54,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "# remove index\n",
+ "gdp = gdp.reset_index()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Handling Missing Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Drop"
+ ]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 46,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GDP | \n",
+ "
\n",
+ " \n",
+ " DATE | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1947-01-01 | \n",
+ " 243.1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 2014-01-01 | \n",
+ " 17025.2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
269 rows × 1 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GDP\n",
+ "DATE \n",
+ "1947-01-01 243.1\n",
+ "... ...\n",
+ "2014-01-01 17025.2\n",
+ "\n",
+ "[269 rows x 1 columns]"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# returns a new object with na values dropped\n",
+ "gdp.dropna()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Fill"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`ffill`: fills missing values with the previous available value"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GDP | \n",
+ "
\n",
+ " \n",
+ " DATE | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1947-01-01 | \n",
+ " 243.1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 2014-01-01 | \n",
+ " 17025.2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
269 rows × 1 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GDP\n",
+ "DATE \n",
+ "1947-01-01 243.1\n",
+ "... ...\n",
+ "2014-01-01 17025.2\n",
+ "\n",
+ "[269 rows x 1 columns]"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "gdp.fillna(method='ffill')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Merging Datasets "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "There are two ways:\n",
+ "\n",
+ "1. `pd.concat`: Tom recommends this for joining on **Indices**\n",
+ "2. `pd.merge`: Tom recommends this for merging on **Columns**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE | \n",
+ " GDP | \n",
+ " fl_date | \n",
+ " unique_carrier | \n",
+ " airline_id | \n",
+ " tail_num | \n",
+ " fl_num | \n",
+ " origin | \n",
+ " dest | \n",
+ " dep_time | \n",
+ " dep_delay | \n",
+ " arr_time | \n",
+ " arr_delay | \n",
+ " cancelled | \n",
+ " arr | \n",
+ " dep | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1947-01-01 | \n",
+ " 243.1 | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaT | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 21085 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2014-01-31 | \n",
+ " UA | \n",
+ " 19977.0 | \n",
+ " N37293 | \n",
+ " 1456.0 | \n",
+ " LGA | \n",
+ " IAH | \n",
+ " 719.0 | \n",
+ " -6.0 | \n",
+ " 1006.0 | \n",
+ " -20.0 | \n",
+ " 0.0 | \n",
+ " 2014-01-31 10:06:00 | \n",
+ " 2014-01-31 07:19:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
21086 rows × 16 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE GDP fl_date unique_carrier airline_id tail_num \\\n",
+ "0 1947-01-01 243.1 NaT NaN NaN NaN \n",
+ "... ... ... ... ... ... ... \n",
+ "21085 NaN NaN 2014-01-31 UA 19977.0 N37293 \n",
+ "\n",
+ " fl_num origin dest dep_time dep_delay arr_time arr_delay \\\n",
+ "0 NaN NaN NaN NaN NaN NaN NaN \n",
+ "... ... ... ... ... ... ... ... \n",
+ "21085 1456.0 LGA IAH 719.0 -6.0 1006.0 -20.0 \n",
+ "\n",
+ " cancelled arr dep \n",
+ "0 NaN NaT NaT \n",
+ "... ... ... ... \n",
+ "21085 0.0 2014-01-31 10:06:00 2014-01-31 07:19:00 \n",
+ "\n",
+ "[21086 rows x 16 columns]"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.merge(gdp, flights, left_on=\"DATE\", right_on=\"dep\", how=\"outer\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 4 - Sorting and Grouping Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Sorting"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " fl_date | \n",
+ " unique_carrier | \n",
+ " airline_id | \n",
+ " tail_num | \n",
+ " fl_num | \n",
+ " origin | \n",
+ " dest | \n",
+ " dep_time | \n",
+ " dep_delay | \n",
+ " arr_time | \n",
+ " arr_delay | \n",
+ " cancelled | \n",
+ " arr | \n",
+ " dep | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 10762 | \n",
+ " 2014-01-16 | \n",
+ " EV | \n",
+ " 20366 | \n",
+ " N12563 | \n",
+ " 3805 | \n",
+ " ALB | \n",
+ " EWR | \n",
+ " 639.0 | \n",
+ " -16.0 | \n",
+ " 810.0 | \n",
+ " 10.0 | \n",
+ " 0.0 | \n",
+ " 2014-01-16 08:10:00 | \n",
+ " 2014-01-16 06:39:00 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 20702 | \n",
+ " 2014-01-31 | \n",
+ " MQ | \n",
+ " 20398 | \n",
+ " N672MQ | \n",
+ " 3075 | \n",
+ " SYR | \n",
+ " ORD | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ " NaT | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
20817 rows × 14 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " fl_date unique_carrier airline_id tail_num fl_num origin dest \\\n",
+ "10762 2014-01-16 EV 20366 N12563 3805 ALB EWR \n",
+ "... ... ... ... ... ... ... ... \n",
+ "20702 2014-01-31 MQ 20398 N672MQ 3075 SYR ORD \n",
+ "\n",
+ " dep_time dep_delay arr_time arr_delay cancelled \\\n",
+ "10762 639.0 -16.0 810.0 10.0 0.0 \n",
+ "... ... ... ... ... ... \n",
+ "20702 NaN NaN NaN NaN 1.0 \n",
+ "\n",
+ " arr dep \n",
+ "10762 2014-01-16 08:10:00 2014-01-16 06:39:00 \n",
+ "... ... ... \n",
+ "20702 NaT NaT \n",
+ "\n",
+ "[20817 rows x 14 columns]"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "flights.sort_values(by=[\"origin\", \"dep_delay\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Sorting Index"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Sorting the index can make lookups much faster."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " DATE | \n",
+ " GDP | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1947-01-01 | \n",
+ " 243.1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 268 | \n",
+ " 2014-01-01 | \n",
+ " 17025.2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
269 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " DATE GDP\n",
+ "0 1947-01-01 243.1\n",
+ ".. ... ...\n",
+ "268 2014-01-01 17025.2\n",
+ "\n",
+ "[269 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 74,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "gdp.sort_index()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Grouping"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Pandas can perform `groupby` similar to SQL. \n",
+ "\n",
+ "CAREFUL: applying groupby returns a **groupby object**, not a dataframe!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You apply aggregator functions on these groups:\n",
+ "* `.sum()`, `.size()`, `.mean()`, `.min()`, `.max()`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " dep_delay | \n",
+ "
\n",
+ " \n",
+ " origin | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " ALB | \n",
+ " 20.848429 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " SYR | \n",
+ " 28.597826 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
11 rows × 1 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " dep_delay\n",
+ "origin \n",
+ "ALB 20.848429\n",
+ "... ...\n",
+ "SYR 28.597826\n",
+ "\n",
+ "[11 rows x 1 columns]"
+ ]
+ },
+ "execution_count": 68,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "flights[[\"dep_delay\", \"origin\"]].groupby(\"origin\").mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can apply multiple aggregators at once with `.agg`, e.g. `flights.groupby(\"origin\")[\"dep_delay\"].agg([\"mean\", \"max\"])`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 5 - Manipulating Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Series.apply(function)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 486.2\n",
+ " ... \n",
+ "268 34050.4\n",
+ "Name: GDP, Length: 269, dtype: float64"
+ ]
+ },
+ "execution_count": 77,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "gdp[\"GDP\"].apply(lambda x: 2*x)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Apply a function to the index"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Int64Index([ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,\n",
+ " ...\n",
+ " 258, 259, 260, 261, 262, 263, 264, 265, 266, 267],\n",
+ " dtype='int64', length=269)"
+ ]
+ },
+ "execution_count": 78,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "gdp.index.map(lambda x: x - 1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 6 - Tips"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Plotting"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 83,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xl8VPW9//HXJ3tIgAQIWwABQURQ\nAgTBqmhrq2gXtLdV3KCKoq12sfX26vXeSrW9P7XXWr1SWqwoWhVxp9YNca2VJZE1rGGThCUJIQkQ\nss7398ec2BEICckks+T9fDzmMWc+55yZz9fB+eR8v99zjjnnEBERCRQT6gRERCT8qDiIiMhRVBxE\nROQoKg4iInIUFQcRETmKioOIiBxFxUFERI6i4iAiIkdRcRARkaPEhTqBlurRo4cbOHBgqNMQEYko\nubm5Jc65jKa2a7I4mNlc4FtAkXNupBd7HhjmbZIGlDnnssxsILAe2OitW+Kcu9nbZyzwJJAMvAH8\n1DnnzKwb8DwwENgOXO6c299UXgMHDiQnJ6epzUREJICZ7WjOds3pVnoSmBQYcM5d4ZzLcs5lAS8B\nLwes3tKwrqEweGYDNwJDvUfDe94BLHbODQUWe69FRCSEmiwOzrmPgNJjrTMzAy4Hnjvee5hZH6CL\nc26J81/p7yngUm/1ZGCetzwvIC4iIiHS2gHpc4G9zrnNAbFBZrbCzD40s3O9WCZQELBNgRcD6OWc\n2+0t7wF6tTInERFppdYOSF/Jl48adgMDnHP7vDGGV81sRHPfzBuDaPQa4mY2A5gBMGDAgKPW19bW\nUlBQQFVVVXM/MqIkJSXRr18/4uPjQ52KiES5FhcHM4sDvguMbYg556qBam8518y2AKcAhUC/gN37\neTGAvWbWxzm32+t+KmrsM51zc4A5ANnZ2UcVkYKCAjp37szAgQPx93hFD+cc+/bto6CggEGDBoU6\nHRGJcq3pVvo6sME590V3kZllmFmstzwY/8DzVq/bqMLMJnjjFFOB17zdFgLTvOVpAfETVlVVRffu\n3aOuMACYGd27d4/aoyIRCS9NFgczew74FBhmZgVmNt1bNYWjB6InAqvNbCXwInCzc65hMPtHwF+A\nfGAL8KYXvw/4hpltxl9w7mtFe6KyMDSI5raJSHhpslvJOXdlI/EfHCP2Ev6prcfaPgcYeYz4PuCC\npvIQEZGWcc6xZGspS7bua/Y+unxGkO3du5errrqKwYMHM3bsWM466yxeeeUVPvjgA7p27cro0aMZ\nNmwYEydO5PXXX/9iv5kzZ5KZmUlWVhYjR45k4cKFIWyFiESTZ5d9zpWPLeGR9zY3vbFHxSGInHNc\neumlTJw4ka1bt5Kbm8v8+fMpKPAPy5x77rmsWLGCjRs38sgjj3DrrbeyePHiL/a/7bbbWLlyJS+8\n8ALXX389Pp8vVE0RkSjh8zke/3gbZ/Tryqq7L2z2fioOQfTee++RkJDAzTf/68Twk046iR//+MdH\nbZuVlcWvfvUrHn300aPWDR8+nLi4OEpKSto0XxGJfp9sKWFrySGuO3sgXZKaPw0+Yi+815Rf/y2P\ndbsqgvqep/Xtwt3fbvy0jby8PMaMGdPs9xszZgy/+93vjoovXbqUmJgYMjKavDaWiMhxzfloKz1S\nE7jk9D4ntJ+OHNrQLbfcwqhRoxg3btwx1/uvJPIvDz30EFlZWdx+++08//zzmp0kIq2yfHspH28u\nYcbEwSTGxZ7QvlF75HC8v/DbyogRI3jppX9N1po1axYlJSVkZ2cfc/sVK1YwfPjwL17fdttt3H77\n7W2ep4hEv52llcxcmEeP1ESunTDwhPfXkUMQfe1rX6OqqorZs2d/EausrDzmtqtXr+bee+/llltu\naa/0RKSD2FZyiEl/+IjtJYf4zaUjSE44sa
MGiOIjh1AwM1599VVuu+02HnjgATIyMkhJSeH+++8H\n4OOPP2b06NFUVlbSs2dPHnnkES64QKd4iEjwOOeYuTAPM+PNn55L/26dWvQ+Kg5B1qdPH+bPn3/M\ndeXl5Y3uN3PmzDbKSEQ6iuq6eh5+dzMfbirmv745vMWFAVQcRESiwqHqOq57cjnLtpXyb2P6Me0r\nA1v1fioOIiIRrt7nuPGpHHK2l/KHK7K4dHRm0zs1IeoGpI+cHhpNorltItJys97P559b9nHfd88I\nSmGAKCsOSUlJ7Nu3Lyp/RBvu55CUlBTqVEQkjGzcc4CHF29mclZfLh/XP2jvG1XdSv369aOgoIDi\n4uJQp9ImGu4EJyLS4L4319MpIZaZQT63K6qKQ3x8vO6SJiIdxj/zS3h/YzF3Xnwq6SkJQX3vqOpW\nEhHpKHw+x/97cwOZacmtnpl0LCoOIiIR6LVVhawpLOf2i04hKf7Ez4BuioqDiEiE2bjnAP/9ah6j\n+qcxeVRwZicdScVBRCSC1NX7+OEzuXRKiOXP14wlJqZtrt4cVQPSIiLR7o21e9hafIg/XTOG3l3b\nbmp7k0cOZjbXzIrMbG1AbKaZFZrZSu9xScC6O80s38w2mtlFAfFJXizfzO4IiA8ys6Ve/HkzC+6Q\nu4hIFKip8/HaykIefncTQ3qmcuFpvdv085rTrfQkMOkY8Yecc1ne4w0AMzsNmAKM8Pb5o5nFmlks\nMAu4GDgNuNLbFuB+772GAPuB6a1pkIhINPqfN9bz0/kr2bGvktsvHNZm3UkNmiwOzrmPgNJmvt9k\nYL5zrto5tw3IB870HvnOua3OuRpgPjDZ/Lc6+xrworf/PODSE2yDiEhUW11QxrxPt3P1+AGsuvtC\nJo1s26MGaN2A9K1mttrrdkr3YpnAzoBtCrxYY/HuQJlzru6IuIiI4B+A/s9X1pCRmsh/XHwqKYnt\nM1Tc0uIwGzgZyAJ2Aw8GLaPjMLMZZpZjZjnReokMEZFAT326g7WFFfzq26fRJSm+3T63RcXBObfX\nOVfvnPMBj+HvNgIoBAKv/NTPizUW3wekmVncEfHGPneOcy7bOZedkZHRktRFRMLCx5uL+fFzK5i5\nMK/Rbcora3no3U2cd0oG3zy9Tztm18LiYGaBWV4GNMxkWghMMbNEMxsEDAWWAcuBod7MpAT8g9YL\nnf/yqe8D3/P2nwa81pKcREQixcqdZVz3xHLeydvDk//czvaSQ8fc7rGPt3Kgqo47Lj4V/xBt+2nO\nVNbngE+BYWZWYGbTgQfMbI2ZrQa+CtwG4JzLAxYA64C3gFu8I4w64FbgbWA9sMDbFuA/gJ+bWT7+\nMYjHg9pCEZEw8drKQs6+7z2ufXwpvboksfDWczCDV1Yc3WGytfggT3yyjW+e0Yfhfbq0e64Wqfc+\nyM7Odjk5OaFOQ0SkWT7cVMz0J5cztFdnenVJ5GdfP4Ws/mlc9dgSCssO88Ht539xdFBUUcVlf/wn\nVbX1vPKjsxnQveX3gj6SmeU657Kb2k6XzxARaWN7yqv46fwVDOmZyoKbJvDkdWeS1T8NgO+N7ceO\nfZUsXLXri+3veX0dJQermXf9mUEtDCdCxUFEpA055/j3F1dRXetj1tVj6HzEjKPJWZmM6teVe/62\njp2llby3YS+vr97NzeedzMjMriHKWtdWEhFpU4vXF/Hx5hJmfvs0Ts5IPWp9bIxx37+dwaWzPuHc\nB94HYEC3Ttx83sntneqXqDiIiLSRmjof//PmegZnpHD1hJMa3W54ny4s/sV5vPxZIV2T47lsTCbJ\nCcG/R8OJUHEQEWkjf/wgn63Fh3jiB+OIjz1+L36/9E785IKh7ZRZ0zTmICLSBjbuOcCs9/OZnNWX\nr57aM9TpnDAVBxGRIKur9/HLF1fRJSmeu789ItTptIi6lUREguzR9/NZVVDOo1eNpltKZN6iRkcO\nIiJB9M
Qn2/jDu5v57ujMdr8eUjCpOIiIBEnujv3c+/o6LjytFw9874x2vx5SMKk4iIgEQVVtPbc9\nv5K+ack8ePko4pqYnRTuNOYgIhIEb+ft4fPSSp64btxRZ0FHosgubSIiYeLZpZ9zUvdOnDc0Ou41\no+IgItJKW4oPsnRbKVeM609MTOSOMwRScRARaaX739xAcnws3x/bv+mNI4SKg4hIK7y/sYh31u3l\nxxcMIaNzYqjTCRoVBxGRFnLO8eA7GzmpeyduOGdwqNMJKhUHEZEW+nTLPtYWVnDzeSeTEBddP6fR\n1RoRkXbinGPWB/n0SE3kstGZoU4n6FQcRERa4NWVhXySv48fnX8ySfGhvfdCW1BxEBE5QeWHa/n1\n39YxZkAa074yMNTptIkmi4OZzTWzIjNbGxD7nZltMLPVZvaKmaV58YFmdtjMVnqPPwXsM9bM1phZ\nvpk9Yt5FR8ysm5ktMrPN3nN6WzRURCRYXswtoKyyll9/ZySxUXJew5Gac+TwJDDpiNgiYKRz7gxg\nE3BnwLotzrks73FzQHw2cCMw1Hs0vOcdwGLn3FBgsfdaRCQs+XyOpz/dztiT0jm9X9dQp9NmmiwO\nzrmPgNIjYu845+q8l0uAfsd7DzPrA3Rxzi1xzjngKeBSb/VkYJ63PC8gLiISdt7bUMT2fZVMPavx\ne0JHg2CMOVwPvBnwepCZrTCzD83sXC+WCRQEbFPgxQB6Oed2e8t7gF5ByElEJOjq6n3c/9YGBnbv\nxMUjI/deDc3RqquymtldQB3wjBfaDQxwzu0zs7HAq2bW7HvkOeecmbnjfN4MYAbAgAEDWp64iEgL\nvJBbwOaig/zpmjFRd17DkVrcOjP7AfAt4GqvqwjnXLVzbp+3nAtsAU4BCvly11M/Lwaw1+t2auh+\nKmrsM51zc5xz2c657IyM6LjyoYhEhkPVdTz4ziayT0rnohG9Q51Om2tRcTCzScAvge845yoD4hlm\nFustD8Y/8LzV6zaqMLMJ3iylqcBr3m4LgWne8rSAuIhI2PjzR1spOVjNXd8cHtF3eGuuJruVzOw5\n4Hygh5kVAHfjn52UCCzy/iMt8WYmTQTuMbNawAfc7JxrGMz+Ef6ZT8n4xygaxinuAxaY2XRgB3B5\nUFomIhIkeyuqeOyjrXzrjD6MHtAxZts3WRycc1ceI/x4I9u+BLzUyLocYOQx4vuAC5rKQ0QkVH7/\nzibqfY7/mHRqqFNpN9E9oiIi0kprCspZkLuTqWedRP9unUKdTrtRcRARaUS9z3HXq2vokZrIT74+\nNNTptCsVBxGRRsxf/jmrC8r5r28Op0tSfKjTaVcqDiIix3Cwuo6HFm1i3MB0vjOqb6jTaXcqDiIi\nx/D4x9soOVjDXd88rUNMXT2SioOIyBFq6nw8vWQHF5zak6z+aaFOJyRUHEREjvDm2t2UHKzm2ii/\nuN7xqDiIiASoqfPx2MdbGdi9ExOHdtzL9Kg4iIgE+O3f17G2sILbLxpGTJTeyKc5VBxERDwv5hYw\n79Md3HDOIL51RseboRSoVZfsFhGJBs453lizh/98ZQ1nDe7OHRd3nMtkNEbFQUQ6tLp6H3e+vIYX\ncgsYmdmFR68aTVysOlVUHESkw6qp83Hb8yv5+5rd/PhrQ/jpBUNVGDwqDiLSIdXW+/jhX3NZvKGI\n//rmcG44d3CoUworKg4i0iH99u/rWbyhiHsvHcm1Ezru+QyNUXEQkQ5jd/lhNuw+wJtrd7Mgp4Dp\n5wxSYWiEioOIdAjbSg4x+dF/UFFVR1yMMWPiYH550bBQpxW2VBxEJKrV1Pl4+bMCZn+4hdgY45kb\nxnNyRiq9uyaFOrWwpuIgIlGr3ue+mI00qEcKc6ZmM25gt1CnFRFUHEQkKlVU1fIfL67mzbV7uOuS\n4dxw7qAOeentlmrWhF4zm2tmRWa2NiDWzcwWmdlm7zndi5uZPWJm+Wa2
2szGBOwzzdt+s5lNC4iP\nNbM13j6PmL5BEWkF5xw/mLuMd9bt5a5LhnPjxMEqDCeouWd7PAlMOiJ2B7DYOTcUWOy9BrgYGOo9\nZgCzwV9MgLuB8cCZwN0NBcXb5saA/Y78LBGRZlu+fT+ffV7GzG+fxo0Tdf5CSzSrODjnPgJKjwhP\nBuZ5y/OASwPiTzm/JUCamfUBLgIWOedKnXP7gUXAJG9dF+fcEuecA54KeC8RkRP25D+30TU5nu+N\n7R/qVCJWa84T7+Wc2+0t7wF6ecuZwM6A7Qq82PHiBceIi4icsEXr9vLW2j1MGdef5ITYUKcTsYJy\nERHvL34XjPc6HjObYWY5ZpZTXFzc1h8nIhHms8/386Nncjk9syu3fG1IqNOJaK0pDnu9LiG85yIv\nXggEHsv182LHi/c7Rvwozrk5zrls51x2RkbHvUOTiBzb797aSNfkBJ6aPp4uSfGhTieitaY4LAQa\nZhxNA14LiE/1Zi1NAMq97qe3gQvNLN0biL4QeNtbV2FmE7xZSlMD3ktEpFmWbSvl0637uPm8wXRN\nVmForWad52BmzwHnAz3MrAD/rKP7gAVmNh3YAVzubf4GcAmQD1QC1wE450rN7F5gubfdPc65hkHu\nH+GfEZUMvOk9RESapabOx6//lkeP1ESuHq9rJQVDs4qDc+7KRlZdcIxtHXBLI+8zF5h7jHgOMLI5\nuYiIHOmRxZvJ21XBn68dq0HoINFdLUQkor2Qs5NH38/n+2P7cdGI3qFOJ2qoOIhIxFq0bi93vLyG\nc4b04DeXqfMhmFQcRCQirS0s59ZnP2Nk3y78+dqxJMapOymYVBxEJOL4fI67Xl1L56R4nrjuTFIS\ndQ3RYFNxEJGI80LuTlbtLOM/LzmVbikJoU4nKqk4iEhE2Vlayb2vr+fMQd24bLSutNNWVBxEJGIc\nrK7jx8+tAODB74/SZbjbkDrqRCQilB+u5fonl7OmsJxZV42hf7dOoU4pqqk4iEjYKzlYzdTHl7G5\n6ACPXjmaSSN1PkNbU3EQkbC2q+ww1/xlKbvKD/PY1GzOH9Yz1Cl1CCoOIhK2qmrrmT4vh+ID1Tw9\nfTzjBnYLdUodhoqDiIStmQvzWL+7gid+ME6FoZ1ptpKIhKW/LtnB/OU7ueWrJ/PVU9WV1N5UHEQk\n7Czduo+ZC/P46rAMfv6NYaFOp0NScRCRsFJYdpgfPfMZA7p14uErRxMbo3MZQkHFQUTCxuGaemY8\nlUNNnY85U7N1q88Q0oC0iISFwzX1/PCZXNbtruAvU7MZ0jM11Cl1aCoOIhJyPp9jxtM5/CO/hN9e\nejoXDO8V6pQ6PHUriUjIvZC7k483l3DP5JFcNX5AqNMRVBxEJMRWF5Tx27/7r7J6jQpD2FBxEJGQ\n+XBTMVPmLKFzUjz/+z1dZTWctLg4mNkwM1sZ8Kgws5+Z2UwzKwyIXxKwz51mlm9mG83sooD4JC+W\nb2Z3tLZRIhL+XltZyA3zljOwewqv3PIVBnTXVVbDSYsHpJ1zG4EsADOLBQqBV4DrgIecc/8buL2Z\nnQZMAUYAfYF3zewUb/Us4BtAAbDczBY659a1NDcRCV/1PsdfPt7KfW9t4MyB3XhsmqashqNgzVa6\nANjinNtxnMPCycB851w1sM3M8oEzvXX5zrmtAGY239tWxUEkynz2+X5+9dpa1hZWMGlEb/4wJYuk\n+NhQpyXHEKwxhynAcwGvbzWz1WY218zSvVgmsDNgmwIv1lj8KGY2w8xyzCynuLg4SKmLSHt4MbeA\n7/7xnxQfqOb/rhzN7GvGqDCEsVYXBzNLAL4DvOCFZgMn4+9y2g082NrPaOCcm+Ocy3bOZWdkZATr\nbUWkjX24qZg7X17N2UO6s/gX5/PtUX01+BzmgtGtdDHwmXNuL0DDM4CZPQa87r0sBPoH7NfPi3Gc\nuIhEuCc+2ca9r6/jlF6d+ePVY0lN
1Lm3kSAY3UpXEtClZGZ9AtZdBqz1lhcCU8ws0cwGAUOBZcBy\nYKiZDfKOQqZ424pIhHs7bw+//ts6vj68Fy/98Ct0TdbAc6RoVQk3sxT8s4xuCgg/YGZZgAO2N6xz\nzuWZ2QL8A811wC3OuXrvfW4F3gZigbnOubzW5CUioZe7Yz+/WLCKUf268siVozW+EGHMORfqHFok\nOzvb5eTkhDoNETmGVTvLuOqxJfTsksRzN06gd9ekUKckHjPLdc5lN7WdOv9EJKj2lFdx41M5pKck\n8PyMCfTsosIQiXT5DBEJmk+37GPyrH9wqLqOx6eNU2GIYDpyEJFWO1Rdx+wPtjDrg3wGdU9h7g/G\nMax351CnJa2g4iAirfLRpmJ+Mn8FZZW1fHdMJvdOHkmKpqtGPH2DItJi728s4qancxncI4UnfjCO\n0QPSm95JIoKKg4i0yOL1e/nhXz/jlN6p/HX6eNI6JYQ6JQkiDUiLyAl7J28PN/81l1P7dOaZ6RNU\nGKKQioOInJC31u7hR898xml9u/L09PF07aSznqORupVEpFkqa/wzkmZ/sIXT+3Vl3vVn6j4MUUzF\nQUSatGpnGT+dv4Lt+yr5zqi+/PaykXRWYYhqKg4i0qiaOh+vrCjgv1/NI6NzIs/dOIGzTu4e6rSk\nHag4iMiXrC0s5/63NrB0WyngLxATBndj9tVjSU/RwHNHoeIgIl/465Id/PpveXROiueqMwcQH2uM\nH9Sd84dlEBer+SsdiYqDiADwQs5O/uvVtXx1WAZ/uGK0ZiF1cCoOIh3cweo6nvxkGw8v3szZQ7oz\nZ2o28TpK6PBUHEQ6sE17D3DT07lsKznE14f35H+/P0qFQQAVB5EOK29XOVc9tpT42BjNQpKjqDiI\ndECb9h7g2seXkZIQy/M3nUX/bp1CnZKEGR0/inQwW4sPctVjS4mLMZ65cYIKgxyTioNIB/L5vkqu\n/stSnHM8e+N4BvVICXVKEqZaXRzMbLuZrTGzlWaW48W6mdkiM9vsPad7cTOzR8ws38xWm9mYgPeZ\n5m2/2cymtTYvEfmyVTvL+O7sTzhcW89fbxjPkJ66U5s0LlhHDl91zmU557K913cAi51zQ4HF3muA\ni4Gh3mMGMBv8xQS4GxgPnAnc3VBQRKT11u+u4Jq/LCUpPpYXbz6L4X26hDolCXNt1a00GZjnLc8D\nLg2IP+X8lgBpZtYHuAhY5Jwrdc7tBxYBk9ooN5EO5d11e7n28aWkJsWx4KazdMQgzRKM4uCAd8ws\n18xmeLFezrnd3vIeoJe3nAnsDNi3wIs1FheRVnj8H9u44akceqQm8vT08fRNSw51ShIhgjGV9Rzn\nXKGZ9QQWmdmGwJXOOWdmLgifg1d8ZgAMGDAgGG8pErVeyi3gN39fx6QRvfm/q0br5DY5Ia3+1+Kc\nK/Sei4BX8I8Z7PW6i/Cei7zNC4H+Abv382KNxY/8rDnOuWznXHZGRkZrUxeJSjV1Pu5+bS2/eGEV\n4wd14w9TslQY5IS16l+MmaWYWeeGZeBCYC2wEGiYcTQNeM1bXghM9WYtTQDKve6nt4ELzSzdG4i+\n0IuJyAnILzrA5X/+lHmf7uDGcwfx9PTxJMXHhjotiUCt7VbqBbxiZg3v9axz7i0zWw4sMLPpwA7g\ncm/7N4BLgHygErgOwDlXamb3Asu97e5xzpW2MjeRDqNgfyWz3s/nxdwCUhLj+OPVY7jk9D6hTksi\nmDkXlOGAdpedne1ycnJCnYZISG0vOcSCnJ3M/WQbPgdXZPfnZ18fSvfUxFCnJmHKzHIDTjtolK6t\nJBKB9h2s5veLNvHcss8BuHhkH+765nDNRpKgUXEQiTCvrCjgV6/lUVlTz9SzBnLzeSfTu2tSqNOS\nKKPiIBIhqmrreXjxZmZ/sIUzB3bjf747Uie0SZtRcRCJAPlFB5nxdA5biw9xRXZ/7r10JAlxmp4q\n
bUfFQSSM1fscL+Ts5LdvrCcxLoZ515/JeafoHB9peyoOImFqW8khfr5gJSs+LyP7pHQeuiJL916Q\ndqPiIBKGVu0sY+rcZQD84YosJmf1xTufSKRdqDiIhJm8XeVc85elpKXE8+wNulObhIaKg0gY2VZy\niGlzl5OaFMfzM87SeQsSMpruIBImthYfZMqcT/E5x1PXn6nCICGlIweRMLCl+CBXzllCvc/x3I0T\nGNpL5y9IaKk4iIRYftFBrnxsCc45npsxgVNUGCQMqDiIhNB7G/byyxdXA6YjBgkrKg4i7az4QDWv\nrijklRWFrNtdwam9O/PoVWMY0jM11KmJfEHFQaSNFVVU8fKKQvKLDgLw+updVNX6yOqfxj2TR3DF\nuP4kxumGPBJeVBxE2tDKnWXc/HQueyqq6Nk5karaei48rTc/uWCojhQkrKk4iLSB8spafvDkMlZ8\nXkavLon8/SfnMKJv11CnJdJsKg4iQebzOX6+YCVrC8v572+dxmWjM+mWkhDqtEROiIqDSJDNej+f\nxRuKuGfyCKaeNTDU6Yi0iIqDSJD4fI4FOTv5/bubuGx0JtdOOCnUKYm0mIqDSCu9v7GI+9/cQMH+\nwxysrmPcwHT+57LTdRVViWgtvraSmfU3s/fNbJ2Z5ZnZT734TDMrNLOV3uOSgH3uNLN8M9toZhcF\nxCd5sXwzu6N1TRJpH0UVVdzy7Gdc98Ryaup9fG9sPx6eksXzM84iOUFTUyWytebIoQ74hXPuMzPr\nDOSa2SJv3UPOuf8N3NjMTgOmACOAvsC7ZnaKt3oW8A2gAFhuZgudc+takZtIm/H5HM8s+5wH3txA\ndb2PX3zjFGacN1jnKkhUaXFxcM7tBnZ7ywfMbD2QeZxdJgPznXPVwDYzywfO9NblO+e2ApjZfG9b\nFQcJK8453t9YxMOL81m1s4yzh3TnN5eezqAeKaFOTSTogjLmYGYDgdHAUuBs4FYzmwrk4D+62I+/\ncCwJ2K2AfxWTnUfExzfyOTOAGQADBgwIRuoizVJZU8edL6/htZW7yExL5veXj+Ky0ZkaV5Co1er7\nOZhZKvAS8DPnXAUwGzgZyMJ/ZPFgaz+jgXNujnMu2zmXnZGhm6xL+3h/YxFff/BDFq7axS++cQof\n/Pv5fHdMPxUGiWqtOnIws3j8heEZ59zLAM65vQHrHwNe914WAv0Ddu/nxThOXCSkcneUctNTuQzq\nkcLDV45m3MBuoU5JpF20uDiY/8+mx4H1zrnfB8T7eOMRAJcBa73lhcCzZvZ7/APSQ4FlgAFDzWwQ\n/qIwBbiqpXmJBMO2kkM8u3QH85ftpG9aEs/fNIG0TjrLWTqO1hw5nA1cC6wxs5Ve7D+BK80sC3DA\nduAmAOdcnpktwD/QXAfc4pxrVS/bAAAKC0lEQVSrBzCzW4G3gVhgrnMurxV5ibRYUUUVv/n7ehau\n2kVcjHHRiN78ctIwFQbpcMw5F+ocWiQ7O9vl5OSEOg2JElW19Tyz9HMeWrSJmnofM84dzNSzTqJn\nl6RQpyYSVGaW65zLbmo7nSEtHVp5ZS0LcnYy5+OtFB+o5tyhPbhn8khNT5UOT8VBOpyq2npe+qyA\nN9bsZsnWUup9jrOHdOfhKVmcNbi7ZiGJoOIgHUhdvY+XPyvkD+9uYld5FSdnpHDTxMFccnofRmbq\nXgsigVQcJOodrK7j7bV7+OMH+WwpPsSofl353fdHcfaQHqFOTSRsqThI1Kn3OdYUlvPW2j28tXY3\nO0orcQ6G9EzlT9eM4aIRvdV1JNIEFQeJCvsOVrNo3V427DnAG2t2U3SgmtgYY+LQHkzOyuTcoT0Y\nMyCdmBgVBZHmUHGQiLVuVwWvrSxk+fZS1hSWU1vvSIyL4dyhGXx7VB/OHZqh23OKtJCKg0SUssoa\nPtxUzPxlO/l06z7iYowxA9K5/uxBXDYmk1N6dtbRgUgQqDhIWD
tcU8/qgjKWby/lzbV7yNtVAUBm\nWjJ3Xnwql2f3J11HByJBp+IgYaWu3sd7G4r4dOs+Ptuxn7xdFdT5/GfxZ/VP4/YLT2H84O6M1fiB\nSJtScZB2V1Pn4/PSSooqqthTUcW2kkNs2nuAwrLD7CmvpuRgNUnxMYzql8aMiYMZe1I6oweka/xA\npB2pOEibcc5RcrCG/KKD5BcdIG9XBWt3lbNxzwFq6/91Ta/YGGNg907079aJwT1S+dYZffjqqT2J\nj2317UZEpIVUHCQonHPsr6xla/FBNu49wLJtpSzZuo+9FdVfbJPWKZ7TM7ty/TmDOLV3Z3p3SaZX\nl0T6pXciIU6FQCScqDhIs+2tqGLF52UU7K+k6EA1eyuq2FtRRVGFf/lQTf0X22Z0TmTC4O5k9U9j\naM9UhvRMpU/XJJ18JhIhVBzkS8oP17KztNL/2F/J56WV7Cw9TH7RQQrLDn+xXWJcDL26JNGrSyLD\n+3bh/GE96ZuWxMkZqZyckUr/bskqBCIRTMWhA2jo8ik5WE3F4VoqqmopOVDj/8v/QBV7yqspLDtM\n4f5KKqrqvrRv1+R4+ndLZvSANK4/ZxBjBqQxuEcqXZLj9OMvEsVUHCJAw8Bu+eFaaup8HK6tZ3f5\nYSpr6sGBzzkc/ueyytp/dfUc8D8XHaj60gBwoPRO8fTsnERmejLjBqbTLz2Z/un+weH+3TrRNTm+\nfRsrImFBxaGNOec4VFPP/kM1lFXWsr+yhv2V/uWyylqq6uqp9znq6h0+50hNjMPnHKWHary/5g9T\nWHaY6jpfsz+zS1Kc1+WTxPhBKfT0un96pCbSNTmezklx9EhNJKNzIknxsW3YehGJVCoOHuccB6vr\nqKyp9z9X11NZU0dVnY+DVXUUHaii+EA1VbU+6n0+fA4S4mJIiIvB53NU1/moqq2nrLKW0soayipr\n2F9ZS3llLTX1jf+wJ8TGEBtjxMUYGByqrsPMSO8UT2ZaMqf26cwFw3uSmZZMekoCiXExJMbF0ict\niZSEOGJiDAPMIMaMrsnx+sEXkVaL+OLgnP+HueEH/VBNHYeq6zhcW0+sGfFxMcTHxhAfaxhG8cFq\ndpUdZleZ/y/y3WVV7Co/zO7yKmqa+Os8NsboFB/r/0E2qK3zUVPvI8aMBO9HO71TPOmdEhjUI4Ux\nnRJI65TwRSytUzzpKQmke7GuyfHEHTGXv+Ge3urPF5FQsoYfo1Azs0nAw0As8Bfn3H3H2z613zA3\nZMb/cbCq7ovLK5yIGINeXZLo0zWJvmnJZKYl0yM1kZTEOFISY+mUEEdKQiyJ8bGkJMaSkZpIeqcE\nXbJBRCKameU657Kb2i4sjhzMLBaYBXwDKACWm9lC59y6xvZJSYxl8qi+pCbFkZIYR2piHCkJ//ph\nT06Ixedz1NY7auv9f+H7fI4enRPpm5ZMr86JR/3VLiIifmFRHIAzgXzn3FYAM5sPTAYaLQ790zvx\n68kj2yk9EZGOJVz+dM4Edga8LvBiIiISAuFSHJrFzGaYWY6Z5RQXF4c6HRGRqBUuxaEQ6B/wup8X\n+xLn3BznXLZzLjsjI6PdkhMR6WjCpTgsB4aa2SAzSwCmAAtDnJOISIcVFgPSzrk6M7sVeBv/VNa5\nzrm8EKclItJhhUVxAHDOvQG8Eeo8REQkfLqVREQkjKg4iIjIUcLm8hknyswOABtDnUcI9ABKQp1E\niHTUtnfUdkPHbXtbtvsk51yT0z3DZsyhBTY25/og0cbMcjpiu6Hjtr2jths6btvDod3qVhIRkaOo\nOIiIyFEiuTjMCXUCIdJR2w0dt+0dtd3Qcdse8nZH7IC0iIi0nUg+chARkTYSccXBzCaZ2UYzyzez\nO0KdT1szs+1mtsbMVppZjhfrZmaLzGyz95we6jxby8zmmlmRma0NiB2zneb3iPdvYLWZjQld5q3X\nSNtnmlmh972vNLNLAtbd6b
V9o5ldFJqsW8/M+pvZ+2a2zszyzOynXjyqv/fjtDu8vnPnXMQ88F93\naQswGEgAVgGnhTqvNm7zdqDHEbEHgDu85TuA+0OdZxDaOREYA6xtqp3AJcCbgAETgKWhzr8N2j4T\nuP0Y257m/btPBAZ5/z/EhroNLWx3H2CMt9wZ2OS1L6q/9+O0O6y+80g7cvjijnHOuRqg4Y5xHc1k\nYJ63PA+4NIS5BIVz7iOg9IhwY+2cDDzl/JYAaWbWp30yDb5G2t6YycB851y1c24bkI///4uI45zb\n7Zz7zFs+AKzHf5OvqP7ej9PuxoTkO4+04tAR7xjngHfMLNfMZnixXs653d7yHqBXaFJrc421s6P8\nO7jV6z6ZG9B1GJVtN7OBwGhgKR3oez+i3RBG33mkFYeO6Bzn3BjgYuAWM5sYuNL5jzujfspZR2ln\ngNnAyUAWsBt4MLTptB0zSwVeAn7mnKsIXBfN3/sx2h1W33mkFYdm3TEumjjnCr3nIuAV/IeTexsO\np73notBl2KYaa2fU/ztwzu11ztU753zAY/yrGyGq2m5m8fh/IJ9xzr3shaP+ez9Wu8PtO4+04tCh\n7hhnZilm1rlhGbgQWIu/zdO8zaYBr4UmwzbXWDsXAlO92SsTgPKAboiocERf+mX4v3fwt32KmSWa\n2SBgKLCsvfMLBjMz4HFgvXPu9wGrovp7b6zdYfedh3rkvgUj/ZfgH93fAtwV6nzauK2D8c9SWAXk\nNbQX6A4sBjYD7wLdQp1rENr6HP5D6Vr8farTG2sn/tkqs7x/A2uA7FDn3wZtf9pr22r8Pw59Ara/\ny2v7RuDiUOffinafg7/LaDWw0ntcEu3f+3HaHVbfuc6QFhGRo0Rat5KIiLQDFQcRETmKioOIiBxF\nxUFERI6i4iAiIkdRcRARkaOoOIiIyFFUHERE5Cj/H32D6t3fpfOrAAAAAElFTkSuQmCC\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "gdp.plot()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Paste DataFrame into Excel\n",
+ "`to_clipboard()` copies the DataFrame to the system clipboard so it can be pasted straight into Excel."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gdp.to_clipboard()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Streaming with Iterators!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Dask](https://dask.pydata.org/en/latest/) is an option to consider for distributing Pandas-like operations. It lazily builds a directed acyclic graph of the computations instead of running them immediately. The API is very similar to that of Pandas."
+ ]
}
],
"metadata": {