diff --git a/100-pandas-puzzles.ipynb b/100-pandas-puzzles.ipynb
index aa4381626..9642fa1b8 100644
--- a/100-pandas-puzzles.ipynb
+++ b/100-pandas-puzzles.ipynb
@@ -46,7 +46,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "import pandas as pd"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -62,7 +64,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "pd.__version__"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -78,7 +82,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "pd.show_versions(as_json=False)"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -117,7 +123,15 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "import numpy as np\n",
+    "data = {'animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],\n",
+    "        'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],\n",
+    "        'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],\n",
+    "        'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}\n",
+    "labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']\n",
+    "df = pd.DataFrame(data, index=labels)"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -133,7 +147,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df.info()"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -149,7 +165,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df.head(3)"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -165,7 +183,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df.loc[:,['animal','age']]"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -181,7 +201,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df.loc[df.index[[3, 4, 8]], ['animal', 'age']]"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -197,7 +219,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df[df['visits'] > 3]"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -213,7 +237,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df[df['age'].isnull()]"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -229,7
+255,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df[(df['animal'] == 'cat') & (df['age'] < 3)]"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -245,7 +273,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df[df['age'].between(2, 4)]"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -261,7 +291,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df.loc['f', 'age'] = 1.5"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -277,7 +309,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df['visits'].sum()"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -293,7 +327,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df.groupby('animal')['age'].mean()"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -309,7 +345,10 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df.loc['k'] = pd.Series({'animal': 'cat', 'age': 4.5, 'visits': 2, 'priority': 'yes'})\n",
+    "df = df.drop('k')"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -325,7 +364,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df['animal'].value_counts()"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -341,7 +382,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df.sort_values(by=['age', 'visits'], ascending=[False, True])"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -357,7 +400,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df['priority'] = df['priority'].map({'yes': True, 'no': False})"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -373,7 +418,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df['animal'] = df['animal'].replace('snake', 'python')"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -389,7 +436,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df.pivot_table(index='animal', columns='visits', values='age', aggfunc='mean')"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -444,7 +493,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df.sub(df.mean(axis=1), axis=0)"
+   ]
   },
   {
    "cell_type": "markdown",
@@
-465,7 +516,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df.sum().idxmin()"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -481,7 +534,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "len(df.drop_duplicates(keep=False))"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -501,7 +556,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "(df.isnull().cumsum(axis=1) == 3).idxmax(axis=1)"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -523,7 +580,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df.groupby('grps')['vals'].nlargest(3).groupby(level=0).sum()"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -539,7 +598,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "df.groupby(pd.cut(df['A'], np.arange(0, 101, 10)))['B'].sum()"
+   ]
   },
   {
    "cell_type": "markdown",