pandas.html

<!DOCTYPE html>
<html lang="en" xml:lang="en">
<head>

  <meta charset="utf-8" />
  <meta http-equiv="X-UA-Compatible" content="IE=edge" />
  <title>Chapter 9 The pandas Library | Introduction to Programming</title>
  <meta name="description" content="Course reader for IMT/LIS 511: Introduction to Programming for Information and Data Science." />
  <meta name="generator" content="bookdown 0.24 and GitBook 2.6.7" />

  <meta property="og:title" content="Chapter 9 The pandas Library | Introduction to Programming" />
  <meta property="og:type" content="book" />
  <meta property="og:url" content="https://infx511.github.io/" />
  
  <meta property="og:description" content="Course reader for IMT/LIS 511: Introduction to Programming for Information and Data Science." />
  <meta name="github-repo" content="infx511/book" />

  <meta name="twitter:card" content="summary" />
  <meta name="twitter:title" content="Chapter 9 The pandas Library | Introduction to Programming" />
  
  <meta name="twitter:description" content="Course reader for IMT/LIS 511: Introduction to Programming for Information and Data Science." />
  

<meta name="author" content="Joel Ross" />


<meta name="date" content="2023-07-26" />

  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <meta name="apple-mobile-web-app-capable" content="yes" />
  <meta name="apple-mobile-web-app-status-bar-style" content="black" />
  
  <link rel="shortcut icon" href="img/ischool-gold.png" type="image/x-icon" />
<link rel="prev" href="functional-programming.html"/>
<link rel="next" href="accessing-web-apis.html"/>
<script src="libs/header-attrs-2.11/header-attrs.js"></script>
<script src="libs/jquery-3.6.0/jquery-3.6.0.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/fuse.js@6.4.6/dist/fuse.min.js"></script>
<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-clipboard.css" rel="stylesheet" />


<link href="libs/anchor-sections-1.0.1/anchor-sections.css" rel="stylesheet" />
<script src="libs/anchor-sections-1.0.1/anchor-sections.js"></script>


<link rel="stylesheet" href="css/style.css" type="text/css" />
<link rel="stylesheet" href="css/prism.min.css" type="text/css" />
</head>

<body>


  <div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">

    <div class="book-summary">
      <nav role="navigation">

<ul class="summary">
<li><a href="./" class="title">Introduction to Programming</a></li>

<li class="divider"></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i>About this Book</a></li>
<li class="part"><span><b>I Python Basics</b></span></li>
<li class="chapter" data-level="1" data-path="python-intro.html"><a href="python-intro.html"><i class="fa fa-check"></i><b>1</b> Introduction to Python</a>
<ul>
<li class="chapter" data-level="1.1" data-path="python-intro.html"><a href="python-intro.html#programming-with-python"><i class="fa fa-check"></i><b>1.1</b> Programming with Python</a>
<ul>
<li class="chapter" data-level="1.1.1" data-path="python-intro.html"><a href="python-intro.html#versions"><i class="fa fa-check"></i><b>1.1.1</b> Versions</a></li>
</ul></li>
<li class="chapter" data-level="1.2" data-path="python-intro.html"><a href="python-intro.html#running-python-code"><i class="fa fa-check"></i><b>1.2</b> Running Python Code</a>
<ul>
<li class="chapter" data-level="1.2.1" data-path="python-intro.html"><a href="python-intro.html#jupyter-notebooks"><i class="fa fa-check"></i><b>1.2.1</b> Jupyter Notebooks</a></li>
<li class="chapter" data-level="1.2.2" data-path="python-intro.html"><a href="python-intro.html#on-the-command-line"><i class="fa fa-check"></i><b>1.2.2</b> On the Command Line</a></li>
</ul></li>
<li class="chapter" data-level="1.3" data-path="python-intro.html"><a href="python-intro.html#comments"><i class="fa fa-check"></i><b>1.3</b> Comments</a></li>
<li class="chapter" data-level="1.4" data-path="python-intro.html"><a href="python-intro.html#variables"><i class="fa fa-check"></i><b>1.4</b> Variables</a>
<ul>
<li class="chapter" data-level="1.4.1" data-path="python-intro.html"><a href="python-intro.html#data-types"><i class="fa fa-check"></i><b>1.4.1</b> Data Types</a></li>
</ul></li>
<li class="chapter" data-level="1.5" data-path="python-intro.html"><a href="python-intro.html#getting-help"><i class="fa fa-check"></i><b>1.5</b> Getting Help</a></li>
<li class="chapter" data-level="" data-path="python-intro.html"><a href="python-intro.html#resources"><i class="fa fa-check"></i>Resources</a></li>
</ul></li>
<li class="chapter" data-level="2" data-path="functions.html"><a href="functions.html"><i class="fa fa-check"></i><b>2</b> Functions</a>
<ul>
<li class="chapter" data-level="2.1" data-path="functions.html"><a href="functions.html#what-are-functions"><i class="fa fa-check"></i><b>2.1</b> What are Functions?</a></li>
<li class="chapter" data-level="2.2" data-path="functions.html"><a href="functions.html#python-function-syntax"><i class="fa fa-check"></i><b>2.2</b> Python Function Syntax</a>
<ul>
<li class="chapter" data-level="2.2.1" data-path="functions.html"><a href="functions.html#object-methods"><i class="fa fa-check"></i><b>2.2.1</b> Object Methods</a></li>
</ul></li>
<li class="chapter" data-level="2.3" data-path="functions.html"><a href="functions.html#built-in-python-functions"><i class="fa fa-check"></i><b>2.3</b> Built-in Python Functions</a>
<ul>
<li class="chapter" data-level="2.3.1" data-path="functions.html"><a href="functions.html#modules-and-libraries"><i class="fa fa-check"></i><b>2.3.1</b> Modules and Libraries</a></li>
</ul></li>
<li class="chapter" data-level="2.4" data-path="functions.html"><a href="functions.html#writing-functions"><i class="fa fa-check"></i><b>2.4</b> Writing Functions</a>
<ul>
<li class="chapter" data-level="2.4.1" data-path="functions.html"><a href="functions.html#doc-strings"><i class="fa fa-check"></i><b>2.4.1</b> Doc Strings</a></li>
</ul></li>
<li class="chapter" data-level="" data-path="functions.html"><a href="functions.html#resources-1"><i class="fa fa-check"></i>Resources</a></li>
</ul></li>
<li class="chapter" data-level="3" data-path="logic.html"><a href="logic.html"><i class="fa fa-check"></i><b>3</b> Logic and Conditionals</a>
<ul>
<li class="chapter" data-level="3.1" data-path="logic.html"><a href="logic.html#booleans"><i class="fa fa-check"></i><b>3.1</b> Booleans</a>
<ul>
<li class="chapter" data-level="3.1.1" data-path="logic.html"><a href="logic.html#relational-operators"><i class="fa fa-check"></i><b>3.1.1</b> Relational Operators</a></li>
<li class="chapter" data-level="3.1.2" data-path="logic.html"><a href="logic.html#boolean-operators"><i class="fa fa-check"></i><b>3.1.2</b> Boolean Operators</a></li>
</ul></li>
<li class="chapter" data-level="3.2" data-path="logic.html"><a href="logic.html#conditional-statements"><i class="fa fa-check"></i><b>3.2</b> Conditional Statements</a>
<ul>
<li class="chapter" data-level="3.2.1" data-path="logic.html"><a href="logic.html#designing-conditions"><i class="fa fa-check"></i><b>3.2.1</b> Designing Conditions</a></li>
</ul></li>
<li class="chapter" data-level="3.3" data-path="logic.html"><a href="logic.html#determining-module-or-script"><i class="fa fa-check"></i><b>3.3</b> Determining Module or Script</a></li>
<li class="chapter" data-level="" data-path="logic.html"><a href="logic.html#resources-2"><i class="fa fa-check"></i>Resources</a></li>
</ul></li>
<li class="chapter" data-level="4" data-path="lists.html"><a href="lists.html"><i class="fa fa-check"></i><b>4</b> Lists and Sequences</a>
<ul>
<li class="chapter" data-level="4.1" data-path="lists.html"><a href="lists.html#what-is-a-list"><i class="fa fa-check"></i><b>4.1</b> What is a List?</a></li>
<li class="chapter" data-level="4.2" data-path="lists.html"><a href="lists.html#list-indices"><i class="fa fa-check"></i><b>4.2</b> List Indices</a></li>
<li class="chapter" data-level="4.3" data-path="lists.html"><a href="lists.html#list-operations-and-methods"><i class="fa fa-check"></i><b>4.3</b> List Operations and Methods</a></li>
<li class="chapter" data-level="4.4" data-path="lists.html"><a href="lists.html#nested-lists"><i class="fa fa-check"></i><b>4.4</b> Nested Lists</a></li>
<li class="chapter" data-level="4.5" data-path="lists.html"><a href="lists.html#other-sequences"><i class="fa fa-check"></i><b>4.5</b> Other Sequences</a>
<ul>
<li class="chapter" data-level="4.5.1" data-path="lists.html"><a href="lists.html#ranges"><i class="fa fa-check"></i><b>4.5.1</b> Ranges</a></li>
<li class="chapter" data-level="4.5.2" data-path="lists.html"><a href="lists.html#tuples"><i class="fa fa-check"></i><b>4.5.2</b> Tuples</a></li>
</ul></li>
<li class="chapter" data-level="" data-path="lists.html"><a href="lists.html#resources-3"><i class="fa fa-check"></i>Resources</a></li>
</ul></li>
<li class="chapter" data-level="5" data-path="loops.html"><a href="loops.html"><i class="fa fa-check"></i><b>5</b> Iterating with Loops</a>
<ul>
<li class="chapter" data-level="5.1" data-path="loops.html"><a href="loops.html#for-loops"><i class="fa fa-check"></i><b>5.1</b> For Loops</a>
<ul>
<li class="chapter" data-level="5.1.1" data-path="loops.html"><a href="loops.html#variables-and-loops"><i class="fa fa-check"></i><b>5.1.1</b> Variables and Loops</a></li>
</ul></li>
<li class="chapter" data-level="5.2" data-path="loops.html"><a href="loops.html#lists-and-loops"><i class="fa fa-check"></i><b>5.2</b> Lists and Loops</a></li>
<li class="chapter" data-level="5.3" data-path="loops.html"><a href="loops.html#nested-loops"><i class="fa fa-check"></i><b>5.3</b> Nested Loops</a></li>
<li class="chapter" data-level="5.4" data-path="loops.html"><a href="loops.html#list-comprehensions"><i class="fa fa-check"></i><b>5.4</b> List Comprehensions</a></li>
<li class="chapter" data-level="5.5" data-path="loops.html"><a href="loops.html#while-loops"><i class="fa fa-check"></i><b>5.5</b> While Loops</a>
<ul>
<li class="chapter" data-level="5.5.1" data-path="loops.html"><a href="loops.html#counting-with-while-loops"><i class="fa fa-check"></i><b>5.5.1</b> Counting with While Loops</a></li>
<li class="chapter" data-level="5.5.2" data-path="loops.html"><a href="loops.html#sentinels"><i class="fa fa-check"></i><b>5.5.2</b> Sentinels</a></li>
<li class="chapter" data-level="5.5.3" data-path="loops.html"><a href="loops.html#difference-between-for-and-while-loops"><i class="fa fa-check"></i><b>5.5.3</b> Difference Between For and While Loops</a></li>
</ul></li>
<li class="chapter" data-level="5.6" data-path="loops.html"><a href="loops.html#iterating-over-files"><i class="fa fa-check"></i><b>5.6</b> Iterating over Files</a>
<ul>
<li class="chapter" data-level="5.6.1" data-path="loops.html"><a href="loops.html#tryexcept"><i class="fa fa-check"></i><b>5.6.1</b> Try/Except</a></li>
</ul></li>
<li class="chapter" data-level="" data-path="loops.html"><a href="loops.html#resources-4"><i class="fa fa-check"></i>Resources</a></li>
</ul></li>
<li class="chapter" data-level="6" data-path="dictionaries.html"><a href="dictionaries.html"><i class="fa fa-check"></i><b>6</b> Dictionaries</a>
<ul>
<li class="chapter" data-level="6.1" data-path="dictionaries.html"><a href="dictionaries.html#what-is-a-dictionary"><i class="fa fa-check"></i><b>6.1</b> What is a Dictionary?</a></li>
<li class="chapter" data-level="6.2" data-path="dictionaries.html"><a href="dictionaries.html#accessing-a-dictionary"><i class="fa fa-check"></i><b>6.2</b> Accessing a Dictionary</a></li>
<li class="chapter" data-level="6.3" data-path="dictionaries.html"><a href="dictionaries.html#dictionary-methods"><i class="fa fa-check"></i><b>6.3</b> Dictionary Methods</a></li>
<li class="chapter" data-level="6.4" data-path="dictionaries.html"><a href="dictionaries.html#nesting-dictionaries"><i class="fa fa-check"></i><b>6.4</b> Nesting Dictionaries</a></li>
<li class="chapter" data-level="6.5" data-path="dictionaries.html"><a href="dictionaries.html#dictionaries-and-loops"><i class="fa fa-check"></i><b>6.5</b> Dictionaries and Loops</a>
<ul>
<li class="chapter" data-level="6.5.1" data-path="dictionaries.html"><a href="dictionaries.html#dictionary-comprehensions"><i class="fa fa-check"></i><b>6.5.1</b> Dictionary Comprehensions</a></li>
</ul></li>
<li class="chapter" data-level="6.6" data-path="dictionaries.html"><a href="dictionaries.html#which-data-structure-do-i-use"><i class="fa fa-check"></i><b>6.6</b> Which Data Structure Do I Use?</a></li>
<li class="chapter" data-level="" data-path="dictionaries.html"><a href="dictionaries.html#resources-5"><i class="fa fa-check"></i>Resources</a></li>
</ul></li>
<li class="chapter" data-level="7" data-path="searching.html"><a href="searching.html"><i class="fa fa-check"></i><b>7</b> Searching and Filtering</a>
<ul>
<li class="chapter" data-level="7.1" data-path="searching.html"><a href="searching.html#linear-search"><i class="fa fa-check"></i><b>7.1</b> Linear Search</a>
<ul>
<li class="chapter" data-level="7.1.1" data-path="searching.html"><a href="searching.html#maximal-search"><i class="fa fa-check"></i><b>7.1.1</b> Maximal Search</a></li>
<li class="chapter" data-level="7.1.2" data-path="searching.html"><a href="searching.html#falsification-search"><i class="fa fa-check"></i><b>7.1.2</b> Falsification Search</a></li>
</ul></li>
<li class="chapter" data-level="7.2" data-path="searching.html"><a href="searching.html#filtering"><i class="fa fa-check"></i><b>7.2</b> Filtering</a></li>
<li class="chapter" data-level="7.3" data-path="searching.html"><a href="searching.html#mapping"><i class="fa fa-check"></i><b>7.3</b> Mapping</a></li>
<li class="chapter" data-level="7.4" data-path="searching.html"><a href="searching.html#search-efficiency"><i class="fa fa-check"></i><b>7.4</b> Search Efficiency</a>
<ul>
<li class="chapter" data-level="7.4.1" data-path="searching.html"><a href="searching.html#linear-search-speed"><i class="fa fa-check"></i><b>7.4.1</b> Linear Search Speed</a></li>
<li class="chapter" data-level="7.4.2" data-path="searching.html"><a href="searching.html#faster-searching-binary-search"><i class="fa fa-check"></i><b>7.4.2</b> Faster Searching: Binary Search</a></li>
<li class="chapter" data-level="7.4.3" data-path="searching.html"><a href="searching.html#slower-algorithms-sorting"><i class="fa fa-check"></i><b>7.4.3</b> Slower Algorithms: Sorting</a></li>
</ul></li>
<li class="chapter" data-level="" data-path="searching.html"><a href="searching.html#resources-6"><i class="fa fa-check"></i>Resources</a></li>
</ul></li>
<li class="chapter" data-level="8" data-path="functional-programming.html"><a href="functional-programming.html"><i class="fa fa-check"></i><b>8</b> Functional Programming</a>
<ul>
<li class="chapter" data-level="8.1" data-path="functional-programming.html"><a href="functional-programming.html#functions-are-variables"><i class="fa fa-check"></i><b>8.1</b> Functions ARE Variables</a>
<ul>
<li class="chapter" data-level="8.1.1" data-path="functional-programming.html"><a href="functional-programming.html#lambdas-anonymous-functions"><i class="fa fa-check"></i><b>8.1.1</b> lambdas: Anonymous Functions</a></li>
</ul></li>
<li class="chapter" data-level="8.2" data-path="functional-programming.html"><a href="functional-programming.html#functional-looping"><i class="fa fa-check"></i><b>8.2</b> Functional Looping</a>
<ul>
<li class="chapter" data-level="8.2.1" data-path="functional-programming.html"><a href="functional-programming.html#map"><i class="fa fa-check"></i><b>8.2.1</b> Map</a></li>
<li class="chapter" data-level="8.2.2" data-path="functional-programming.html"><a href="functional-programming.html#filter"><i class="fa fa-check"></i><b>8.2.2</b> Filter</a></li>
<li class="chapter" data-level="8.2.3" data-path="functional-programming.html"><a href="functional-programming.html#reduce"><i class="fa fa-check"></i><b>8.2.3</b> Reduce</a></li>
</ul></li>
<li class="chapter" data-level="" data-path="functional-programming.html"><a href="functional-programming.html#resources-7"><i class="fa fa-check"></i>Resources</a></li>
</ul></li>
<li class="part"><span><b>II Specific Libraries</b></span></li>
<li class="chapter" data-level="9" data-path="pandas.html"><a href="pandas.html"><i class="fa fa-check"></i><b>9</b> The <code>pandas</code> Library</a>
<ul>
<li class="chapter" data-level="9.1" data-path="pandas.html"><a href="pandas.html#setting-up-pandas"><i class="fa fa-check"></i><b>9.1</b> Setting up <code>pandas</code></a></li>
<li class="chapter" data-level="9.2" data-path="pandas.html"><a href="pandas.html#series"><i class="fa fa-check"></i><b>9.2</b> Series</a>
<ul>
<li class="chapter" data-level="9.2.1" data-path="pandas.html"><a href="pandas.html#series-operations"><i class="fa fa-check"></i><b>9.2.1</b> Series Operations</a></li>
<li class="chapter" data-level="9.2.2" data-path="pandas.html"><a href="pandas.html#series-methods"><i class="fa fa-check"></i><b>9.2.2</b> Series Methods</a></li>
<li class="chapter" data-level="9.2.3" data-path="pandas.html"><a href="pandas.html#series-indexing"><i class="fa fa-check"></i><b>9.2.3</b> Series Indexing</a></li>
</ul></li>
<li class="chapter" data-level="9.3" data-path="pandas.html"><a href="pandas.html#dataframes"><i class="fa fa-check"></i><b>9.3</b> DataFrames</a>
<ul>
<li class="chapter" data-level="9.3.1" data-path="pandas.html"><a href="pandas.html#dataframe-operations"><i class="fa fa-check"></i><b>9.3.1</b> DataFrame Operations</a></li>
<li class="chapter" data-level="9.3.2" data-path="pandas.html"><a href="pandas.html#dataframe-methods"><i class="fa fa-check"></i><b>9.3.2</b> DataFrame Methods</a></li>
<li class="chapter" data-level="9.3.3" data-path="pandas.html"><a href="pandas.html#accessing-dataframes"><i class="fa fa-check"></i><b>9.3.3</b> Accessing DataFrames</a></li>
</ul></li>
<li class="chapter" data-level="9.4" data-path="pandas.html"><a href="pandas.html#grouping"><i class="fa fa-check"></i><b>9.4</b> Grouping</a>
<ul>
<li class="chapter" data-level="9.4.1" data-path="pandas.html"><a href="pandas.html#aggregation"><i class="fa fa-check"></i><b>9.4.1</b> Aggregation</a></li>
</ul></li>
<li class="chapter" data-level="" data-path="pandas.html"><a href="pandas.html#resources-8"><i class="fa fa-check"></i>Resources</a></li>
</ul></li>
<li class="chapter" data-level="10" data-path="accessing-web-apis.html"><a href="accessing-web-apis.html"><i class="fa fa-check"></i><b>10</b> Accessing Web APIs</a>
<ul>
<li class="chapter" data-level="10.1" data-path="accessing-web-apis.html"><a href="accessing-web-apis.html#web-apis"><i class="fa fa-check"></i><b>10.1</b> Web APIs</a></li>
<li class="chapter" data-level="10.2" data-path="accessing-web-apis.html"><a href="accessing-web-apis.html#restful-requests"><i class="fa fa-check"></i><b>10.2</b> RESTful Requests</a>
<ul>
<li class="chapter" data-level="10.2.1" data-path="accessing-web-apis.html"><a href="accessing-web-apis.html#uris"><i class="fa fa-check"></i><b>10.2.1</b> URIs</a></li>
<li class="chapter" data-level="10.2.2" data-path="accessing-web-apis.html"><a href="accessing-web-apis.html#http-verbs"><i class="fa fa-check"></i><b>10.2.2</b> HTTP Verbs</a></li>
</ul></li>
<li class="chapter" data-level="10.3" data-path="accessing-web-apis.html"><a href="accessing-web-apis.html#accessing-web-apis-1"><i class="fa fa-check"></i><b>10.3</b> Accessing Web APIs</a></li>
<li class="chapter" data-level="10.4" data-path="accessing-web-apis.html"><a href="accessing-web-apis.html#json-data"><i class="fa fa-check"></i><b>10.4</b> JSON Data</a></li>
<li class="chapter" data-level="" data-path="accessing-web-apis.html"><a href="accessing-web-apis.html#resources-9"><i class="fa fa-check"></i>Resources</a></li>
</ul></li>
<li class="appendix"><span><b>Appendix</b></span></li>
<li class="chapter" data-level="A" data-path="installing.html"><a href="installing.html"><i class="fa fa-check"></i><b>A</b> Installing Python</a>
<ul>
<li class="chapter" data-level="A.1" data-path="installing.html"><a href="installing.html#python"><i class="fa fa-check"></i><b>A.1</b> Python</a>
<ul>
<li class="chapter" data-level="A.1.1" data-path="installing.html"><a href="installing.html#anaconda"><i class="fa fa-check"></i><b>A.1.1</b> Anaconda</a></li>
</ul></li>
<li class="chapter" data-level="A.2" data-path="installing.html"><a href="installing.html#text-editors"><i class="fa fa-check"></i><b>A.2</b> Text Editors</a>
<ul>
<li class="chapter" data-level="A.2.1" data-path="installing.html"><a href="installing.html#visual-studio-code"><i class="fa fa-check"></i><b>A.2.1</b> Visual Studio Code</a></li>
<li class="chapter" data-level="A.2.2" data-path="installing.html"><a href="installing.html#atom"><i class="fa fa-check"></i><b>A.2.2</b> Atom</a></li>
<li class="chapter" data-level="A.2.3" data-path="installing.html"><a href="installing.html#sublime-text"><i class="fa fa-check"></i><b>A.2.3</b> Sublime Text</a></li>
<li class="chapter" data-level="A.2.4" data-path="installing.html"><a href="installing.html#pycharm"><i class="fa fa-check"></i><b>A.2.4</b> PyCharm</a></li>
</ul></li>
<li class="chapter" data-level="A.3" data-path="installing.html"><a href="installing.html#command-line-tools-bash"><i class="fa fa-check"></i><b>A.3</b> Command Line Tools (Bash)</a>
<ul>
<li class="chapter" data-level="A.3.1" data-path="installing.html"><a href="installing.html#command-line-on-a-mac"><i class="fa fa-check"></i><b>A.3.1</b> Command Line on a Mac</a></li>
<li class="chapter" data-level="A.3.2" data-path="installing.html"><a href="installing.html#command-line-on-windows"><i class="fa fa-check"></i><b>A.3.2</b> Command Line on Windows</a></li>
</ul></li>
<li class="chapter" data-level="" data-path="installing.html"><a href="installing.html#resources-10"><i class="fa fa-check"></i>Resources</a></li>
</ul></li>
<li class="chapter" data-level="B" data-path="markdown.html"><a href="markdown.html"><i class="fa fa-check"></i><b>B</b> Markdown</a>
<ul>
<li class="chapter" data-level="B.1" data-path="markdown.html"><a href="markdown.html#writing-markdown"><i class="fa fa-check"></i><b>B.1</b> Writing Markdown</a>
<ul>
<li class="chapter" data-level="B.1.1" data-path="markdown.html"><a href="markdown.html#text-formatting"><i class="fa fa-check"></i><b>B.1.1</b> Text Formatting</a></li>
<li class="chapter" data-level="B.1.2" data-path="markdown.html"><a href="markdown.html#text-blocks"><i class="fa fa-check"></i><b>B.1.2</b> Text Blocks</a></li>
</ul></li>
<li class="chapter" data-level="B.2" data-path="markdown.html"><a href="markdown.html#rendering-markdown"><i class="fa fa-check"></i><b>B.2</b> Rendering Markdown</a></li>
<li class="chapter" data-level="" data-path="markdown.html"><a href="markdown.html#resources-11"><i class="fa fa-check"></i>Resources</a></li>
</ul></li>
<li class="chapter" data-level="C" data-path="command-line.html"><a href="command-line.html"><i class="fa fa-check"></i><b>C</b> The Command Line</a>
<ul>
<li class="chapter" data-level="C.1" data-path="command-line.html"><a href="command-line.html#accessing-the-command-line"><i class="fa fa-check"></i><b>C.1</b> Accessing the Command line</a></li>
<li class="chapter" data-level="C.2" data-path="command-line.html"><a href="command-line.html#navigating-the-command-line"><i class="fa fa-check"></i><b>C.2</b> Navigating the Command Line</a>
<ul>
<li class="chapter" data-level="C.2.1" data-path="command-line.html"><a href="command-line.html#changing-directories"><i class="fa fa-check"></i><b>C.2.1</b> Changing Directories</a></li>
<li class="chapter" data-level="C.2.2" data-path="command-line.html"><a href="command-line.html#listing-files"><i class="fa fa-check"></i><b>C.2.2</b> Listing Files</a></li>
<li class="chapter" data-level="C.2.3" data-path="command-line.html"><a href="command-line.html#paths"><i class="fa fa-check"></i><b>C.2.3</b> Paths</a></li>
</ul></li>
<li class="chapter" data-level="C.3" data-path="command-line.html"><a href="command-line.html#file-commands"><i class="fa fa-check"></i><b>C.3</b> File Commands</a>
<ul>
<li class="chapter" data-level="C.3.1" data-path="command-line.html"><a href="command-line.html#learning-new-commands"><i class="fa fa-check"></i><b>C.3.1</b> Learning New Commands</a></li>
<li class="chapter" data-level="C.3.2" data-path="command-line.html"><a href="command-line.html#wildcards"><i class="fa fa-check"></i><b>C.3.2</b> Wildcards</a></li>
</ul></li>
<li class="chapter" data-level="C.4" data-path="command-line.html"><a href="command-line.html#dealing-with-errors"><i class="fa fa-check"></i><b>C.4</b> Dealing With Errors</a></li>
<li class="chapter" data-level="C.5" data-path="command-line.html"><a href="command-line.html#directing-output"><i class="fa fa-check"></i><b>C.5</b> Directing Output</a></li>
<li class="chapter" data-level="C.6" data-path="command-line.html"><a href="command-line.html#shell-scripts"><i class="fa fa-check"></i><b>C.6</b> Shell Scripts</a></li>
<li class="chapter" data-level="" data-path="command-line.html"><a href="command-line.html#resources-12"><i class="fa fa-check"></i>Resources</a></li>
</ul></li>
<li class="chapter" data-level="D" data-path="javascript.html"><a href="javascript.html"><i class="fa fa-check"></i><b>D</b> JavaScript</a>
<ul>
<li class="chapter" data-level="D.1" data-path="javascript.html"><a href="javascript.html#programming-with-javascript"><i class="fa fa-check"></i><b>D.1</b> Programming with JavaScript</a>
<ul>
<li class="chapter" data-level="D.1.1" data-path="javascript.html"><a href="javascript.html#history-and-versions"><i class="fa fa-check"></i><b>D.1.1</b> History and Versions</a></li>
<li class="chapter" data-level="D.1.2" data-path="javascript.html"><a href="javascript.html#running-javascript"><i class="fa fa-check"></i><b>D.1.2</b> Running JavaScript</a></li>
</ul></li>
<li class="chapter" data-level="D.2" data-path="javascript.html"><a href="javascript.html#javascript-basics"><i class="fa fa-check"></i><b>D.2</b> JavaScript Basics</a>
<ul>
<li class="chapter" data-level="D.2.1" data-path="javascript.html"><a href="javascript.html#strict-mode"><i class="fa fa-check"></i><b>D.2.1</b> Strict Mode</a></li>
</ul></li>
<li class="chapter" data-level="D.3" data-path="javascript.html"><a href="javascript.html#variables-1"><i class="fa fa-check"></i><b>D.3</b> Variables</a>
<ul>
<li class="chapter" data-level="D.3.1" data-path="javascript.html"><a href="javascript.html#basic-data-types"><i class="fa fa-check"></i><b>D.3.1</b> Basic Data Types</a></li>
<li class="chapter" data-level="D.3.2" data-path="javascript.html"><a href="javascript.html#type-coercion"><i class="fa fa-check"></i><b>D.3.2</b> Type Coercion</a></li>
<li class="chapter" data-level="D.3.3" data-path="javascript.html"><a href="javascript.html#arrays"><i class="fa fa-check"></i><b>D.3.3</b> Arrays</a></li>
<li class="chapter" data-level="D.3.4" data-path="javascript.html"><a href="javascript.html#objects"><i class="fa fa-check"></i><b>D.3.4</b> Objects</a></li>
</ul></li>
<li class="chapter" data-level="D.4" data-path="javascript.html"><a href="javascript.html#control-structures"><i class="fa fa-check"></i><b>D.4</b> Control Structures</a>
<ul>
<li class="chapter" data-level="D.4.1" data-path="javascript.html"><a href="javascript.html#conditionals"><i class="fa fa-check"></i><b>D.4.1</b> Conditionals</a></li>
<li class="chapter" data-level="D.4.2" data-path="javascript.html"><a href="javascript.html#loops-1"><i class="fa fa-check"></i><b>D.4.2</b> Loops</a></li>
</ul></li>
<li class="chapter" data-level="D.5" data-path="javascript.html"><a href="javascript.html#functions-1"><i class="fa fa-check"></i><b>D.5</b> Functions</a>
<ul>
<li class="chapter" data-level="D.5.1" data-path="javascript.html"><a href="javascript.html#functional-programming-1"><i class="fa fa-check"></i><b>D.5.1</b> Functional Programming</a></li>
</ul></li>
<li class="chapter" data-level="" data-path="javascript.html"><a href="javascript.html#resources-13"><i class="fa fa-check"></i>Resources</a></li>
</ul></li>
<li class="divider"></li>
<li><a href="https://github.com/rstudio/bookdown" target="_blank" rel="noopener">Published with bookdown</a></li>

</ul>

      </nav>
    </div>

    <div class="book-body">
      <div class="body-inner">
        <div class="book-header" role="navigation">
          <h1>
            <i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Introduction to Programming</a>
          </h1>
        </div>

        <div class="page-wrapper" tabindex="-1" role="main">
          <div class="page-inner">

            <section class="normal" id="section-">
<div id="pandas" class="section level1" number="9">
<h1><span class="header-section-number">Chapter 9</span> The <code>pandas</code> Library</h1>
<p>This chapter introduces the <em>Python Data Analysis</em> library <a href="http://pandas.pydata.org/"><strong><code>pandas</code></strong></a>—a set of modules, functions, and classes used to easily and efficiently perform data analysis. <code>pandas</code>’s speciality is its highly optimized performance when working with large data sets. <code>pandas</code> is the most common library used with Python for Data Science (and mirrors the <code>R</code> language in many ways, allowing programmers to easily move between the two).
This chapter will give you an introduction to this library. It will discuss the two main data structures used by <code>pandas</code> (<em>Series</em> and <em>DataFrames</em>) and how to use them to organize and work with data, as well as to perform basic grouping and aggregation functions. Note that this should not be considered a complete reference or tutorial for the library—there’s a lot it can do, and this text gives you just a taste!</p>
<div id="setting-up-pandas" class="section level2" number="9.1">
<h2><span class="header-section-number">9.1</span> Setting up <code>pandas</code></h2>
<p><code>pandas</code> is a <strong>third-party</strong> library (not built into Python!), but is included by default with most Python setups such as Anaconda and so can usually be imported without additional installation. Additionally, <code>pandas</code> is built on top of the <a href="http://www.numpy.org/"><code>numpy</code></a> scientific computing library which supports highly optimized mathematical operations. Thus many <code>pandas</code> operations involve working with <code>numpy</code> data structures, and the <code>pandas</code> library requires <code>numpy</code> (which is also included in Anaconda) to be imported alongside it:</p>
<pre class="language-python"><code><span class="token comment"># import libraries</span>
<span class="token keyword">import</span> pandas <span class="token keyword">as</span> pd  <span class="token comment"># standard shortcut names</span>
<span class="token keyword">import</span> numpy <span class="token keyword">as</span> np</code></pre>
<p>Normal practice is to <code>import</code> the module and reference types and methods using dot notation, rather than importing them into the global namespace. This helps keep the global namespace from being overfilled with variables. Also note that this chapter will focus primarily on <code>pandas</code>, leaving <code>numpy</code>-specific data structures and functions for the reader to explore.</p>
</div>
<div id="series" class="section level2" number="9.2">
<h2><span class="header-section-number">9.2</span> Series</h2>
<p>The first basic <code>pandas</code> data structure is a <a href="https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.html"><strong>Series</strong></a>. A Series represents a <em>one-dimensional ordered collection of values</em>, making it similar to a regular Python <em>list</em>. However, elements can also be given <em>labels</em> (called the <strong>index</strong>), which can be non-numeric values, similar to the keys in a Python <em>dictionary</em>. This makes a Series a bit like an “ordered dictionary”—one that supports additional methods and efficient data-processing behaviors.</p>
<p>Series can be created using the <code>Series()</code> function (a <em>constructor</em> for instances of the class):</p>
<pre class="language-python"><code><span class="token comment"># create a Series from a list</span>
number_series <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token number">3</span><span class="token punctuation">,</span> <span class="token number">5</span><span class="token punctuation">,</span> <span class="token number">8</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>number_series<span class="token punctuation">)</span></code></pre>
<p>This code would print out:</p>
<pre><code>0    1
1    2
2    2
3    3
4    5
5    8
dtype: int64</code></pre>
<p>Printing a Series will display it like a <em>table</em>: the first value in each row is the <strong>index</strong> (label) of that element, and the second is the value of the element in the Series. Printing will also display the <em>type</em> of the elements in the Series. All elements in the Series will be treated as the “same” type—if you create a Series from mixed elements (e.g., numbers and strings), the type will be a generic <code>object</code>. In practice, you almost always create Series from a single type.</p>
<p>If you create a Series from a list, each element will be given an <em>index</em> (label) that is that value’s index in the list. You can also create a Series from a <em>dictionary</em>, in which case the keys will be used as the index labels:</p>
<pre class="language-python"><code><span class="token comment"># create a Series from a dictionary</span>
age_series <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">{</span><span class="token string">'sarah'</span><span class="token punctuation">:</span> <span class="token number">42</span><span class="token punctuation">,</span> <span class="token string">'amit'</span><span class="token punctuation">:</span> <span class="token number">35</span><span class="token punctuation">,</span> <span class="token string">'zhang'</span><span class="token punctuation">:</span> <span class="token number">13</span><span class="token punctuation">}</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>age_series<span class="token punctuation">)</span></code></pre>
<pre><code>sarah    42
amit     35
zhang    13
dtype: int64</code></pre>
<p>Notice that the Series keeps its elements in the <strong>same order</strong> as the keys of the dictionary (with Python 3.6+ and <code>pandas</code> 0.23+; older versions instead <em>sorted</em> the Series by key). This means that the order of the elements in the Series will always be the same for a given dictionary.</p>
<div id="series-operations" class="section level3" number="9.2.1">
<h3><span class="header-section-number">9.2.1</span> Series Operations</h3>
<p>The main benefit of Series (as opposed to normal lists or dictionaries) is that they provide a number of operations and methods that make it easy to consider and modify the entire Series, rather than needing to work with each element individually. These functions include built-in <em>mapping</em> and <em>filtering</em> style operations, as well as <em>reducing</em> aggregations.</p>
<p>Basic operators (whether math operators such as <code>+</code> and <code>-</code>, or relational operators such as <code>&gt;</code> or <code>==</code>) applied to Series are what are called <em>vectorized operations</em>, meaning the operation is applied to Series elements <strong>pair-wise</strong>. This means that each element from the first Series operand is modified by the element in the same corresponding position in the second Series operand. This will produce the value at the corresponding position of the resulting Series. In other words, if you want to add two Series, then the value of the first element in the result will be the sum of the first elements in each Series, the second element in the result will be the sum of the second elements in each Series, and so on.</p>
<pre class="language-python"><code><span class="token comment"># Create two Series to combine</span>
s1 <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token number">3</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">4</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">5</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
s2 <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">6</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">8</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">)</span>

s3 <span class="token operator">=</span> s1 <span class="token operator">+</span> s2 <span class="token comment"># add together</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>s3<span class="token punctuation">)</span>
    <span class="token comment"># 0    4  # 3 + 1</span>
    <span class="token comment"># 1    7  # 1 + 6</span>
    <span class="token comment"># 2    5  # 4 + 1</span>
    <span class="token comment"># 3    9  # 1 + 8</span>
    <span class="token comment"># 4    5  # 5 + 0</span>
    <span class="token comment"># dtype: int64</span></code></pre>
<pre class="language-python"><code><span class="token comment"># Create two Series to combine</span>
s1 <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token number">2</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
s2 <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token number">3</span><span class="token punctuation">,</span> <span class="token number">4</span><span class="token punctuation">,</span> <span class="token number">5</span><span class="token punctuation">]</span><span class="token punctuation">)</span>

<span class="token comment"># Examples of operations (list version only includes values)</span>
<span class="token builtin">list</span><span class="token punctuation">(</span>s1 <span class="token operator">+</span> s2<span class="token punctuation">)</span>  <span class="token comment"># [3, 4, 5, 6, 7]</span>
<span class="token builtin">list</span><span class="token punctuation">(</span>s1 <span class="token operator">/</span> s2<span class="token punctuation">)</span>  <span class="token comment"># [2.0, 1.0, 0.66666666666666663, 0.5, 0.40000000000000002]</span>
<span class="token builtin">list</span><span class="token punctuation">(</span>s1 <span class="token operator">&lt;</span> s2<span class="token punctuation">)</span>  <span class="token comment"># [False, False, True, True, True]</span>

<span class="token comment"># Add a Series to itself (why not?)</span>
<span class="token builtin">list</span><span class="token punctuation">(</span>s2 <span class="token operator">+</span> s2<span class="token punctuation">)</span>  <span class="token comment"># [2, 4, 6, 8, 10]</span>

<span class="token comment"># Perform more advanced arithmetic!</span>
s3 <span class="token operator">=</span> <span class="token punctuation">(</span>s1 <span class="token operator">+</span> s2<span class="token punctuation">)</span> <span class="token operator">/</span> <span class="token punctuation">(</span>s1 <span class="token operator">+</span> s1<span class="token punctuation">)</span>
<span class="token builtin">list</span><span class="token punctuation">(</span>s3<span class="token punctuation">)</span>  <span class="token comment"># [0.75, 1.0, 1.25, 1.5, 1.75]</span></code></pre>
<p>These operations will be <em>fast</em>, even for very large Series, allowing for effective data manipulations.</p>
<p>Remember that Series operations are performed on <em>matching</em> indices. If one operand doesn’t have the same index, then you’ll get an undefined value:</p>
<pre class="language-python"><code>alpha_series <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">{</span><span class="token string">'a'</span><span class="token punctuation">:</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token string">'b'</span><span class="token punctuation">:</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token string">'c'</span><span class="token punctuation">:</span> <span class="token number">3</span><span class="token punctuation">,</span> <span class="token string">'m'</span><span class="token punctuation">:</span> <span class="token number">13</span><span class="token punctuation">}</span><span class="token punctuation">)</span>
omega_series <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">{</span><span class="token string">'m'</span><span class="token punctuation">:</span> <span class="token number">13</span><span class="token punctuation">,</span> <span class="token string">'z'</span><span class="token punctuation">:</span> <span class="token number">26</span><span class="token punctuation">}</span><span class="token punctuation">)</span>

result <span class="token operator">=</span> alpha_series <span class="token operator">+</span> omega_series
<span class="token keyword">print</span><span class="token punctuation">(</span>result<span class="token punctuation">)</span>
    <span class="token comment"># a     NaN</span>
    <span class="token comment"># b     NaN</span>
    <span class="token comment"># c     NaN</span>
    <span class="token comment"># m    26.0</span>
    <span class="token comment"># z     NaN</span>
    <span class="token comment"># dtype: float64</span></code></pre>
<p>In this example, only the <code>'m'</code> label was shared between the series, so only that one matched: the other values weren’t defined, so they produce <code>NaN</code> (Not a Number) values as a result. Thus you can perform mathematical operations on Series of different sizes; it’s just that some indices won’t match. But if you try to compare (using <code>&lt;</code> or <code>==</code>) Series with different indices, you will get an error.</p>
<p>It is also possible to use a <em>scalar</em> (a single value) as an operand with a Series. This is referred to as <a href="https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html"><strong>broadcasting</strong></a>. The idea is that the smaller “set” of elements (e.g., a single value) is <em>broadcast</em> — expanded — so that it has a comparable size, thereby allowing different “sized” data structures to interact. Technically, operating on a Series with a scalar is actually a specific case of operating on it with another Series!</p>
<pre class="language-python"><code>sample <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token builtin">range</span><span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">,</span><span class="token number">6</span><span class="token punctuation">)</span><span class="token punctuation">)</span>  <span class="token comment"># Series of numbers from 1 to 5 (6 is excluded)</span>
result <span class="token operator">=</span> sample <span class="token operator">+</span> <span class="token number">4</span>  <span class="token comment"># add 4 to each element (produces new Series)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>result<span class="token punctuation">)</span>
    <span class="token comment"># 0    5</span>
    <span class="token comment"># 1    6</span>
    <span class="token comment"># 2    7</span>
    <span class="token comment"># 3    8</span>
    <span class="token comment"># 4    9</span>
    <span class="token comment"># dtype: int64</span>

is_less_than_3 <span class="token operator">=</span> sample <span class="token operator">&lt;</span> <span class="token number">3</span>  <span class="token comment"># compare each element</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>is_less_than_3<span class="token punctuation">)</span>
    <span class="token comment"># 0     True</span>
    <span class="token comment"># 1     True</span>
    <span class="token comment"># 2    False # comparing to the value, not the index!</span>
    <span class="token comment"># 3    False</span>
    <span class="token comment"># 4    False</span>
    <span class="token comment"># dtype: bool</span></code></pre>
<p>Series containing booleans also support <em>logical operators</em> (“and” and “or”), using the operators <strong><code>&amp;</code></strong> for “and” and <strong><code>|</code></strong> for “or”:</p>
<pre class="language-python"><code>days_it_rained <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">{</span><span class="token string">"mon"</span><span class="token punctuation">:</span> <span class="token boolean">True</span><span class="token punctuation">,</span> <span class="token string">"tue"</span><span class="token punctuation">:</span> <span class="token boolean">True</span><span class="token punctuation">,</span> <span class="token string">"wed"</span><span class="token punctuation">:</span> <span class="token boolean">False</span><span class="token punctuation">,</span> <span class="token string">"thu"</span><span class="token punctuation">:</span> <span class="token boolean">True</span><span class="token punctuation">,</span> <span class="token string">"fri"</span><span class="token punctuation">:</span> <span class="token boolean">False</span><span class="token punctuation">}</span><span class="token punctuation">)</span>
days_it_snowed <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">{</span><span class="token string">"mon"</span><span class="token punctuation">:</span> <span class="token boolean">False</span><span class="token punctuation">,</span> <span class="token string">"tue"</span><span class="token punctuation">:</span> <span class="token boolean">True</span><span class="token punctuation">,</span> <span class="token string">"wed"</span><span class="token punctuation">:</span> <span class="token boolean">True</span><span class="token punctuation">,</span> <span class="token string">"thu"</span><span class="token punctuation">:</span> <span class="token boolean">True</span><span class="token punctuation">,</span> <span class="token string">"fri"</span><span class="token punctuation">:</span> <span class="token boolean">False</span><span class="token punctuation">}</span><span class="token punctuation">)</span>
days_it_rained_and_snowed <span class="token operator">=</span> days_it_rained <span class="token operator">&amp;</span> days_it_snowed <span class="token comment"># combine with "and"</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>days_it_rained_and_snowed<span class="token punctuation">)</span>
    <span class="token comment"># mon    False</span>
    <span class="token comment"># tue     True</span>
    <span class="token comment"># wed    False</span>
    <span class="token comment"># thu     True</span>
    <span class="token comment"># fri    False</span>
    <span class="token comment"># dtype: bool</span></code></pre>
</div>
<div id="series-methods" class="section level3" number="9.2.2">
<h3><span class="header-section-number">9.2.2</span> Series Methods</h3>
<p><code>pandas</code> Series also include a number of <a href="https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.html"><em>methods</em></a> for inspecting and manipulating their data. Some useful examples are shown below (this is not a comprehensive listing):</p>
<pre class="language-python"><code>sample_numbers <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token number">8</span><span class="token punctuation">,</span> <span class="token number">6</span><span class="token punctuation">,</span> <span class="token number">7</span><span class="token punctuation">,</span> <span class="token number">5</span><span class="token punctuation">,</span> <span class="token number">3</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">9</span><span class="token punctuation">]</span><span class="token punctuation">)</span> <span class="token comment"># an example to work with</span>

<span class="token comment"># The `head(n)` method returns a Series containing only the first `n` elements</span>
<span class="token comment"># Note that this is a *new Series*</span>
<span class="token comment"># The `tail(n)` method works similarly, but returns the last `n` elements.</span>
first_3 <span class="token operator">=</span> sample_numbers<span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token number">3</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>first_3<span class="token punctuation">)</span>
    <span class="token comment"># 0    8</span>
    <span class="token comment"># 1    6</span>
    <span class="token comment"># 2    7</span>
    <span class="token comment"># dtype: int64</span>

<span class="token comment"># You can use simple aggregation methods, such as:</span>
maximum_value <span class="token operator">=</span> sample_numbers<span class="token punctuation">.</span><span class="token builtin">max</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token comment"># gets the maximum value of the series</span>
maximum_value_index <span class="token operator">=</span> sample_numbers<span class="token punctuation">.</span>idxmax<span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token comment"># get the label (index) where the maximum value is</span>
mean_of_values <span class="token operator">=</span> sample_numbers<span class="token punctuation">.</span>mean<span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token comment"># gets the average (statistical mean) of the values</span>
std_dev_of_values <span class="token operator">=</span> sample_numbers<span class="token punctuation">.</span>std<span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token comment"># gets the standard deviation of the values</span>

<span class="token comment"># There are also aggregation methods for Series of booleans</span>
<span class="token comment"># The `any()` method returns whether ANY of the elements are `True`</span>
<span class="token comment"># The `all()` method returns whether ALL of the elements are `True`</span>
larger_than_3_series <span class="token operator">=</span> sample_numbers <span class="token operator">></span> <span class="token number">3</span> <span class="token comment"># make a new series of Booleans</span>
any_bigger <span class="token operator">=</span> larger_than_3_series<span class="token punctuation">.</span><span class="token builtin">any</span><span class="token punctuation">(</span><span class="token punctuation">)</span>
all_bigger <span class="token operator">=</span> larger_than_3_series<span class="token punctuation">.</span><span class="token builtin">all</span><span class="token punctuation">(</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>any_bigger<span class="token punctuation">)</span> <span class="token comment"># True</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>all_bigger<span class="token punctuation">)</span> <span class="token comment"># False</span>

<span class="token comment"># get a Series of descriptive statistics</span>
<span class="token comment"># you would need to access the individual values to use them</span>
description_series <span class="token operator">=</span> sample_numbers<span class="token punctuation">.</span>describe<span class="token punctuation">(</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>description_series<span class="token punctuation">)</span>
    <span class="token comment"># count    7.000000</span>
    <span class="token comment"># mean     5.428571</span>
    <span class="token comment"># std      3.101459</span>
    <span class="token comment"># min      0.000000</span>
    <span class="token comment"># 25%      4.000000</span>
    <span class="token comment"># 50%      6.000000</span>
    <span class="token comment"># 75%      7.500000</span>
    <span class="token comment"># max      9.000000</span>
    <span class="token comment"># dtype: float64</span></code></pre>
<p>Notice that these are <em>methods</em> (called with dot notation). And generally they’re used to do some kind of processing on the whole Series!</p>
<p>Series support many more methods as well: see the <a href="https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.html">full documentation</a> for a complete list.</p>
</div>
<div id="series-indexing" class="section level3" number="9.2.3">
<h3><span class="header-section-number">9.2.3</span> Series Indexing</h3>
<p>Like dictionaries, each element in a Series has an <strong>index</strong> (a key or label). If that Series was produced from a list, the indices will just be numeric positions; if it was produced from a dictionary, then the indices will be the keys of that dictionary (but they are still called indices in <code>pandas</code>!). You can access the sequence of indices by using the <code>index</code> <em>attribute</em> (it’s not a method!); you will need to convert that sequence into a list to make use of it.</p>
<pre class="language-python"><code>letter_series <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">'a'</span><span class="token punctuation">,</span> <span class="token string">'b'</span><span class="token punctuation">,</span> <span class="token string">'c'</span><span class="token punctuation">,</span> <span class="token string">'d'</span><span class="token punctuation">,</span> <span class="token string">'e'</span><span class="token punctuation">,</span> <span class="token string">'f'</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
age_series <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">{</span><span class="token string">'sarah'</span><span class="token punctuation">:</span> <span class="token number">42</span><span class="token punctuation">,</span> <span class="token string">'amit'</span><span class="token punctuation">:</span> <span class="token number">35</span><span class="token punctuation">,</span> <span class="token string">'zhang'</span><span class="token punctuation">:</span> <span class="token number">13</span><span class="token punctuation">}</span><span class="token punctuation">)</span>

letter_indices <span class="token operator">=</span> <span class="token builtin">list</span><span class="token punctuation">(</span>letter_series<span class="token punctuation">.</span>index<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>letter_indices<span class="token punctuation">)</span> <span class="token comment"># [0, 1, 2, 3, 4, 5]</span>

age_indices <span class="token operator">=</span> <span class="token builtin">list</span><span class="token punctuation">(</span>age_series<span class="token punctuation">.</span>index<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>age_indices<span class="token punctuation">)</span> <span class="token comment"># ['sarah', 'amit', 'zhang']</span></code></pre>
<p>Just like lists and dictionaries, elements in a Series can be accessed using <strong>bracket notation</strong>, putting the index label inside the brackets:</p>
<pre class="language-python"><code>letter_series <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">'a'</span><span class="token punctuation">,</span> <span class="token string">'b'</span><span class="token punctuation">,</span> <span class="token string">'c'</span><span class="token punctuation">,</span> <span class="token string">'d'</span><span class="token punctuation">,</span> <span class="token string">'e'</span><span class="token punctuation">,</span> <span class="token string">'f'</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
age_series <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">{</span><span class="token string">'sarah'</span><span class="token punctuation">:</span> <span class="token number">42</span><span class="token punctuation">,</span> <span class="token string">'amit'</span><span class="token punctuation">:</span> <span class="token number">35</span><span class="token punctuation">,</span> <span class="token string">'zhang'</span><span class="token punctuation">:</span> <span class="token number">13</span><span class="token punctuation">}</span><span class="token punctuation">)</span>

<span class="token comment"># Get the 1th element from the letter_series</span>
letter_series<span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">]</span>  <span class="token comment"># 'b'</span>

<span class="token comment"># Get the 'amit' element from age_series</span>
age_series<span class="token punctuation">[</span><span class="token string">'amit'</span><span class="token punctuation">]</span>  <span class="token comment"># 35</span>

<span class="token comment"># Get the 0th element from age_series</span>
<span class="token comment"># (Series are ordered, so can always be accessed positionally with `.iloc`!)</span>
age_series<span class="token punctuation">.</span>iloc<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>  <span class="token comment"># 42</span></code></pre>
<p>(The returned values from using bracket notation are not technically basic <code>int</code> or <code>float</code> or <code>string</code> types, but are rather specific <code>numpy</code> objects that work almost identically to their normal type, but with some additional optimization. This shouldn’t impact anything you do).</p>
<p>You can also use list-style <em>slices</em> using the colon operator (e.g., elements <strong><code>1:3</code></strong>). Additionally, it is possible to specify <strong><em>a sequence of indices</em></strong> (i.e., a <em>list</em> or <em>range</em> or even a <em>Series</em> of indices) to access using bracket notation. This will produce a new Series object that contains only the elements that have those labels:</p>
<pre class="language-python"><code>age_series <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">{</span><span class="token string">'sarah'</span><span class="token punctuation">:</span> <span class="token number">42</span><span class="token punctuation">,</span> <span class="token string">'amit'</span><span class="token punctuation">:</span> <span class="token number">35</span><span class="token punctuation">,</span> <span class="token string">'zhang'</span><span class="token punctuation">:</span> <span class="token number">13</span><span class="token punctuation">}</span><span class="token punctuation">)</span>

index_list <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token string">'sarah'</span><span class="token punctuation">,</span> <span class="token string">'zhang'</span><span class="token punctuation">]</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>age_series<span class="token punctuation">[</span>index_list<span class="token punctuation">]</span><span class="token punctuation">)</span>
    <span class="token comment"># sarah    42</span>
    <span class="token comment"># zhang    13</span>
    <span class="token comment"># dtype: int64</span>

<span class="token comment"># using an anonymous variable for the index list (notice the brackets!)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>age_series<span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token string">'sarah'</span><span class="token punctuation">,</span> <span class="token string">'zhang'</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
    <span class="token comment"># sarah    42</span>
    <span class="token comment"># zhang    13</span>
    <span class="token comment"># dtype: int64</span></code></pre>
<p>This also means that you can use something like a <em>list comprehension</em> (or even a Series operation!) to determine which elements to select from a Series!</p>
<pre class="language-python"><code>letter_series <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">'a'</span><span class="token punctuation">,</span><span class="token string">'b'</span><span class="token punctuation">,</span><span class="token string">'c'</span><span class="token punctuation">,</span><span class="token string">'d'</span><span class="token punctuation">,</span><span class="token string">'e'</span><span class="token punctuation">,</span><span class="token string">'f'</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
even_numbers <span class="token operator">=</span> <span class="token punctuation">[</span>num <span class="token keyword">for</span> num <span class="token keyword">in</span> <span class="token builtin">range</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span><span class="token number">6</span><span class="token punctuation">)</span> <span class="token keyword">if</span> num<span class="token operator">%</span><span class="token number">2</span> <span class="token operator">==</span> <span class="token number">0</span><span class="token punctuation">]</span>  <span class="token comment"># [0, 2, 4]</span>

<span class="token comment"># Get letters with even numbered indices</span>
letter_series<span class="token punctuation">[</span>even_numbers<span class="token punctuation">]</span>
    <span class="token comment"># 0    a</span>
    <span class="token comment"># 2    c</span>
    <span class="token comment"># 4    e</span>
    <span class="token comment"># dtype: object</span>

<span class="token comment"># The same thing, but in one line (check the brackets!)</span>
letter_series<span class="token punctuation">[</span><span class="token punctuation">[</span>num <span class="token keyword">for</span> num <span class="token keyword">in</span> <span class="token builtin">range</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span><span class="token number">6</span><span class="token punctuation">)</span> <span class="token keyword">if</span> num<span class="token operator">%</span><span class="token number">2</span> <span class="token operator">==</span> <span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">]</span></code></pre>
<p>Note in the last line that there are two sets of brackets: one set being used to access the values of <code>letter_series</code>, and one used to specify a list (using a comprehension).</p>
<p>Finally, using a <strong><em>sequence of booleans</em></strong> with bracket notation will produce a new Series containing the elements whose position <em>corresponds</em> with the position of a <code>True</code> value. So if the first element of the boolean sequence is a <code>True</code>, then the first element of the Series will be accessed; if the second element of the boolean sequence is a <code>True</code>, then the second element of the Series will be accessed, and so on. This process is called <strong>boolean indexing</strong>.</p>
<pre class="language-python"><code>shoe_sizes <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token number">7</span><span class="token punctuation">,</span> <span class="token number">6.5</span><span class="token punctuation">,</span> <span class="token number">4</span><span class="token punctuation">,</span> <span class="token number">11</span><span class="token punctuation">,</span> <span class="token number">8</span><span class="token punctuation">]</span><span class="token punctuation">)</span>  <span class="token comment"># a series of shoe sizes</span>
index_filter <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token boolean">True</span><span class="token punctuation">,</span> <span class="token boolean">False</span><span class="token punctuation">,</span> <span class="token boolean">False</span><span class="token punctuation">,</span> <span class="token boolean">True</span><span class="token punctuation">,</span> <span class="token boolean">True</span><span class="token punctuation">]</span>  <span class="token comment"># list of which elements to extract</span>

<span class="token comment"># Extract every element in an index that is True</span>
shoe_sizes<span class="token punctuation">[</span>index_filter<span class="token punctuation">]</span>  <span class="token comment"># has values 7.0, 11.0, 8.0</span></code></pre>
<p>In this example, since <code>index_filter</code> is <code>True</code> at index 0, 3, and 4, then <code>shoe_sizes[index_filter]</code> returns a Series with the elements from index numbers 0, 3, and 4.</p>
<p>This technique is incredibly powerful because it allows you to easily perform <strong>filtering</strong> operations on a Series:</p>
<pre class="language-python"><code>shoe_sizes <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token number">7</span><span class="token punctuation">,</span> <span class="token number">6.5</span><span class="token punctuation">,</span> <span class="token number">4</span><span class="token punctuation">,</span> <span class="token number">11</span><span class="token punctuation">,</span> <span class="token number">8</span><span class="token punctuation">]</span><span class="token punctuation">)</span>  <span class="token comment"># a Series of shoe sizes</span>
big_sizes <span class="token operator">=</span> shoe_sizes <span class="token operator">></span> <span class="token number">6.5</span>  <span class="token comment"># a Series of booleans: True, False, False, True, True</span>

big_shoes <span class="token operator">=</span> shoe_sizes<span class="token punctuation">[</span>big_sizes<span class="token punctuation">]</span>  <span class="token comment"># a Series with values 7, 11, 8</span>

<span class="token comment"># In one line</span>
big_shoes <span class="token operator">=</span> shoe_sizes<span class="token punctuation">[</span>shoe_sizes <span class="token operator">></span> <span class="token number">6.5</span><span class="token punctuation">]</span></code></pre>
<p>You can think of the last statement as saying <em>shoe sizes <strong>where</strong> shoe size is greater than 6.5</em>.</p>
<p>Remember that you can use <em>logical operators</em> <code>&amp;</code> (“and”) and <code>|</code> (“or”) to combine multiple Series of booleans, allowing you to create complex “filtering” statements out of multiple series:</p>
<pre class="language-python"><code><span class="token comment"># access elements from "daily temperatures" where it was raining</span>
<span class="token comment"># or the wind was high</span>
<span class="token comment"># assume all three values are Series.</span>
daily_temperature<span class="token punctuation">[</span>days_rained <span class="token operator">|</span> <span class="token punctuation">(</span>days_windspeed <span class="token operator">==</span> <span class="token string">"high"</span><span class="token punctuation">)</span><span class="token punctuation">]</span></code></pre>
<p>While it is perfectly possible to do similar filtering with a list comprehension, the boolean indexing syntax can be very simple to read and runs quickly. (This is also the normal style of doing filtering in the <code>R</code> programming language).</p>
</div>
</div>
<div id="dataframes" class="section level2" number="9.3">
<h2><span class="header-section-number">9.3</span> DataFrames</h2>
<p>The most common data structure used in <code>pandas</code> (more common than Series in fact!) is a <a href="https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html"><strong>DataFrame</strong></a>. A DataFrame represents a <strong>table</strong>, where data is organized into rows and columns. You can think of a DataFrame as being like an Excel spreadsheet or a SQL table.
This book has previously represented tabular data using a <em>list of dictionaries</em>. However, this required you to be careful to make sure that all of the dictionaries shared keys, and did not offer easy ways to interact with the table in terms of its rows or columns. DataFrames give you that functionality!</p>
<p>A DataFrame can be understood as a <em>dictionary of Series</em>, where each Series represents a <strong>column</strong> of the table. The keys of this dictionary are the <em>index labels</em> of the columns, while the index labels of the Series themselves serve as the labels for the row.</p>
<p class="alert alert-warning">
This structure is distinct from spreadsheets or SQL tables or even lists-of-dictionaries, which are often seen as a collection of <em>observations</em> (rows). Programmatically, DataFrames should primarily be considered as a collection of <em>features</em> (columns), which happen to be sequenced to correspond to observations.
</p>
<p>A DataFrame can be created using the <code>DataFrame()</code> function (a <em>constructor</em> for instances of the class). This function usually takes as an argument a <em>dictionary</em> whose values are Series (or values that can be converted into a Series, such as a list or a dictionary):</p>
<pre class="language-python"><code>name_series <span class="token operator">=</span> pd<span class="token punctuation">.</span>Series<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">'Ada'</span><span class="token punctuation">,</span> <span class="token string">'Bob'</span><span class="token punctuation">,</span> <span class="token string">'Chris'</span><span class="token punctuation">,</span> <span class="token string">'Diya'</span><span class="token punctuation">,</span> <span class="token string">'Emma'</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
heights <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token number">64</span><span class="token punctuation">,</span> <span class="token number">74</span><span class="token punctuation">,</span> <span class="token number">69</span><span class="token punctuation">,</span> <span class="token number">69</span><span class="token punctuation">,</span> <span class="token number">71</span><span class="token punctuation">]</span> <span class="token comment"># lists can be made into Series</span>
weights <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token number">135</span><span class="token punctuation">,</span> <span class="token number">156</span><span class="token punctuation">,</span> <span class="token number">139</span><span class="token punctuation">,</span> <span class="token number">144</span><span class="token punctuation">,</span> <span class="token number">152</span><span class="token punctuation">]</span>

people_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>DataFrame<span class="token punctuation">(</span><span class="token punctuation">{</span><span class="token string">'name'</span><span class="token punctuation">:</span> name_series<span class="token punctuation">,</span> <span class="token string">'height'</span><span class="token punctuation">:</span> heights<span class="token punctuation">,</span> <span class="token string">'weight'</span><span class="token punctuation">:</span> weights<span class="token punctuation">}</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>people_df<span class="token punctuation">)</span>
    <span class="token comment">#     name  height  weight</span>
    <span class="token comment"># 0    Ada      64     135</span>
    <span class="token comment"># 1    Bob      74     156</span>
    <span class="token comment"># 2  Chris      69     139</span>
    <span class="token comment"># 3   Diya      69     144</span>
    <span class="token comment"># 4   Emma      71     152</span></code></pre>
<p>Notice that the rows are labeled with indices <code>0, 1, 2, 3, 4</code> (the indices used by the individual Series—which were made from lists), and the columns are labeled with indices <code>name, height, weight</code> (the keys used in the dictionary that created the DataFrame).</p>
<p>If you end a Jupyter cell with an expression that is a DataFrame, Jupyter will format it as a graphical table—but it’s still a DataFrame!</p>
<p>Although DataFrame variables are often named just <code>df</code> in <code>pandas</code> examples, this is <strong><em>not</em></strong> a good variable name! You can and should use much more descriptive names for your DataFrames (e.g., <code>person_size_table</code> or <code>person_size_df</code>) when doing actual analysis.</p>
<p>It is possible to specify the order of columns in the table using the <code>columns</code> keyword argument, and the order of the rows using the <code>index</code> keyword argument. However, the ordering of rows and columns is often not necessary for data analysis.</p>
<p>It is also possible to create a DataFrame directly from a spreadsheet—such as from a <strong><code>.csv</code></strong> file (containing <strong>c</strong>omma <strong>s</strong>eparated <strong>v</strong>alues)—by using the <code>pandas.read_csv()</code> function:</p>
<pre class="language-python"><code>my_dataframe <span class="token operator">=</span> pd<span class="token punctuation">.</span>read_csv<span class="token punctuation">(</span><span class="token string">'path/to/my/file.csv'</span><span class="token punctuation">)</span></code></pre>
<p>See <a href="https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html">the IO Tools documentation</a> for details and other file-reading functions.</p>
<div id="dataframe-operations" class="section level3" number="9.3.1">
<h3><span class="header-section-number">9.3.1</span> DataFrame Operations</h3>
<p>Much like Series, DataFrames support a <em>vectorized</em> form of mathematical and relational operators: when the other operand is a <em>scalar</em> (e.g., a single number or a string), then the operation is applied member-wise to each value in the DataFrame:</p>
<pre class="language-python"><code><span class="token comment"># data frame of test scores</span>
test_scores <span class="token operator">=</span> pd<span class="token punctuation">.</span>DataFrame<span class="token punctuation">(</span><span class="token punctuation">{</span>
    <span class="token string">'math'</span><span class="token punctuation">:</span><span class="token punctuation">[</span><span class="token number">91</span><span class="token punctuation">,</span> <span class="token number">82</span><span class="token punctuation">,</span> <span class="token number">93</span><span class="token punctuation">,</span> <span class="token number">100</span><span class="token punctuation">,</span> <span class="token number">78</span><span class="token punctuation">,</span> <span class="token number">91</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
    <span class="token string">'spanish'</span><span class="token punctuation">:</span><span class="token punctuation">[</span><span class="token number">88</span><span class="token punctuation">,</span> <span class="token number">79</span><span class="token punctuation">,</span> <span class="token number">77</span><span class="token punctuation">,</span> <span class="token number">99</span><span class="token punctuation">,</span> <span class="token number">88</span><span class="token punctuation">,</span> <span class="token number">93</span><span class="token punctuation">]</span>
<span class="token punctuation">}</span><span class="token punctuation">)</span>

<span class="token comment"># A mathematical operator applies to each element in the data frame</span>
curved_scores <span class="token operator">=</span> test_scores <span class="token operator">*</span> <span class="token number">1.02</span>  <span class="token comment"># curve scores up by 2%</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>curved_scores<span class="token punctuation">)</span>
    <span class="token comment">#      math  spanish</span>
    <span class="token comment"># 0   92.82    89.76</span>
    <span class="token comment"># 1   83.64    80.58</span>
    <span class="token comment"># 2   94.86    78.54</span>
    <span class="token comment"># 3  102.00   100.98</span>
    <span class="token comment"># 4   79.56    89.76</span>
    <span class="token comment"># 5   92.82    94.86</span>

<span class="token comment"># A relational operator applies to each element in the data frame</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>curved_scores <span class="token operator">></span> <span class="token number">90</span><span class="token punctuation">)</span>
    <span class="token comment">#     math spanish</span>
    <span class="token comment"># 0   True   False</span>
    <span class="token comment"># 1  False   False</span>
    <span class="token comment"># 2   True   False</span>
    <span class="token comment"># 3   True    True</span>
    <span class="token comment"># 4  False   False</span>
    <span class="token comment"># 5   True    True</span></code></pre>
<p>It is also possible to apply such operators when both operands are DataFrames. In this case, the operation is applied <strong>member-wise</strong>, where values are matched if they have the same row <em>and</em> column label. Note that any value that doesn’t have a pair will instead produce the value <code>NaN</code> (Not a Number). This is not a normal way of working with DataFrames—it is much more common to access individual rows and columns and work with those (e.g., make a new column that is the sum of two others); see below for details.</p>
</div>
<div id="dataframe-methods" class="section level3" number="9.3.2">
<h3><span class="header-section-number">9.3.2</span> DataFrame Methods</h3>
<p>Also like Series, DataFrame objects support a large number of methods that can be used to modify or interrogate the data structure.</p>
<pre class="language-python"><code>people_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>DataFrame<span class="token punctuation">(</span><span class="token punctuation">{</span>
  <span class="token string">'name'</span><span class="token punctuation">:</span> <span class="token punctuation">[</span><span class="token string">'Ada'</span><span class="token punctuation">,</span> <span class="token string">'Bob'</span><span class="token punctuation">,</span> <span class="token string">'Chris'</span><span class="token punctuation">,</span> <span class="token string">'Diya'</span><span class="token punctuation">,</span> <span class="token string">'Emma'</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
  <span class="token string">'height'</span><span class="token punctuation">:</span> <span class="token punctuation">[</span><span class="token number">64</span><span class="token punctuation">,</span> <span class="token number">74</span><span class="token punctuation">,</span> <span class="token number">69</span><span class="token punctuation">,</span> <span class="token number">69</span><span class="token punctuation">,</span> <span class="token number">71</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
  <span class="token string">'weight'</span><span class="token punctuation">:</span> <span class="token punctuation">[</span><span class="token number">135</span><span class="token punctuation">,</span> <span class="token number">156</span><span class="token punctuation">,</span> <span class="token number">139</span><span class="token punctuation">,</span> <span class="token number">144</span><span class="token punctuation">,</span> <span class="token number">152</span><span class="token punctuation">]</span>
<span class="token punctuation">}</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>people_df<span class="token punctuation">)</span> <span class="token comment"># for reference</span>
    <span class="token comment">#     name  height  weight</span>
    <span class="token comment"># 0    Ada      64     135</span>
    <span class="token comment"># 1    Bob      74     156</span>
    <span class="token comment"># 2  Chris      69     139</span>
    <span class="token comment"># 3   Diya      69     144</span>
    <span class="token comment"># 4   Emma      71     152</span>

<span class="token comment"># The `head(n)` method returns a DataFrame containing only the first `n` ROWS</span>
<span class="token comment"># Note that this is a *new DataFrame*</span>
<span class="token comment"># The `tail(n)` method works similarly, but returns the last `n` ROWS.</span>
first_2_rows <span class="token operator">=</span> people_df<span class="token punctuation">.</span>head<span class="token punctuation">(</span><span class="token number">2</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>first_2_rows<span class="token punctuation">)</span>
    <span class="token comment">#     name  height  weight</span>
    <span class="token comment"># 0    Ada      64     135</span>
    <span class="token comment"># 1    Bob      74     156</span>

<span class="token comment"># The `assign()` method returns a new DataFrame with additional columns</span>
<span class="token comment"># Each new column is named by the argument name and given the argument value</span>
<span class="token comment"># This does not modify the original DataFrame!</span>
people_with_hats_df <span class="token operator">=</span> people_df<span class="token punctuation">.</span>assign<span class="token punctuation">(</span>wearing_hat<span class="token operator">=</span><span class="token punctuation">[</span><span class="token boolean">False</span><span class="token punctuation">,</span> <span class="token boolean">True</span><span class="token punctuation">,</span> <span class="token boolean">False</span><span class="token punctuation">,</span> <span class="token boolean">True</span><span class="token punctuation">,</span> <span class="token boolean">True</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>people_with_hats_df<span class="token punctuation">)</span>
    <span class="token comment">#     name  height  weight  wearing_hat</span>
    <span class="token comment"># 0    Ada      64     135        False</span>
    <span class="token comment"># 1    Bob      74     156         True</span>
    <span class="token comment"># 2  Chris      69     139        False</span>
    <span class="token comment"># 3   Diya      69     144         True</span>
    <span class="token comment"># 4   Emma      71     152         True</span>

extra_columns_df <span class="token operator">=</span> people_df<span class="token punctuation">.</span>assign<span class="token punctuation">(</span>col1<span class="token operator">=</span><span class="token number">1</span><span class="token punctuation">,</span> col2<span class="token operator">=</span><span class="token number">2</span><span class="token punctuation">,</span> col3<span class="token operator">=</span><span class="token number">3</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>extra_columns_df<span class="token punctuation">)</span>
    <span class="token comment">#     name  height  weight  col1  col2  col3</span>
    <span class="token comment"># 0    Ada      64     135     1     2     3</span>
    <span class="token comment"># 1    Bob      74     156     1     2     3</span>
    <span class="token comment"># 2  Chris      69     139     1     2     3</span>
    <span class="token comment"># 3   Diya      69     144     1     2     3</span>
    <span class="token comment"># 4   Emma      71     152     1     2     3</span>

<span class="token comment"># The `drop()` function returns a new DataFrame with rows or columns removed</span>
<span class="token comment"># The `axis` argument indicates whether to drop a row (drop by `index`)</span>
<span class="token comment"># or a column (drop by `columns`)</span>
names_only_df <span class="token operator">=</span> people_df<span class="token punctuation">.</span>drop<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">'height'</span><span class="token punctuation">,</span> <span class="token string">'weight'</span><span class="token punctuation">]</span><span class="token punctuation">,</span> axis<span class="token operator">=</span><span class="token string">"columns"</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>names_only_df<span class="token punctuation">)</span> <span class="token comment"># A DataFrame with a single column!</span>
    <span class="token comment">#     name</span>
    <span class="token comment"># 0    Ada</span>
    <span class="token comment"># 1    Bob</span>
    <span class="token comment"># 2  Chris</span>
    <span class="token comment"># 3   Diya</span>
    <span class="token comment"># 4   Emma</span>

select_rows_df <span class="token operator">=</span> people_df<span class="token punctuation">.</span>drop<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">]</span><span class="token punctuation">,</span> axis<span class="token operator">=</span><span class="token string">"index"</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>select_rows_df<span class="token punctuation">)</span>
    <span class="token comment">#     name  height  weight</span>
    <span class="token comment"># 0   Ada      64     135</span>
    <span class="token comment"># 3  Diya      69     144</span>
    <span class="token comment"># 4  Emma      71     152</span></code></pre>
<p>While the <code>drop()</code> function can be used to remove rows or columns, it’s more common to use boolean indexing to access the desired rows &amp; columns; see below for details. When working with data structures in programming, it’s better to think about “what you want to keep” rather than “what you want to remove”.</p>
<p>DataFrames also support most of the same methods that Series do, such as <code>mean()</code>, <code>any()</code>, <code>describe()</code>, etc. With a DataFrame, these methods are applied <em>per column</em> (not per row). So calling <code>mean()</code> on a DataFrame will calculate the mean of <em>each column</em> in that DataFrame, producing a new Series of those values:</p>
<pre class="language-python"><code><span class="token comment"># continuing from previous example</span>
people_measures_only_df <span class="token operator">=</span> people_df<span class="token punctuation">.</span>drop<span class="token punctuation">(</span><span class="token string">"name"</span><span class="token punctuation">,</span> axis<span class="token operator">=</span><span class="token string">"columns"</span><span class="token punctuation">)</span>

<span class="token comment"># Get the statistical mean (average) of each column of the DataFrame</span>
people_means_series <span class="token operator">=</span> people_measures_only_df<span class="token punctuation">.</span>mean<span class="token punctuation">(</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>people_means_series<span class="token punctuation">)</span>
    <span class="token comment"># height     69.4</span>
    <span class="token comment"># weight    145.2</span>
    <span class="token comment"># dtype: float64</span></code></pre>
<p>To be clear: the result of calling e.g., <code>.mean()</code> is a <em>Series</em>, not a DataFrame (remember that Series can have indices that are not numeric, like a dictionary does!)</p>
<p>Series methods like <code>.describe()</code> which return a Series are also supported: in this case, they return a Series for each column in the DataFrame… compiled into a new DataFrame!</p>
<pre class="language-python"><code>people_description_df <span class="token operator">=</span> people_measures_only_df<span class="token punctuation">.</span>describe<span class="token punctuation">(</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>people_description_df<span class="token punctuation">)</span>
    <span class="token comment">#           height      weight</span>
    <span class="token comment"># count   5.000000    5.000000</span>
    <span class="token comment"># mean   69.400000  145.200000</span>
    <span class="token comment"># std     3.646917    8.757854</span>
    <span class="token comment"># min    64.000000  135.000000</span>
    <span class="token comment"># 25%    69.000000  139.000000</span>
    <span class="token comment"># 50%    69.000000  144.000000</span>
    <span class="token comment"># 75%    71.000000  152.000000</span>
    <span class="token comment"># max    74.000000  156.000000</span></code></pre>
<p>The <code>height</code> column of the <code>people_description_df</code> is the result of calling <code>describe()</code> on the DataFrame’s <code>height</code> column Series.</p>
<p>If a Series method would return a <em>scalar</em> (a single value, as with <code>mean()</code> or <code>any()</code>), then calling that method on a DataFrame returns a Series whose labels are the column labels. If the Series method instead would return a <em>Series</em> (multiple values, as with <code>head()</code> or <code>describe()</code>), then calling that method on a DataFrame returns a new DataFrame whose columns are each of the resulting Series. So as a general rule: if you’re expecting one value per column, you’ll get a Series of those values; if you’re expecting multiple values per column, you’ll get a DataFrame of those values.</p>
<p class="alert alert-default">
This also means that you can sometimes “double-call” methods to reduce them further. For example, <code>df.all()</code> returns a Series of whether each column contains only <code>True</code> values; <code>df.all().all()</code> would check if <em>that</em> Series contains all <code>True</code> values (thus checking <em>all</em> columns have all <code>True</code> values, i.e., the entire table is all <code>True</code> values).
</p>
</div>
<div id="accessing-dataframes" class="section level3" number="9.3.3">
<h3><span class="header-section-number">9.3.3</span> Accessing DataFrames</h3>
<p>It is possible to access values of a DataFrame by index, just as you can with Series. But DataFrames are two-dimensional data structures, so they have both an <strong>index</strong> (which represents the <em>row</em>) and <strong>columns</strong> (which represent the columns). Thus you can talk about the index or column of a DataFrame to refer to a specific element. Similar to the Series’ <code>index</code> attribute, you can access a list of indices or a list of columns by using the <code>index</code> and <code>columns</code> attributes respectively:</p>
<pre class="language-python"><code>people_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>DataFrame<span class="token punctuation">(</span><span class="token punctuation">{</span>
  <span class="token string">'name'</span><span class="token punctuation">:</span> <span class="token punctuation">[</span><span class="token string">'Ada'</span><span class="token punctuation">,</span> <span class="token string">'Bob'</span><span class="token punctuation">,</span> <span class="token string">'Chris'</span><span class="token punctuation">,</span> <span class="token string">'Diya'</span><span class="token punctuation">,</span> <span class="token string">'Emma'</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
  <span class="token string">'height'</span><span class="token punctuation">:</span> <span class="token punctuation">[</span><span class="token number">64</span><span class="token punctuation">,</span> <span class="token number">74</span><span class="token punctuation">,</span> <span class="token number">69</span><span class="token punctuation">,</span> <span class="token number">69</span><span class="token punctuation">,</span> <span class="token number">71</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
  <span class="token string">'weight'</span><span class="token punctuation">:</span> <span class="token punctuation">[</span><span class="token number">135</span><span class="token punctuation">,</span> <span class="token number">156</span><span class="token punctuation">,</span> <span class="token number">139</span><span class="token punctuation">,</span> <span class="token number">144</span><span class="token punctuation">,</span> <span class="token number">152</span><span class="token punctuation">]</span>
<span class="token punctuation">}</span><span class="token punctuation">)</span>

people_indices <span class="token operator">=</span> <span class="token builtin">list</span><span class="token punctuation">(</span>people_df<span class="token punctuation">.</span>index<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>people_indices<span class="token punctuation">)</span> <span class="token comment"># [0, 1, 2, 3, 4]</span>

people_columns <span class="token operator">=</span> <span class="token builtin">list</span><span class="token punctuation">(</span>people_df<span class="token punctuation">.</span>columns<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>people_columns<span class="token punctuation">)</span> <span class="token comment"># ['name', 'height', 'weight']</span></code></pre>
<p>It is also possible to access DataFrame values using <strong>bracket notation</strong>. As DataFrames are commonly viewed as a <em>dictionary of columns</em>, the value used inside the brackets is the <em>label of the column</em>:</p>
<pre class="language-python"><code><span class="token comment"># get the height column using bracket notation</span>
height_column <span class="token operator">=</span> people_df<span class="token punctuation">[</span><span class="token string">'height'</span><span class="token punctuation">]</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>height_column<span class="token punctuation">)</span>
    <span class="token comment"># 0    64</span>
    <span class="token comment"># 1    74</span>
    <span class="token comment"># 2    69</span>
    <span class="token comment"># 3    69</span>
    <span class="token comment"># 4    71</span>
    <span class="token comment"># Name: height, dtype: int64</span></code></pre>
<p>If you try to put a row label or index inside of the brackets, you’ll get a <code>KeyError</code> that the key isn’t defined—similar to if you tried to access a key that wasn’t in a dictionary.</p>
<p>While bracket notation works, it is often more common to refer to individual columns using <strong>dot notation</strong>, treating each column as an <em>attribute</em> or <em>property</em> of the DataFrame object:</p>
<pre class="language-python"><code><span class="token comment"># get the height column using dot notation</span>
height_column <span class="token operator">=</span> people_df<span class="token punctuation">.</span>height
<span class="token keyword">print</span><span class="token punctuation">(</span>height_column<span class="token punctuation">)</span>
    <span class="token comment"># 0    64</span>
    <span class="token comment"># 1    74</span>
    <span class="token comment"># 2    69</span>
    <span class="token comment"># 3    69</span>
    <span class="token comment"># 4    71</span>
    <span class="token comment"># Name: height, dtype: int64</span></code></pre>
<p>This alternate syntax does <em>exactly the same thing</em>—it’s just written in a different way! The dot notation can be slightly easier to type and to read (less punctuation is involved), making it often preferred. Being able to use dot notation this way is a feature of DataFrames, and doesn’t apply to normal dictionaries.</p>
<p>As with Series, it is also possible to select <em>multiple</em> columns by using a <em>list</em> or sequence inside the brackets. This will produce a new DataFrame (a “sub-table”):</p>
<pre class="language-python"><code>list_of_columns <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token string">'name'</span><span class="token punctuation">,</span> <span class="token string">'height'</span><span class="token punctuation">]</span>

subtable <span class="token operator">=</span> people_df<span class="token punctuation">[</span>list_of_columns<span class="token punctuation">]</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>subtable<span class="token punctuation">)</span>
    <span class="token comment">#     name  height</span>
    <span class="token comment"># 0    Ada      64</span>
    <span class="token comment"># 1    Bob      74</span>
    <span class="token comment"># 2  Chris      69</span>
    <span class="token comment"># 3   Diya      69</span>
    <span class="token comment"># 4   Emma      71</span>

<span class="token comment"># same as above, but in one line:</span>
subtable <span class="token operator">=</span> people_df<span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token string">'name'</span><span class="token punctuation">,</span> <span class="token string">'height'</span><span class="token punctuation">]</span><span class="token punctuation">]</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>subtable<span class="token punctuation">)</span></code></pre>
<p>You can’t access columns by their position, so cannot specify a list of numbers—it would need to be a list of column names (usually strings).</p>
<p>However: specifying a <strong>slice</strong> (using a colon <strong><code>:</code></strong>) will select by <em>row</em> position, not column position! I do not know wherefore this inconsistency, other than “convenience”.</p>
<pre class="language-python"><code><span class="token comment"># get ROWS 1 through 4 (not inclusive)</span>
subtable <span class="token operator">=</span> people_df<span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">:</span><span class="token number">4</span><span class="token punctuation">]</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>subtable<span class="token punctuation">)</span>
    <span class="token comment">#     name  height  weight</span>
    <span class="token comment"># 1    Bob      74     156</span>
    <span class="token comment"># 2  Chris      69     139</span>
    <span class="token comment"># 3   Diya      69     144</span></code></pre>
<p>It is also possible to use bracket notation when <em>assigning</em> values to a DataFrame. This is a useful way to add additional columns to the DataFrame if you don’t want to use the <code>assign()</code> method—in particular, this will modify the DataFrame “in place” (without making a copy)… though this can potentially lead to errors with changing referenced values.</p>
<pre class="language-python"><code>people_df<span class="token punctuation">[</span><span class="token string">'wearing_hat'</span><span class="token punctuation">]</span> <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token boolean">False</span><span class="token punctuation">,</span> <span class="token boolean">True</span><span class="token punctuation">,</span> <span class="token boolean">False</span><span class="token punctuation">,</span> <span class="token boolean">True</span><span class="token punctuation">,</span> <span class="token boolean">True</span><span class="token punctuation">]</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>people_df<span class="token punctuation">)</span>
    <span class="token comment">#      name  height  weight  wearing_hat</span>
    <span class="token comment"># 0    Ada      64     135        False</span>
    <span class="token comment"># 1    Bob      74     156         True</span>
    <span class="token comment"># 2  Chris      69     139        False</span>
    <span class="token comment"># 3   Diya      69     144         True</span>
    <span class="token comment"># 4   Emma      71     152         True</span></code></pre>
<p>Note that if you try to use an assignment operator to modify a <em>subset</em> (called a “slice”) of a DataFrame—such as one you selected with bracket notation—you will get a <code>SettingWithCopyWarning</code>.</p>
<pre class="language-python"><code><span class="token comment"># Change the element in the `wearing_hat` column and row 3 to be False</span>
<span class="token comment"># Diya is no longer wearing a hat</span>
people_df<span class="token punctuation">[</span><span class="token string">'wearing_hat'</span><span class="token punctuation">]</span><span class="token punctuation">[</span><span class="token number">3</span><span class="token punctuation">]</span> <span class="token operator">=</span> <span class="token boolean">False</span> <span class="token comment"># &lt;SettingWithCopyWarning></span></code></pre>
<p>This is a warning, not an error; your operation will succeed but it will look like something went wrong. This warning is shown because such operations have a chance of creating unpredictable results, so <code>pandas</code> gives you a warning not to do that. See <a href="https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy">the user guide</a> for a more detailed discussion. If you get this warning, you should adjust your indexing so you’re not getting a subset… or just use a different method of assigning new values to a variable.</p>
<p>Because DataFrames support multiple indexes, it is possible to use <strong>boolean indexing</strong> (as with Series), allowing you to <em>filter</em> for rows based on the values in their columns. Note carefully that boolean indexing selects by <em>row</em> (not by column)—a list of column names selects columns, but a list of booleans selects rows!</p>
<pre class="language-python"><code><span class="token comment"># Access the `height` column (a Series) and compare to 70</span>
are_tall_series <span class="token operator">=</span> people_df<span class="token punctuation">.</span>height <span class="token operator">></span> <span class="token number">70</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>are_tall_series<span class="token punctuation">)</span>
    <span class="token comment"># 0    False</span>
    <span class="token comment"># 1     True</span>
    <span class="token comment"># 2    False</span>
    <span class="token comment"># 3    False</span>
    <span class="token comment"># 4     True</span>
    <span class="token comment"># Name: height, dtype: bool</span>

tall_people_df <span class="token operator">=</span> people_df<span class="token punctuation">[</span>are_tall_series<span class="token punctuation">]</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>tall_people_df<span class="token punctuation">)</span>
    <span class="token comment">#    name  height  weight</span>
    <span class="token comment"># 1   Bob      74     156</span>
    <span class="token comment"># 4  Emma      71     152</span>

<span class="token comment"># As a single line:</span>
tall_people_df <span class="token operator">=</span> people_df<span class="token punctuation">[</span>people_df<span class="token punctuation">.</span>height <span class="token operator">></span> <span class="token number">70</span><span class="token punctuation">]</span></code></pre>
<p>Note that <code>people_df.height</code> is a Series (a column), so <code>people_df.height &gt; 70</code> produces a Series of boolean values (<code>True</code> and <code>False</code>). This Series is used to determine <em>which</em> rows to return from the DataFrame—each row that corresponds with a <code>True</code> index.</p>
<p>Finally, DataFrames also provide two <em>attributes</em> (properties) used to “quick access” values: <strong><code>loc</code></strong>, which provides an “index” (lookup table) based on index labels, and <strong><code>iloc</code></strong>, which provides an “index” (lookup table) based on row and column positions. Each of these “indexes” can be thought of as a <em>dictionary</em> whose values are the individual elements in the DataFrame, and whose keys can therefore be used to access those values using <strong>bracket notation</strong>. The dictionaries support multiple types of keys (using label-based <code>loc</code> as an example):</p>
<table>
<colgroup>
<col width="39%" />
<col width="26%" />
<col width="33%" />
</colgroup>
<thead>
<tr class="header">
<th align="left">Key Type</th>
<th align="left">Description</th>
<th align="left">Example</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td align="left"><code>df.loc[row_label]</code></td>
<td align="left">An individual row</td>
<td align="left"><code>df.loc['Ada']</code> (the row labeled <code>Ada</code>)</td>
</tr>
<tr class="even">
<td align="left"><code>df.loc[row_label_list]</code></td>
<td align="left">A list of row labels</td>
<td align="left"><code>df.loc[['Ada','Bob']]</code> (the rows labeled <code>Ada</code> and <code>Bob</code>)</td>
</tr>
<tr class="odd">
<td align="left"><code>df.loc[row_label_slice]</code></td>
<td align="left">A <em>slice</em> of row labels</td>
<td align="left"><code>df.loc['Bob':'Diya']</code> (the rows from <code>Bob</code> to <code>Diya</code>. Note that this is an <em>inclusive</em> slice!)</td>
</tr>
<tr class="even">
<td align="left"><code>df.loc[row_label, col_label]</code></td>
<td align="left">A <em>tuple</em> of <code>(row, column)</code></td>
<td align="left"><code>df.loc['Ada', 'height']</code> (the value at row <code>Ada</code>, column <code>height</code>)</td>
</tr>
<tr class="odd">
<td align="left"><code>df.loc[row_label_seq, col_label_seq]</code></td>
<td align="left">A <em>tuple</em> of label lists or slices</td>
<td align="left"><code>df.loc['Bob':'Diya', ['height','weight']]</code> (the rows from <code>Bob</code> to <code>Diya</code> with the columns <code>height</code> and <code>weight</code>)</td>
</tr>
<tr class="even">
<td align="left"><code>df.iloc[row_index, col_index]</code></td>
<td align="left">A <em>tuple</em> of <code>(row_number, column_number)</code></td>
<td align="left"><code>df.iloc[0, 2]</code> (the value at 0th row and 2th column)</td>
</tr>
<tr class="odd">
<td align="left"><code>df.iloc[row_index_seq, col_index_seq]</code></td>
<td align="left">A <em>tuple</em> of label indices or slices</td>
<td align="left"><code>df.iloc[1:3, [1,2]]</code> (the 1th to 3th rows (exclusive) with the 1th and 2th columns)</td>
</tr>
</tbody>
</table>
<p>The <code>loc</code> attribute thus lets you use bracket notation, but specifying the rows <em>and</em> columns at the same time, separated by a comma (because you’re passing in a tuple)! The <code>iloc</code> attribute works similarly, but instead of specifying the labels for the rows and columns, you specify the positional index.</p>
<p>You can also use the boundless slice <code>:</code> to refer to “all elements”. So for example:</p>
<pre class="language-python"><code>people_df<span class="token punctuation">.</span>loc<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token punctuation">,</span> <span class="token string">'height'</span><span class="token punctuation">]</span>  <span class="token comment"># get all rows, but just the 'height' column</span></code></pre>
</div>
</div>
<div id="grouping" class="section level2" number="9.4">
<h2><span class="header-section-number">9.4</span> Grouping</h2>
<p>The <code>pandas</code> library is designed for and is most useful when working with large data sets—which might have hundreds or thousands of rows. And when working with such data, you often want to perform aggregations and get statistics on only <em>part</em> of the data set, rather than the whole. For example, rather than calculate the average temperature across an entire year’s worth of recordings, maybe you want to get the average of each month, or the average on each day of the week. What you’d like to do is to break the dataset into different <strong>groups</strong> (subsets of the data) and then apply existing methods like <code>max()</code> or <code>mean()</code> to <em>each of those groups</em>. While it’s possible to do this by accessing the subsets you want and then using e.g., a loop to process each one, <code>pandas</code> provides additional functionality to create and work with subgroups of a DataFrame.</p>
<p>As a worked example, consider the below DataFrame of student exam scores. You might be interested in comparing scores within or between section, or within or between student year.</p>
<pre class="language-python"><code><span class="token comment"># A DataFrame of student scores on exams</span>
scores_df <span class="token operator">=</span> pd<span class="token punctuation">.</span>DataFrame<span class="token punctuation">(</span><span class="token punctuation">{</span>
  <span class="token string">"name"</span><span class="token punctuation">:</span> <span class="token punctuation">[</span><span class="token string">'Ada'</span><span class="token punctuation">,</span> <span class="token string">'Bob'</span><span class="token punctuation">,</span> <span class="token string">'Chris'</span><span class="token punctuation">,</span> <span class="token string">'Diya'</span><span class="token punctuation">,</span> <span class="token string">'Emma'</span><span class="token punctuation">,</span> <span class="token string">'Fred'</span><span class="token punctuation">,</span> <span class="token string">'George'</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
  <span class="token string">"section"</span><span class="token punctuation">:</span> <span class="token punctuation">[</span><span class="token string">'A'</span><span class="token punctuation">,</span> <span class="token string">'A'</span><span class="token punctuation">,</span> <span class="token string">'A'</span><span class="token punctuation">,</span> <span class="token string">'A'</span><span class="token punctuation">,</span> <span class="token string">'B'</span><span class="token punctuation">,</span> <span class="token string">'B'</span><span class="token punctuation">,</span> <span class="token string">'B'</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
  <span class="token string">"year"</span><span class="token punctuation">:</span> <span class="token punctuation">[</span><span class="token number">4</span><span class="token punctuation">,</span> <span class="token number">3</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">3</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">4</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
  <span class="token string">"midterm"</span><span class="token punctuation">:</span> <span class="token punctuation">[</span><span class="token number">92</span><span class="token punctuation">,</span> <span class="token number">90</span><span class="token punctuation">,</span> <span class="token number">99</span><span class="token punctuation">,</span> <span class="token number">95</span><span class="token punctuation">,</span> <span class="token number">83</span><span class="token punctuation">,</span> <span class="token number">96</span><span class="token punctuation">,</span> <span class="token number">87</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
  <span class="token string">"final"</span><span class="token punctuation">:</span> <span class="token punctuation">[</span><span class="token number">88</span><span class="token punctuation">,</span> <span class="token number">86</span><span class="token punctuation">,</span> <span class="token number">80</span><span class="token punctuation">,</span> <span class="token number">95</span><span class="token punctuation">,</span> <span class="token number">85</span><span class="token punctuation">,</span> <span class="token number">94</span><span class="token punctuation">,</span> <span class="token number">92</span><span class="token punctuation">]</span><span class="token punctuation">,</span>
<span class="token punctuation">}</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>scores_df<span class="token punctuation">)</span>
    <span class="token comment">#      name section  year  midterm  final</span>
    <span class="token comment"># 0     Ada       A     4       92     88</span>
    <span class="token comment"># 1     Bob       A     3       90     86</span>
    <span class="token comment"># 2   Chris       A     1       99     80</span>
    <span class="token comment"># 3    Diya       A     3       95     95</span>
    <span class="token comment"># 4    Emma       B     2       83     85</span>
    <span class="token comment"># 5    Fred       B     1       96     94</span>
    <span class="token comment"># 6  George       B     4       87     92</span></code></pre>
<p>The <strong><code>groupby()</code></strong> method is called on a DataFrame and “separates” the <em>rows</em> into groups. At its most basic, the <code>groupby()</code> method takes as an argument the column name to “group” the rows by—all rows that have the same value in that column will be placed in the same group. The <code>groupby()</code> method returns a new <em>GroupBy</em> value, which is a special data structure that tracks which <em>rows</em> have been put into different groups. You can use the <code>.groups</code> <em>attribute</em> to get a dictionary of groups and which row indices are in each, or use the <code>get_group(column)</code> method to get a DataFrame of just that group:</p>
<pre class="language-python"><code><span class="token comment"># Separate into groups by "section" column</span>
by_section_groups <span class="token operator">=</span> scores_df<span class="token punctuation">.</span>groupby<span class="token punctuation">(</span><span class="token string">'section'</span><span class="token punctuation">)</span>

<span class="token comment"># Get a dictionary of which rows are in which group</span>
<span class="token comment"># The key is the group label; the value is a list of row indices in the group</span>
group_dict <span class="token operator">=</span> by_section_groups<span class="token punctuation">.</span>groups
<span class="token keyword">print</span><span class="token punctuation">(</span>group_dict<span class="token punctuation">)</span> <span class="token comment"># {'A': [0, 1, 2, 3], 'B': [4, 5, 6]}</span>

<span class="token comment"># Get the DataFrame subset representing a single group</span>
section_a_df <span class="token operator">=</span> by_section_groups<span class="token punctuation">.</span>get_group<span class="token punctuation">(</span><span class="token string">'A'</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>section_a_df<span class="token punctuation">)</span>
    <span class="token comment">#     name section  year  midterm  final</span>
    <span class="token comment"># 0    Ada       A     4       92     88</span>
    <span class="token comment"># 1    Bob       A     3       90     86</span>
    <span class="token comment"># 2  Chris       A     1       99     80</span>
    <span class="token comment"># 3   Diya       A     3       95     95</span></code></pre>
<p>Although we talk about the <code>groupby()</code> method as “separating” or “breaking” a DataFrame into groups, it doesn’t actually modify the DataFrame at all! Similarly, the GroupBy value produced is a single value; it just has metadata indicating which rows belong to which groups.</p>
<p>It is possible to specify more complex grouping criteria as argument(s) to the <code>groupby()</code> method. For example, you can specify a <em>list of columns</em> as an argument, in which case the rows will be grouped first by one column, and then by the next:</p>
<pre class="language-python"><code><span class="token comment"># Group by "section" and "year"</span>
section_and_year_groups <span class="token operator">=</span> scores_df<span class="token punctuation">.</span>groupby<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">'section'</span><span class="token punctuation">,</span> <span class="token string">'year'</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>section_and_year_groups<span class="token punctuation">.</span>groups<span class="token punctuation">)</span>
    <span class="token comment"># {</span>
    <span class="token comment">#   ('A', 1): [2],</span>
    <span class="token comment">#   ('A', 3): [1, 3],</span>
    <span class="token comment">#   ('A', 4): [0],</span>
    <span class="token comment">#   ('B', 1): [5],</span>
    <span class="token comment">#   ('B', 2): [4],</span>
    <span class="token comment">#   ('B', 4): [6]</span>
    <span class="token comment"># }</span></code></pre>
<p>In the above example, there is a group for each “section and year” combination. (In this small example dataset, many groups have just a single row).</p>
<div id="aggregation" class="section level3" number="9.4.1">
<h3><span class="header-section-number">9.4.1</span> Aggregation</h3>
<p>Grouping rows of a DataFrame isn’t particularly useful on its own; you can achieve the same ends just by using regular access techniques (bracket notation, etc). The real purpose of grouping is to be able to apply <em>aggregation methods per group</em>—you can quickly calculate statistics for each group at once in order to compare them!</p>
<p>The <code>pandas</code> library calls this the <a href="https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html"><strong>split-apply-combine</strong></a> process, which has 3 steps:</p>
<ol style="list-style-type: decimal">
<li><strong>Split</strong> the data into groups</li>
<li><strong>Apply</strong> some operation to each individual group</li>
<li><strong>Combine</strong> the results back into a single data structure</li>
</ol>
<p>You split data into groups by using the <code>groupby()</code> method. You apply <strong>aggregation functions</strong> (such as <code>mean()</code>, <code>max()</code>, etc) to each group. And then because you’re working on a GroupBy value, those results are automatically combined back into a DataFrame. In a way, an <strong>aggregation</strong> combines steps 2 and 3.</p>
<p>You can apply an aggregation operation to each of the groups by calling that method on the GroupBy value. The method will automatically be applied to <em>each</em> group (as if they were separate DataFrames). The method will then return a new DataFrame, whose rows are the results <em>per group</em>.</p>
<pre class="language-python"><code> <span class="token comment"># A DataFrame with only the section and numeric columns</span>
scores_only_df <span class="token operator">=</span> scores_df<span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token string">'section'</span><span class="token punctuation">,</span> <span class="token string">'midterm'</span><span class="token punctuation">,</span> <span class="token string">'final'</span><span class="token punctuation">]</span><span class="token punctuation">]</span>

<span class="token comment"># Split into groups</span>
by_section_groups <span class="token operator">=</span> scores_only_df<span class="token punctuation">.</span>groupby<span class="token punctuation">(</span><span class="token string">'section'</span><span class="token punctuation">)</span>

<span class="token comment"># Apply the `mean()` function and Combine into new DataFrame</span>
mean_by_section_df <span class="token operator">=</span> by_section_groups<span class="token punctuation">.</span>mean<span class="token punctuation">(</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>mean_by_section_df<span class="token punctuation">)</span>
    <span class="token comment">#            midterm      final</span>
    <span class="token comment"># section</span>
    <span class="token comment"># A        94.000000  87.250000</span>
    <span class="token comment"># B        88.666667  90.333333</span>

<span class="token comment"># Apply the `max()` function and Combine into new DataFrame</span>
max_by_section_df <span class="token operator">=</span> by_section_groups<span class="token punctuation">.</span><span class="token builtin">max</span><span class="token punctuation">(</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>max_by_section_df<span class="token punctuation">)</span>
    <span class="token comment">#          midterm  final</span>
    <span class="token comment"># section</span>
    <span class="token comment"># A             99     95</span>
    <span class="token comment"># B             96     94</span></code></pre>
<p>The above code calculates the statistics for each column (exactly as happens when called on a DataFrame); but it calculates it for each group separately. The result for each group is a different row in the resulting DataFrame.</p>
<p>There are a number of <a href="https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#built-in-aggregation-methods">built-in aggregation methods</a> such as <code>max()</code> and <code>mean()</code>; some common ones are demonstrated in the previous sections.</p>
<p>A useful “generic” method for performing such aggregations is the <strong><code>agg()</code></strong> method. This method supports a variety of arguments allowing you to customize what aggregations are performed on the groups. For example, you can pass in a string with the name of the aggregation method to call, or a <em>list of strings</em> of methods to call:</p>
<pre class="language-python"><code><span class="token comment"># Apply multiple aggregation functions at once</span>
range_stats_df <span class="token operator">=</span> by_section_groups<span class="token punctuation">.</span>agg<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">'min'</span><span class="token punctuation">,</span> <span class="token string">'mean'</span><span class="token punctuation">,</span> <span class="token string">'max'</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>range_stats_df<span class="token punctuation">)</span>
    <span class="token comment">#         midterm                final</span>
    <span class="token comment">#             min       mean max   min       mean max</span>
    <span class="token comment"># section</span>
    <span class="token comment"># A            90  94.000000  99    80  87.250000  95</span>
    <span class="token comment"># B            83  88.666667  96    85  90.333333  94</span></code></pre>
<p>This is a normal DataFrame, it’s just that the columns are <em>tuples</em> (e.g., <code>('midterm', 'min')</code>) instead of strings. The <code>pandas</code> library will print it out in a readable way.</p>
<p>You can further customize what aggregations you perform by instead passing the <code>agg()</code> method multiple <em>named</em> arguments. The name of each argument will be the desired column label in the resulting DataFrame, and the value of each argument should be a <em>tuple</em> of the column to aggregate and the aggregation function to apply. These are called <strong>named aggregations</strong>:</p>
<pre class="language-python"><code><span class="token comment"># Apply specific aggregations to specific columns, producing named results</span>
<span class="token comment"># (each argument is on its own line just for readability)</span>
custom_stats_df <span class="token operator">=</span> by_section_groups<span class="token punctuation">.</span>agg<span class="token punctuation">(</span>
  avg_mid<span class="token operator">=</span><span class="token punctuation">(</span><span class="token string">'midterm'</span><span class="token punctuation">,</span> <span class="token string">'mean'</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
  avg_final<span class="token operator">=</span><span class="token punctuation">(</span><span class="token string">'final'</span><span class="token punctuation">,</span> <span class="token string">'mean'</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
  max_final<span class="token operator">=</span><span class="token punctuation">(</span><span class="token string">'final'</span><span class="token punctuation">,</span> <span class="token string">'max'</span><span class="token punctuation">)</span>
<span class="token punctuation">)</span>

<span class="token keyword">print</span><span class="token punctuation">(</span>custom_stats_df<span class="token punctuation">)</span>
    <span class="token comment">#            avg_mid  avg_final  max_final</span>
    <span class="token comment"># section</span>
    <span class="token comment"># A        94.000000  87.250000         95</span>
    <span class="token comment"># B        88.666667  90.333333         94</span></code></pre>
<p>This syntax is particularly useful for creating “clean” DataFrames that can be used for further processing.</p>
<p>There are many other options for using the <code>agg()</code> method as well. See <a href="https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#the-aggregate-method">the <code>pandas</code> user guide</a> for more details and examples.</p>
<p>When performing an aggregation after grouping by multiple columns, you can end up with a DataFrame whose indices are <em>tuples</em>:</p>
<pre class="language-python"><code>final_by_year_section_df <span class="token operator">=</span> scores_df<span class="token punctuation">.</span>groupby<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">'section'</span><span class="token punctuation">,</span> <span class="token string">'year'</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">.</span>agg<span class="token punctuation">(</span>
  max_final<span class="token operator">=</span><span class="token punctuation">(</span><span class="token string">'final'</span><span class="token punctuation">,</span> <span class="token string">'max'</span><span class="token punctuation">)</span>
<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>final_by_year_section_df<span class="token punctuation">.</span>index<span class="token punctuation">)</span>
<span class="token comment"># MultiIndex([('A', 1),</span>
<span class="token comment">#             ('A', 3),</span>
<span class="token comment">#             ('A', 4),</span>
<span class="token comment">#             ('B', 1),</span>
<span class="token comment">#             ('B', 2),</span>
<span class="token comment">#             ('B', 4)],</span>
<span class="token comment">#            names=['section', 'year'])</span></code></pre>
<p>A <a href="https://pandas.pydata.org/docs/user_guide/advanced.html">MultiIndex</a> is an advanced concept from <code>pandas</code>; while there are lots of ways of working with such indices, at a basic level you can think of them as just being a situation where each index is a <code>tuple</code>.</p>
<p>This can work fine if you’re considering data grouped by multiple columns. However, sometimes you will want to “go back” and only have data indexed by a single value. You can achieve this by using the <a href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.reset_index.html"><strong>reset_index()</strong></a> method. This method can be used to pull a column out of the index and make it into a regular column of the data. It takes as an argument which column you want to remove from the index:</p>
<pre class="language-python"><code><span class="token keyword">print</span><span class="token punctuation">(</span>final_by_year_section_df<span class="token punctuation">)</span>
<span class="token comment">#               max_final</span>
<span class="token comment"># section year</span>
<span class="token comment"># A       1            80</span>
<span class="token comment">#         3            95</span>
<span class="token comment">#         4            88</span>
<span class="token comment"># B       1            94</span>
<span class="token comment">#         2            85</span>
<span class="token comment">#         4            92</span>

final_by_section_df <span class="token operator">=</span> final_by_year_section_df<span class="token punctuation">.</span>reset_index<span class="token punctuation">(</span><span class="token string">'year'</span><span class="token punctuation">)</span>
<span class="token comment">#         year  max_final</span>
<span class="token comment"># section</span>
<span class="token comment"># A           1         80</span>
<span class="token comment"># A           3         95</span>
<span class="token comment"># A           4         88</span>
<span class="token comment"># B           1         94</span>
<span class="token comment"># B           2         85</span>
<span class="token comment"># B           4         92</span>

<span class="token keyword">print</span><span class="token punctuation">(</span>final_by_section_df<span class="token punctuation">.</span>index<span class="token punctuation">)</span>
<span class="token comment"># Index(['A', 'A', 'A', 'B', 'B', 'B'], dtype='object', name='section')</span></code></pre>
<p>The difference between these DataFrames is subtle. In the second one, <code>year</code> has been moved to a column (where the <code>section</code> is the index label—note that each element has a section listed on its row). This would allow you to do further analysis on the data (which is now organized only by section, with year as a feature rather than an index).</p>
<p>Overall, grouping can allow you to quickly compare different subsets of your data. In doing so, you’re redefining your <strong>unit of analysis</strong>. Grouping lets you frame your analysis question in terms of comparing groups of observations, rather than individual observations. This form of abstraction makes it easier to ask and answer complex questions about your data.</p>
</div>
</div>
<div id="resources-8" class="section level2 unnumbered">
<h2>Resources</h2>
<p>This chapter provides only an introduction to the <code>pandas</code> library; there is <em>a lot</em> more to this library, with many more functions, structures, approaches, etc. To learn more about using <code>pandas</code>, check out the official <a href="https://pandas.pydata.org/pandas-docs/stable/user_guide/index.html">user guide</a>, <a href="https://pandas.pydata.org/pandas-docs/stable/getting_started/tutorials.html">tutorials</a>, and <a href="https://pandas.pydata.org/pandas-docs/stable/user_guide/cookbook.html">“recipes”</a>.</p>
<div class="list-condensed">
<ul>
<li><a href="https://pandas.pydata.org/pandas-docs/stable/user_guide/10min.html">10 minutes to <code>pandas</code> (<code>pandas</code> docs)</a> a basic set of examples</li>
<li><a href="https://pandas.pydata.org/pandas-docs/stable/getting_started/tutorials.html">Tutorials (<code>pandas</code> docs)</a> a list and guide to various tutorials (of mixed quality)</li>
<li><a href="https://pandas.pydata.org/pandas-docs/stable/user_guide/index.html">User guide (<code>pandas</code> docs)</a></li>
<li><a href="https://pandas.pydata.org/pandas-docs/stable/user_guide/cookbook.html">Cookbook (<code>pandas</code> docs)</a> a set of short examples and patterns for writing idiomatic <code>pandas</code> code</li>
<li><a href="https://jakevdp.github.io/PythonDataScienceHandbook/">Python Data Science Handbook</a> a full textbook with extensive details about using Pandas (and related libraries).</li>
<li><a href="http://dataanalysispython.readthedocs.io/en/latest/pandas.html">Pandas. Data Processing (Data Analysis in Python)</a></li>
</ul>
</div>
<!-- One particularly useful method to mention is the [`apply()`](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.apply.html#pandas.Series.apply) method. This method is used to _apply_ a particular **callback function** to each element in the series. This is a _mapping_ operation, similar to what you've done with the `map()` function:

```python
def square(n):  # a function that squares a number
    return n**2

number_series = pd.Series([1,2,3,4,5])  # an initial series

square_series = number_series.apply(square)
list(square_series)  # [1, 4, 9, 16, 25]

# Can also apply built-in functions
import math
sqrt_series = number_series.apply(math.sqrt)
list(sqrt_series)  # [1.0, 1.4142135623730951, 1.7320508075688772, 2.0, 2.2360679774997898]

# Pass additional arguments as keyword args (or `args` for a single argument)
cubed_series = number_series.apply(math.pow, args=(3,)) # call math.pow(n, 3) on each
list(cubed_series)  # [1.0, 8.0, 27.0, 64.0, 125.0]
``` -->
<!-- FOR FUTURE VERSIONS -->
<!-- //piping -->
<!-- //merge/join/GROUPING -->
<!-- ## Joining DataFrames Together
//concat/append are things

//merge(join) explanation
    - default is "inner"
    - compare to "left" & "right" [`how`] key -->

</div>
</div>
            </section>

          </div>
        </div>
      </div>
<a href="functional-programming.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
<a href="accessing-web-apis.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
    </div>
  </div>
<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
<script src="libs/gitbook-2.6.7/js/clipboard.min.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-clipboard.js"></script>
<script>
// Bootstrap for this page: request the "gitbook" module and, once it is
// available, call gitbook.start() with the configuration object below.
// NOTE(review): this script appears to be generated output (the page's
// generator meta tag names bookdown/GitBook); option meanings below are
// inferred from the key names and should be confirmed against the plugins.
gitbook.require(["gitbook"], function(gitbook) {
gitbook.start({
// Per-service flags for the sharing plugin; only "github" is enabled here.
"sharing": {
"github": true,
"facebook": false,
"twitter": false,
"linkedin": false,
"weibo": false,
"instapaper": false,
"vk": false,
"whatsapp": false,
// Presumably the services offered in a combined "share" menu — TODO confirm.
"all": ["github", "facebook", "twitter", "google"],
"google": false
},
// Initial reader display settings (theme, font family, size step).
"fontsettings": {
"theme": "white",
"family": "sans",
"size": 2
},
// "Edit" link target: the chapter's pandas.Rmd source in the infx511/book repo.
"edit": {
"link": "https://github.com/infx511/book/edit/master/pandas.Rmd",
"text": "Edit"
},
// No history or view links are configured (both link and text are null).
"history": {
"link": null,
"text": null
},
"view": {
"link": null,
"text": null
},
// No download formats are configured.
"download": null,
// Search uses the "fuse" engine with no extra options.
"search": {
"engine": "fuse",
"options": null
},
// Table-of-contents behaviour: collapse setting "section", scroll highlighting on.
"toc": {
"collapse": "section",
"scroll_highlight": true
}
});
});
</script>

</body>

</html>