2-keras.html

<!DOCTYPE html>
<!-- START: inst/pkgdown/templates/layout.html --><!-- Generated by pkgdown: do not edit by hand --><html lang="en" data-bs-theme="auto"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><title>Introduction to deep learning: Classification by a neural network using Keras</title><meta name="viewport" content="width=device-width, initial-scale=1"><script src="assets/themetoggle.js"></script><link rel="stylesheet" type="text/css" href="assets/styles.css"><script src="assets/scripts.js" type="text/javascript"></script><!-- mathjax --><script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      config: ["MMLorHTML.js"],
      jax: ["input/TeX","input/MathML","output/HTML-CSS","output/NativeMML", "output/PreviewHTML"],
      extensions: ["tex2jax.js","mml2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "a11y/accessibility-menu.js"],
      TeX: {
        extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]
      },
      tex2jax: {
        inlineMath: [['\\(', '\\)']],
        displayMath: [ ['$$','$$'], ['\\[', '\\]'] ],
        processEscapes: true
      }
    });
    </script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><!-- Responsive Favicon for The Carpentries --><link rel="apple-touch-icon" sizes="180x180" href="favicons/incubator/apple-touch-icon.png"><link rel="icon" type="image/png" sizes="32x32" href="favicons/incubator/favicon-32x32.png"><link rel="icon" type="image/png" sizes="16x16" href="favicons/incubator/favicon-16x16.png"><link rel="manifest" href="favicons/incubator/site.webmanifest"><link rel="mask-icon" href="favicons/incubator/safari-pinned-tab.svg" color="#5bbad5"><meta name="msapplication-TileColor" content="#da532c"><meta name="theme-color" media="(prefers-color-scheme: light)" content="white"><meta name="theme-color" media="(prefers-color-scheme: dark)" content="black"></head><body>
    <header id="top" class="navbar navbar-expand-md top-nav incubator"><svg xmlns="http://www.w3.org/2000/svg" class="d-none"><symbol id="check2" viewbox="0 0 16 16"><path d="M13.854 3.646a.5.5 0 0 1 0 .708l-7 7a.5.5 0 0 1-.708 0l-3.5-3.5a.5.5 0 1 1 .708-.708L6.5 10.293l6.646-6.647a.5.5 0 0 1 .708 0z"></path></symbol><symbol id="circle-half" viewbox="0 0 16 16"><path d="M8 15A7 7 0 1 0 8 1v14zm0 1A8 8 0 1 1 8 0a8 8 0 0 1 0 16z"></path></symbol><symbol id="moon-stars-fill" viewbox="0 0 16 16"><path d="M6 .278a.768.768 0 0 1 .08.858 7.208 7.208 0 0 0-.878 3.46c0 4.021 3.278 7.277 7.318 7.277.527 0 1.04-.055 1.533-.16a.787.787 0 0 1 .81.316.733.733 0 0 1-.031.893A8.349 8.349 0 0 1 8.344 16C3.734 16 0 12.286 0 7.71 0 4.266 2.114 1.312 5.124.06A.752.752 0 0 1 6 .278z"></path><path d="M10.794 3.148a.217.217 0 0 1 .412 0l.387 1.162c.173.518.579.924 1.097 1.097l1.162.387a.217.217 0 0 1 0 .412l-1.162.387a1.734 1.734 0 0 0-1.097 1.097l-.387 1.162a.217.217 0 0 1-.412 0l-.387-1.162A1.734 1.734 0 0 0 9.31 6.593l-1.162-.387a.217.217 0 0 1 0-.412l1.162-.387a1.734 1.734 0 0 0 1.097-1.097l.387-1.162zM13.863.099a.145.145 0 0 1 .274 0l.258.774c.115.346.386.617.732.732l.774.258a.145.145 0 0 1 0 .274l-.774.258a1.156 1.156 0 0 0-.732.732l-.258.774a.145.145 0 0 1-.274 0l-.258-.774a1.156 1.156 0 0 0-.732-.732l-.774-.258a.145.145 0 0 1 0-.274l.774-.258c.346-.115.617-.386.732-.732L13.863.1z"></path></symbol><symbol id="sun-fill" viewbox="0 0 16 16"><path d="M8 12a4 4 0 1 0 0-8 4 4 0 0 0 0 8zM8 0a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-1 0v-2A.5.5 0 0 1 8 0zm0 13a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-1 0v-2A.5.5 0 0 1 8 13zm8-5a.5.5 0 0 1-.5.5h-2a.5.5 0 0 1 0-1h2a.5.5 0 0 1 .5.5zM3 8a.5.5 0 0 1-.5.5h-2a.5.5 0 0 1 0-1h2A.5.5 0 0 1 3 8zm10.657-5.657a.5.5 0 0 1 0 .707l-1.414 1.415a.5.5 0 1 1-.707-.708l1.414-1.414a.5.5 0 0 1 .707 0zm-9.193 9.193a.5.5 0 0 1 0 .707L3.05 13.657a.5.5 0 0 1-.707-.707l1.414-1.414a.5.5 0 0 1 .707 0zm9.193 2.121a.5.5 0 0 1-.707 0l-1.414-1.414a.5.5 0 0 1 .707-.707l1.414 1.414a.5.5 0 0 1 0 
.707zM4.464 4.465a.5.5 0 0 1-.707 0L2.343 3.05a.5.5 0 1 1 .707-.707l1.414 1.414a.5.5 0 0 1 0 .708z"></path></symbol></svg><a class="visually-hidden-focusable skip-link" href="#main-content">Skip to main content</a>
  <div class="container-fluid top-nav-container">
    <div class="col-md-8">
      <div class="large-logo">
        <img id="incubator-logo" alt="Carpentries Incubator" src="assets/images/incubator-logo.svg"></div>
    </div>
    <div class="selector-container">
      <div id="theme-selector">
        <li class="nav-item dropdown" id="theme-button-list">
          <button class="btn btn-link nav-link px-0 px-lg-2 dropdown-toggle d-flex align-items-center" id="bd-theme" type="button" aria-expanded="false" data-bs-toggle="dropdown" data-bs-display="static" aria-label="Toggle theme (auto)">
            <svg class="bi my-1 theme-icon-active"><use href="#circle-half"></use></svg><i data-feather="chevron-down"></i>
          </button>
          <ul class="dropdown-menu dropdown-menu-end" aria-labelledby="bd-theme-text"><li>
              <button type="button" class="btn dropdown-item d-flex align-items-center" data-bs-theme-value="light" aria-pressed="false">
                <svg class="bi me-2 theme-icon"><use href="#sun-fill"></use></svg>
                Light
                <svg class="bi ms-auto d-none"><use href="#check2"></use></svg></button>
            </li>
            <li>
              <button type="button" class="btn dropdown-item d-flex align-items-center" data-bs-theme-value="dark" aria-pressed="false">
                <svg class="bi me-2 theme-icon"><use href="#moon-stars-fill"></use></svg>
                Dark
                <svg class="bi ms-auto d-none"><use href="#check2"></use></svg></button>
            </li>
            <li>
              <button type="button" class="btn dropdown-item d-flex align-items-center active" data-bs-theme-value="auto" aria-pressed="true">
                <svg class="bi me-2 theme-icon"><use href="#circle-half"></use></svg>
                Auto
                <svg class="bi ms-auto d-none"><use href="#check2"></use></svg></button>
            </li>
          </ul></li>
      </div>

      <div class="dropdown" id="instructor-dropdown">
        <button class="btn btn-secondary dropdown-toggle bordered-button" type="button" id="dropdownMenu1" data-bs-toggle="dropdown" aria-expanded="false">
          <i aria-hidden="true" class="icon" data-feather="eye"></i> Learner View <i data-feather="chevron-down"></i>
        </button>
        <ul class="dropdown-menu" aria-labelledby="dropdownMenu1"><li><button class="dropdown-item" type="button" onclick="window.location.href='instructor/2-keras.html';">Instructor View</button></li>
        </ul></div>
    </div>
  </div>
  <hr></header><nav class="navbar navbar-expand-xl bottom-nav incubator" aria-label="Main Navigation"><div class="container-fluid nav-container">
    <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle Navigation">
      <span class="navbar-toggler-icon"></span>
      <span class="menu-title">Menu</span>
    </button>
    <div class="nav-logo">
      <img class="small-logo" alt="Carpentries Incubator" src="assets/images/incubator-logo-sm.svg"></div>
    <div class="lesson-title-md">
      Introduction to deep learning
    </div>
    <div class="search-icon-sm">
      <!-- TODO: do not show until we have search
        <i role="img" aria-label="Search the All In One page" data-feather="search"></i>
      -->
    </div>
    <div class="desktop-nav">
      <ul class="navbar-nav me-auto mb-2 mb-lg-0"><li class="nav-item">
          <span class="lesson-title">
            Introduction to deep learning
          </span>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="key-points.html">Key Points</a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="reference.html#glossary">Glossary</a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="profiles.html">Learner Profiles</a>
        </li>
        <li class="nav-item dropdown">
          <button class="nav-link dropdown-toggle" id="navbarDropdown" data-bs-toggle="dropdown" aria-expanded="false">
            More <i data-feather="chevron-down"></i>
          </button>
          <ul class="dropdown-menu" aria-labelledby="navbarDropdown"><li><a class="dropdown-item" href="reference.html">Reference</a></li>
          </ul></li>
      </ul></div>
    <!--
    <form class="d-flex col-md-2 search-form">
      <fieldset disabled>
      <input class="form-control me-2 searchbox" type="search" placeholder="" aria-label="">
        <button class="btn btn-outline-success tablet-search-button"  type="submit">
          <i class="search-icon" data-feather="search" role="img" aria-label="Search the All In One page"></i>
        </button>
      </fieldset>
    </form>
    -->
    <a id="search-button" class="btn btn-primary" href="aio.html" role="button" aria-label="Search the All In One page">Search the All In One page</a>
  </div><!--/div.container-fluid -->
</nav><div class="col-md-12 mobile-title">
  Introduction to deep learning
</div>

<aside class="col-md-12 lesson-progress"><div style="width: 13%" class="percentage">
    13%
  </div>
  <div class="progress incubator">
    <div class="progress-bar incubator" role="progressbar" style="width: 13%" aria-valuenow="13" aria-label="Lesson Progress" aria-valuemin="0" aria-valuemax="100">
    </div>
  </div>
</aside><div class="container">
      <div class="row">
        <!-- START: inst/pkgdown/templates/navbar.html -->
<div id="sidebar-col" class="col-lg-4">
  <div id="sidebar" class="sidebar">
      <nav aria-labelledby="flush-headingEleven"><button role="button" aria-label="close menu" alt="close menu" aria-expanded="true" aria-controls="sidebar" class="collapse-toggle" data-collapse="Collapse " data-episodes="Episodes ">
          <i class="search-icon" data-feather="x" role="img"></i>
        </button>
        <div class="sidebar-inner">
          <div class="row mobile-row" id="theme-row-mobile">
            <div class="col" id="theme-selector">
              <li class="nav-item dropdown" id="theme-button-list">
                <button class="btn btn-link nav-link px-0 px-lg-2 dropdown-toggle d-flex align-items-center" id="bd-theme" type="button" aria-expanded="false" data-bs-toggle="dropdown" data-bs-display="static" aria-label="Toggle theme (auto)">
                  <svg class="bi my-1 theme-icon-active"><use href="#circle-half"></use></svg><span class="d-lg-none ms-1" id="bd-theme-text">Toggle Theme</span>
                </button>
                <ul class="dropdown-menu dropdown-menu-right" aria-labelledby="bd-theme-text"><li>
                    <button type="button" class="btn dropdown-item d-flex align-items-center" data-bs-theme-value="light" aria-pressed="false">
                      <svg class="bi me-2 theme-icon"><use href="#sun-fill"></use></svg>
                      Light
                      <svg class="bi ms-auto d-none"><use href="#check2"></use></svg></button>
                  </li>
                  <li>
                    <button type="button" class="btn dropdown-item d-flex align-items-center" data-bs-theme-value="dark" aria-pressed="false">
                      <svg class="bi me-2 theme-icon"><use href="#moon-stars-fill"></use></svg>
                      Dark
                      <svg class="bi ms-auto d-none"><use href="#check2"></use></svg></button>
                  </li>
                  <li>
                    <button type="button" class="btn dropdown-item d-flex align-items-center active" data-bs-theme-value="auto" aria-pressed="true">
                      <svg class="bi me-2 theme-icon"><use href="#circle-half"></use></svg>
                      Auto
                      <svg class="bi ms-auto d-none"><use href="#check2"></use></svg></button>
                  </li>
                </ul></li>
            </div>
          </div>
          <div class="row mobile-row">
            <div class="col">
              <div class="sidenav-view-selector">
                <div class="accordion accordion-flush" id="accordionFlush9">
                  <div class="accordion-item">
                    <h2 class="accordion-header" id="flush-headingNine">
                      <button class="accordion-button collapsed" id="instructor" type="button" data-bs-toggle="collapse" data-bs-target="#flush-collapseNine" aria-expanded="false" aria-controls="flush-collapseNine">
                        <i id="eye" aria-hidden="true" class="icon" data-feather="eye"></i> Learner View
                      </button>
                    </h2>
                    <div id="flush-collapseNine" class="accordion-collapse collapse" aria-labelledby="flush-headingNine" data-bs-parent="#accordionFlush9">
                      <div class="accordion-body">
                        <a href="instructor/2-keras.html">Instructor View</a>
                      </div>
                    </div>
                  </div><!--/div.accordion-item-->
                </div><!--/div.accordion-flush-->
              </div><!--div.sidenav-view-selector -->
            </div><!--/div.col -->

            <hr></div><!--/div.mobile-row -->

          <div class="accordion accordion-flush" id="accordionFlush11">
            <div class="accordion-item">

              <button id="chapters" class="accordion-button show" type="button" data-bs-toggle="collapse" data-bs-target="#flush-collapseEleven" aria-expanded="true" aria-controls="flush-collapseEleven">
                <h2 class="accordion-header chapters" id="flush-headingEleven">
                  EPISODES
                </h2>
              </button>
              <div id="flush-collapseEleven" class="accordion-collapse show collapse" aria-labelledby="flush-headingEleven" data-bs-parent="#accordionFlush11">

                <div class="accordion-body">
                  <div class="accordion accordion-flush" id="accordionFlush1">
  <div class="accordion-item">
    <div class="accordion-header" id="flush-heading1">
        <a href="index.html">Summary and Setup</a>
    </div><!--/div.accordion-header-->

  </div><!--/div.accordion-item-->
</div><!--/div.accordion-flush-->

<div class="accordion accordion-flush" id="accordionFlush2">
  <div class="accordion-item">
    <div class="accordion-header" id="flush-heading2">
        <a href="1-introduction.html">1. Introduction</a>
    </div><!--/div.accordion-header-->

  </div><!--/div.accordion-item-->
</div><!--/div.accordion-flush-->

<div class="accordion accordion-flush" id="accordionFlushcurrent">
  <div class="accordion-item">
    <div class="accordion-header" id="flush-headingcurrent">
      <button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#flush-collapsecurrent" aria-expanded="true" aria-controls="flush-collapsecurrent">
        <span class="visually-hidden">Current Chapter</span>
        <span class="current-chapter">
        2. Classification by a neural network using Keras
        </span>
      </button>
    </div><!--/div.accordion-header-->

    <div id="flush-collapsecurrent" class="accordion-collapse collapse show" aria-labelledby="flush-headingcurrent" data-bs-parent="#accordionFlushcurrent">
      <div class="accordion-body">
        <ul><li><a href="#introduction">Introduction</a></li>
<li><a href="#formulateoutline-the-problem-penguin-classification">1. Formulate/outline the problem: penguin classification</a></li>
<li><a href="#identify-inputs-and-outputs">2. Identify inputs and outputs</a></li>
<li><a href="#prepare-data">3. Prepare data</a></li>
<li><a href="#build-an-architecture-from-scratch-or-choose-a-pretrained-model">4. Build an architecture from scratch or choose a pretrained
model</a></li>
<li><a href="#choose-a-loss-function-and-optimizer">5. Choose a loss function and optimizer</a></li>
<li><a href="#train-model">6. Train model</a></li>
<li><a href="#perform-a-predictionclassification">7. Perform a prediction/classification</a></li>
<li><a href="#measuring-performance">8. Measuring performance</a></li>
<li><a href="#refine-the-model">9. Refine the model</a></li>
<li><a href="#share-model">10. Share model</a></li>
        </ul></div><!--/div.accordion-body-->
    </div><!--/div.accordion-collapse-->

  </div><!--/div.accordion-item-->
</div><!--/div.accordion-flush-->

<div class="accordion accordion-flush" id="accordionFlush4">
  <div class="accordion-item">
    <div class="accordion-header" id="flush-heading4">
        <a href="3-monitor-the-model.html">3. Monitor the training process</a>
    </div><!--/div.accordion-header-->

  </div><!--/div.accordion-item-->
</div><!--/div.accordion-flush-->

<div class="accordion accordion-flush" id="accordionFlush5">
  <div class="accordion-item">
    <div class="accordion-header" id="flush-heading5">
        <a href="4-advanced-layer-types.html">4. Advanced layer types</a>
    </div><!--/div.accordion-header-->

  </div><!--/div.accordion-item-->
</div><!--/div.accordion-flush-->

<div class="accordion accordion-flush" id="accordionFlush6">
  <div class="accordion-item">
    <div class="accordion-header" id="flush-heading6">
        <a href="5-transfer-learning.html">5. Transfer learning</a>
    </div><!--/div.accordion-header-->

  </div><!--/div.accordion-item-->
</div><!--/div.accordion-flush-->

<div class="accordion accordion-flush" id="accordionFlush7">
  <div class="accordion-item">
    <div class="accordion-header" id="flush-heading7">
        <a href="6-outlook.html">6. Outlook</a>
    </div><!--/div.accordion-header-->

  </div><!--/div.accordion-item-->
</div><!--/div.accordion-flush-->

                </div>
              </div>
            </div>

            <hr class="half-width"><div class="accordion accordion-flush lesson-resources" id="accordionFlush12">
              <div class="accordion-item">
                <h2 class="accordion-header" id="flush-headingTwelve">
                  <button class="accordion-button collapsed" id="lesson-resources" type="button" data-bs-toggle="collapse" data-bs-target="#flush-collapseTwelve" aria-expanded="false" aria-controls="flush-collapseTwelve">
                    RESOURCES
                  </button>
                </h2>
                <div id="flush-collapseTwelve" class="accordion-collapse collapse" aria-labelledby="flush-headingTwelve" data-bs-parent="#accordionFlush12">
                  <div class="accordion-body">
                    <ul><li>
                        <a href="key-points.html">Key Points</a>
                      </li>
                      <li>
                        <a href="reference.html#glossary">Glossary</a>
                      </li>
                      <li>
                        <a href="profiles.html">Learner Profiles</a>
                      </li>
                      <li><a href="reference.html">Reference</a></li>
                    </ul></div>
                </div>
              </div>
            </div>
            <hr class="half-width lesson-resources"><a href="aio.html">See all in one page</a>


            <hr class="d-none d-sm-block d-md-none"><div class="d-grid gap-1">

            </div>
          </div><!-- /div.accordion -->
        </div><!-- /div.sidebar-inner -->
      </nav></div><!-- /div.sidebar -->
  </div><!-- /div.sidebar-col -->
<!-- END:   inst/pkgdown/templates/navbar.html-->

        <!-- START: inst/pkgdown/templates/content-instructor.html -->
  <div class="col-xl-8 col-lg-12 primary-content">
    <nav class="lesson-content mx-md-4" aria-label="Previous and Next Chapter"><!-- content for small screens --><div class="d-block d-sm-block d-md-none">
        <a class="chapter-link" href="1-introduction.html"><i aria-hidden="true" class="small-arrow" data-feather="arrow-left"></i>Previous</a>
        <a class="chapter-link float-end" href="3-monitor-the-model.html">Next<i aria-hidden="true" class="small-arrow" data-feather="arrow-right"></i></a>
      </div>
      <!-- content for large screens -->
      <div class="d-none d-sm-none d-md-block">
        <a class="chapter-link" href="1-introduction.html" rel="prev">
          <i aria-hidden="true" class="small-arrow" data-feather="arrow-left"></i>
          Previous: Introduction
        </a>
        <a class="chapter-link float-end" href="3-monitor-the-model.html" rel="next">
          Next: Monitor the training...
          <i aria-hidden="true" class="small-arrow" data-feather="arrow-right"></i>
        </a>
      </div>
      <hr></nav><main id="main-content" class="main-content"><div class="container lesson-content">
        <h1>Classification by a neural network using Keras</h1>
        <p>Last updated on 2024-12-03 |

        <a href="https://github.com/carpentries-incubator/deep-learning-intro/edit/main/episodes/2-keras.Rmd" class="external-link">Edit this page <i aria-hidden="true" data-feather="edit"></i></a></p>


        <div class="text-end">
          <button role="button" aria-pressed="false" tabindex="0" id="expand-code" class="pull-right" data-expand="Expand All Solutions " data-collapse="Collapse All Solutions "> Expand All Solutions <i aria-hidden="true" data-feather="plus"></i></button>
        </div>

        
<div class="overview card">
<h2 class="card-header">Overview</h2>
<div class="row g-0">
<div class="col-md-4">
<div class="card-body">
<div class="inner">
<h3 class="card-title">Questions</h3>
<ul><li>How do I compose a neural network using Keras?</li>
<li>How do I train this network on a dataset?</li>
<li>How do I get insight into the learning process?</li>
<li>How do I measure the performance of the network?</li>
</ul></div>
</div>
</div>
<div class="col-md-8">
<div class="card-body">
<div class="inner bordered">
<h3 class="card-title">Objectives</h3>
<ul><li>Use the deep learning workflow to structure the notebook</li>
<li>Explore the dataset using pandas and seaborn</li>
<li>Identify the inputs and outputs of a deep neural network.</li>
<li>Use one-hot encoding to prepare data for classification in
Keras</li>
<li>Describe a fully connected layer</li>
<li>Implement a fully connected layer with Keras</li>
<li>Use Keras to train a small fully connected network on prepared
data</li>
<li>Interpret the loss curve of the training process</li>
<li>Use a confusion matrix to measure the trained network’s performance
on a test set</li>
</ul></div>
</div>
</div>
</div>
</div>
<section><h2 class="section-heading" id="introduction">Introduction<a class="anchor" aria-label="anchor" href="#introduction"></a></h2>
<hr class="half-width"><p>In this episode we will learn how to create and train a neural
network using Keras to solve a simple classification task.</p>
<p>The goal of this episode is to quickly get your hands dirty in
actually defining and training a neural network, without going into
depth of how neural networks work on a technical or mathematical level.
We want you to go through the full deep learning workflow once before
going into more details.</p>
<p>In fact, this is also what we would recommend you to do when working
on real-world problems: First quickly build a working pipeline, while
taking shortcuts. Then, slowly make the pipeline more advanced while you
keep on evaluating the approach.</p>
<p>In <a href="3-monitor-the-model.html">episode 3</a> we will expand on
the concepts that are lightly introduced in this episode. Some of these
concepts include: how to monitor the training progress and how
optimization works.</p>

<p>As a reminder, below are the steps of the deep learning workflow:</p>
<ol style="list-style-type: decimal"><li>Formulate / Outline the problem</li>
<li>Identify inputs and outputs</li>
<li>Prepare data</li>
<li>Choose a pretrained model or start building architecture from
scratch</li>
<li>Choose a loss function and optimizer</li>
<li>Train the model</li>
<li>Perform a Prediction/Classification</li>
<li>Measure performance</li>
<li>Refine the model</li>
<li>Save model</li>
</ol><p>In this episode we will focus on a minimal example for each of these
steps; later episodes will build on this knowledge to go into greater
depth for some or all of these steps.</p>
<div id="gpu-usage" class="callout">
<div class="callout-square">
<i class="callout-icon" data-feather="bell"></i>
</div>
<div id="gpu-usage" class="callout-inner">
<h3 class="callout-title">GPU usage</h3>
<div class="callout-content">
<p>For this lesson having a <a href="https://glosario.carpentries.org/en/#gpu" class="external-link">GPU (graphics processing
unit)</a> available is not needed. We specifically use very small toy
problems so that you do not need one. However, Keras will use your GPU
automatically when it is available. Using a GPU becomes necessary when
tackling larger datasets or complex problems which require a more
complex neural network.</p>
</div>
</div>
</div>
</section><section><h2 class="section-heading" id="formulateoutline-the-problem-penguin-classification">1. Formulate/outline the problem: penguin classification<a class="anchor" aria-label="anchor" href="#formulateoutline-the-problem-penguin-classification"></a></h2>
<hr class="half-width"><p>In this episode we will be using the <a href="https://zenodo.org/record/3960218" class="external-link">penguin dataset</a>. This is a
dataset that was published in 2020 by Allison Horst and contains data on
three different species of penguins.</p>
<p>We will use the penguin dataset to train a neural network which can
classify which species a penguin belongs to, based on their physical
characteristics.</p>
<div id="goal" class="callout">
<div class="callout-square">
<i class="callout-icon" data-feather="bell"></i>
</div>
<div id="goal" class="callout-inner">
<h3 class="callout-title">Goal</h3>
<div class="callout-content">
<p>The goal is to predict a penguin’s species using the attributes
available in this dataset.</p>
</div>
</div>
</div>
<p>The <code>palmerpenguins</code> data contains size measurements for
three penguin species observed on three islands in the Palmer
Archipelago, Antarctica. The physical attributes measured are flipper
length, beak length, beak depth, body mass, and sex.</p>
<figure><img src="fig/palmer_penguins.png" title="Palmer Penguins" alt="Illustration of the three species of penguins found in the Palmer Archipelago, Antarctica: Chinstrap, Gentoo and Adelie" class="figure mx-auto d-block"><div class="figcaption"><em>Artwork by <span class="citation">@allison_horst</span></em></div>
</figure><figure><img src="fig/culmen_depth.png" title="Culmen Depth" alt='Illustration of how the beak dimensions were measured. In the raw data, bill dimensions are recorded as "culmen length" and "culmen depth". The culmen is the dorsal ridge atop the bill.' class="figure mx-auto d-block"><div class="figcaption"><em>Artwork by <span class="citation">@allison_horst</span></em></div>
</figure><p>These data were collected from 2007 - 2009 by Dr. Kristen Gorman with
the <a href="https://lternet.edu/site/palmer-antarctica-lter/" class="external-link">Palmer
Station Long Term Ecological Research Program</a>, part of the <a href="https://lternet.edu/" class="external-link">US Long Term Ecological Research
Network</a>. The data were imported directly from the <a href="https://edirepository.org/" class="external-link">Environmental Data Initiative</a>
(EDI) Data Portal, and are available for use by CC0 license (“No Rights
Reserved”) in accordance with the <a href="https://lternet.edu/data-access-policy/" class="external-link">Palmer Station Data
Policy</a>.</p>
</section><section><h2 class="section-heading" id="identify-inputs-and-outputs">2. Identify inputs and outputs<a class="anchor" aria-label="anchor" href="#identify-inputs-and-outputs"></a></h2>
<hr class="half-width"><p>To identify the inputs and outputs that we will use to design the
neural network we need to familiarize ourselves with the dataset. This
step is sometimes also called data exploration.</p>
<p>We will start by importing the <a href="https://seaborn.pydata.org/" class="external-link">Seaborn</a> library that will help us
get the dataset and visualize it. Seaborn is a powerful library with
many visualizations. Keep in mind it requires the data to be in a pandas
dataframe; luckily, the datasets available in seaborn are already in a
pandas dataframe.</p>
<div class="codewrapper sourceCode" id="cb1">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" tabindex="-1"></a><span class="im">import</span> seaborn <span class="im">as</span> sns</span></code></pre>
</div>
<p>We can load the penguin dataset using</p>
<div class="codewrapper sourceCode" id="cb2">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" tabindex="-1"></a>penguins <span class="op">=</span> sns.load_dataset(<span class="st">'penguins'</span>)</span></code></pre>
</div>
<p>This will give you a pandas dataframe which contains the penguin
data.</p>
<div class="section level3">
<h3 id="inspecting-the-data">Inspecting the data<a class="anchor" aria-label="anchor" href="#inspecting-the-data"></a></h3>
<p>Using the pandas <code>head</code> function gives us a quick look at
the data:</p>
<div class="codewrapper sourceCode" id="cb3">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" tabindex="-1"></a>penguins.head()</span></code></pre>
</div>
<table style="width:100%;" class="table"><colgroup><col width="6%"><col width="14%"><col width="13%"><col width="17%"><col width="11%"><col width="11%"><col width="11%"><col width="11%"></colgroup><thead><tr class="header"><th align="right"></th>
<th align="right">species</th>
<th align="right">island</th>
<th align="right">bill_length_mm</th>
<th align="right">bill_depth_mm</th>
<th align="right">flipper_length_mm</th>
<th align="right">body_mass_g</th>
<th align="right">sex</th>
</tr></thead><tbody><tr class="odd"><td align="right">0</td>
<td align="right">Adelie</td>
<td align="right">Torgersen</td>
<td align="right">39.1</td>
<td align="right">18.7</td>
<td align="right">181.0</td>
<td align="right">3750.0</td>
<td align="right">Male</td>
</tr><tr class="even"><td align="right">1</td>
<td align="right">Adelie</td>
<td align="right">Torgersen</td>
<td align="right">39.5</td>
<td align="right">17.4</td>
<td align="right">186.0</td>
<td align="right">3800.0</td>
<td align="right">Female</td>
</tr><tr class="odd"><td align="right">2</td>
<td align="right">Adelie</td>
<td align="right">Torgersen</td>
<td align="right">40.3</td>
<td align="right">18.0</td>
<td align="right">195.0</td>
<td align="right">3250.0</td>
<td align="right">Female</td>
</tr><tr class="even"><td align="right">3</td>
<td align="right">Adelie</td>
<td align="right">Torgersen</td>
<td align="right">NaN</td>
<td align="right">NaN</td>
<td align="right">NaN</td>
<td align="right">NaN</td>
<td align="right">NaN</td>
</tr><tr class="odd"><td align="right">4</td>
<td align="right">Adelie</td>
<td align="right">Torgersen</td>
<td align="right">36.7</td>
<td align="right">19.3</td>
<td align="right">193.0</td>
<td align="right">3450.0</td>
<td align="right">Female</td>
</tr></tbody></table><p>We can use all columns as features to predict the species of the
penguin, except for the <code>species</code> column itself.</p>
<p>Let’s look at the shape of the dataset:</p>
<div class="codewrapper sourceCode" id="cb4">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" tabindex="-1"></a>penguins.shape</span></code></pre>
</div>
<p>There are 344 samples and 7 columns (plus the index column), so 6
features.</p>
</div>
<div class="section level3">
<h3 id="visualization">Visualization<a class="anchor" aria-label="anchor" href="#visualization"></a></h3>
<p>Looking at numbers like this usually does not give a very good
intuition about the data we are working with, so let us create a
visualization.</p>
<div class="section level4">
<h4 id="pair-plot">Pair Plot<a class="anchor" aria-label="anchor" href="#pair-plot"></a></h4>
<p>One nice visualization for datasets with relatively few attributes is
the Pair Plot. This can be created using <code>sns.pairplot(...)</code>.
It shows a scatterplot of each attribute plotted against each of the
other attributes. By using the <code>hue='species'</code> setting for
the pairplot the graphs on the diagonal are layered kernel density
estimate plots for the different values of the <code>species</code>
column.</p>
<div class="codewrapper sourceCode" id="cb5">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" tabindex="-1"></a>sns.pairplot(penguins, hue<span class="op">=</span><span class="st">"species"</span>)</span></code></pre>
</div>
<figure><img src="fig/pairplot.png" title="Pair Plot" alt="Grid of scatter plots and histograms comparing observed values of the four physical attributes (features) measured in the penguins sampled. Scatter plots illustrate the distribution of values observed for each pair of features. On the diagonal, where one feature would be compared with itself, histograms are displayed that show the distribution of values observed for that feature, coloured according to the species of the individual sampled. The pair plot shows distinct but overlapping clusters of data points representing the different species, with no pair of features providing a clean separation of clusters on its own." class="figure mx-auto d-block"></figure><div id="pairplot" class="callout challenge">
<div class="callout-square">
<i class="callout-icon" data-feather="zap"></i>
</div>
<div id="pairplot" class="callout-inner">
<h3 class="callout-title">Pairplot</h3>
<div class="callout-content">
<p>Take a look at the pairplot we created. Consider the following
questions:</p>
<ul><li>Is there any class that is easily distinguishable from the
others?</li>
<li>Which combination of attributes shows the best separation for all 3
class labels at once?</li>
<li>(optional) Create a similar pairplot, but with
<code>hue="sex"</code>. Explain the patterns you see. Which combination
of features distinguishes the two sexes best?</li>
</ul></div>
</div>
</div>
<div id="accordionSolution1" class="accordion challenge-accordion accordion-flush">
<div class="accordion-item">
<button class="accordion-button solution-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#collapseSolution1" aria-expanded="false" aria-controls="collapseSolution1">
  <h4 class="accordion-header" id="headingSolution1"> Show me the solution </h4>
</button>
<div id="collapseSolution1" class="accordion-collapse collapse" aria-labelledby="headingSolution1" data-bs-parent="#accordionSolution1">
<div class="accordion-body">
<ul><li>The plots show that the green class, Gentoo, is somewhat more easily
distinguishable from the other two.</li>
<li>The other two seem to be separable by a combination of bill length
and bill depth (other combinations are also possible such as bill length
and flipper length).</li>
</ul><p>Answer to optional question:</p>
<div class="codewrapper sourceCode" id="cb6">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" tabindex="-1"></a>sns.pairplot(penguins, hue<span class="op">=</span><span class="st">'sex'</span>)</span></code></pre>
</div>
<figure><img src="fig/02_sex_pairplot.png" title="Pair plot grouped by sex" alt="Grid of scatter plots and histograms comparing observed values of the four physical attributes (features) measured in the penguins sampled, with data points coloured according to the sex of the individual sampled. The pair plot shows similarly-shaped distribution of values observed for each feature in male and female penguins, with the distribution of measurements for females skewed towards smaller values." class="figure mx-auto d-block"></figure><p>You see that for each species females have smaller bills and
flippers, as well as a smaller body mass. You would need a combination
of the species and the numerical features to successfully distinguish
males from females. The combination of <code>bill_depth_mm</code> and
<code>body_mass_g</code> gives the best separation.</p>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="section level3">
<h3 id="input-and-output-selection">Input and Output Selection<a class="anchor" aria-label="anchor" href="#input-and-output-selection"></a></h3>
<p>Now that we have familiarized ourselves with the dataset we can
select the data attributes to use as input for the neural network and
the target that we want to predict.</p>
<p>In the rest of this episode we will use the
<code>bill_length_mm</code>, <code>bill_depth_mm</code>,
<code>flipper_length_mm</code>, <code>body_mass_g</code> attributes. The
target for the classification task will be the <code>species</code>.</p>
<div id="data-exploration" class="callout">
<div class="callout-square">
<i class="callout-icon" data-feather="bell"></i>
</div>
<div id="data-exploration" class="callout-inner">
<h3 class="callout-title">Data Exploration</h3>
<div class="callout-content">
<p>Exploring the data is an important step to familiarize yourself with
the problem and to help you determine the relevant inputs and
outputs.</p>
</div>
</div>
</div>
</div>
</section><section><h2 class="section-heading" id="prepare-data">3. Prepare data<a class="anchor" aria-label="anchor" href="#prepare-data"></a></h2>
<hr class="half-width"><p>The input data and target data are not yet in a format that is
suitable to use for training a neural network.</p>
<p>For now we will only use the numerical features
<code>bill_length_mm</code>, <code>bill_depth_mm</code>,
<code>flipper_length_mm</code>, and <code>body_mass_g</code>, so let’s
drop the categorical columns:</p>
<div class="codewrapper sourceCode" id="cb7">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" tabindex="-1"></a><span class="co"># Drop categorical columns</span></span>
<span id="cb7-2"><a href="#cb7-2" tabindex="-1"></a>penguins_filtered <span class="op">=</span> penguins.drop(columns<span class="op">=</span>[<span class="st">'island'</span>, <span class="st">'sex'</span>])</span></code></pre>
</div>
<div class="section level3">
<h3 id="clean-missing-values">Clean missing values<a class="anchor" aria-label="anchor" href="#clean-missing-values"></a></h3>
<p>During the exploration phase you may have noticed that some rows in
the dataset have missing (NaN) values. Leaving such values in the input
data will ruin the training, so we need to deal with them. There are
many ways to deal with missing values, but for now we will just remove
the offending rows by adding a call to <code>dropna()</code>:</p>
<div class="codewrapper sourceCode" id="cb8">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" tabindex="-1"></a><span class="co"># Drop the rows that have NaN values in them</span></span>
<span id="cb8-2"><a href="#cb8-2" tabindex="-1"></a>penguins_filtered <span class="op">=</span> penguins_filtered.dropna()</span></code></pre>
</div>
<p>Finally, we select only the features:</p>
<div class="codewrapper sourceCode" id="cb9">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" tabindex="-1"></a><span class="co"># Extract columns corresponding to features</span></span>
<span id="cb9-2"><a href="#cb9-2" tabindex="-1"></a>features <span class="op">=</span> penguins_filtered.drop(columns<span class="op">=</span>[<span class="st">'species'</span>])</span></code></pre>
</div>
</div>
<div class="section level3">
<h3 id="prepare-target-data-for-training">Prepare target data for training<a class="anchor" aria-label="anchor" href="#prepare-target-data-for-training"></a></h3>
<p>Second, the target data is also in a format that cannot be used in
training. A neural network can only take numerical inputs and outputs,
and learns by calculating how “far away” the species predicted by the
neural network is from the true species.</p>
<p>When the target is a string category column as we have here, we need
to transform this column into a numerical format first. Again, there are
many ways to do this. We will be using the one-hot encoding. This
encoding creates multiple columns, as many as there are unique values,
and puts a 1 in the column with the corresponding correct class, and 0’s
in the other columns. For instance, for a penguin of the Adelie species
the one-hot encoding would be 1 0 0.</p>
<p>Fortunately, Pandas is able to generate this encoding for us.</p>
<div class="codewrapper sourceCode" id="cb10">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
<span id="cb10-2"><a href="#cb10-2" tabindex="-1"></a></span>
<span id="cb10-3"><a href="#cb10-3" tabindex="-1"></a>target <span class="op">=</span> pd.get_dummies(penguins_filtered[<span class="st">'species'</span>])</span>
<span id="cb10-4"><a href="#cb10-4" tabindex="-1"></a>target.head() <span class="co"># print out the top 5 to see what it looks like.</span></span></code></pre>
</div>
<div id="one-hot-encoding" class="callout challenge">
<div class="callout-square">
<i class="callout-icon" data-feather="zap"></i>
</div>
<div id="one-hot-encoding" class="callout-inner">
<h3 class="callout-title">One-hot encoding</h3>
<div class="callout-content">
<p>How many output neurons will our network have now that we one-hot
encoded the target class?</p>
<ul><li>A: 1</li>
<li>B: 2</li>
<li>C: 3</li>
</ul></div>
</div>
</div>
<div id="accordionSolution2" class="accordion challenge-accordion accordion-flush">
<div class="accordion-item">
<button class="accordion-button solution-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#collapseSolution2" aria-expanded="false" aria-controls="collapseSolution2">
  <h4 class="accordion-header" id="headingSolution2"> Show me the solution </h4>
</button>
<div id="collapseSolution2" class="accordion-collapse collapse" aria-labelledby="headingSolution2" data-bs-parent="#accordionSolution2">
<div class="accordion-body">
<p>C: 3, one for each output variable class</p>
</div>
</div>
</div>
</div>
</div>
<div class="section level3">
<h3 id="split-data-into-training-and-test-set">Split data into training and test set<a class="anchor" aria-label="anchor" href="#split-data-into-training-and-test-set"></a></h3>
<p>Finally, we will split the dataset into a training set and a test
set. As the names imply we will use the training set to train the neural
network, while the test set is kept separate. We will use the test set
to assess the performance of the trained neural network on unseen
samples. In many cases a validation set is also kept separate from the
training and test sets (i.e. the dataset is split into 3 parts). This
validation set is then used to select the values of the parameters of
the neural network and the training methods. For this episode we will
keep it at just a training and test set however.</p>
<p>To split the cleaned dataset into a training and test set we will use
a very convenient function from sklearn called
<code>train_test_split</code>.</p>
<p>This function takes a number of parameters which are extensively
explained in <a href="https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html" class="external-link">the
scikit-learn documentation</a>:</p>
<ul><li>The first two parameters are the dataset (in our case
<code>features</code>) and the corresponding targets (in our case
<code>target</code>).</li>
<li>Next is the named parameter <code>test_size</code>: this is the
fraction of the dataset that is used for testing, in this case
<code>0.2</code> means 20% of the data will be used for testing.</li>
<li><code>random_state</code> controls the shuffling of the dataset;
setting this value will reproduce the same results (assuming you give
the same integer) every time it is called.</li>
<li><code>shuffle</code>, which can be either <code>True</code> or
<code>False</code>, controls whether the order of the rows of the
dataset is shuffled before splitting. It defaults to
<code>True</code>.</li>
<li><code>stratify</code> is a more advanced parameter that controls how
the split is done. By setting it to <code>target</code>, the train and
test sets the function returns will have roughly the same proportions
(with regards to the number of penguins of a certain species) as the
dataset.</li>
</ul>
<div class="codewrapper sourceCode" id="cb11">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" tabindex="-1"></a><span class="im">from</span> sklearn.model_selection <span class="im">import</span> train_test_split</span>
<span id="cb11-2"><a href="#cb11-2" tabindex="-1"></a></span>
<span id="cb11-3"><a href="#cb11-3" tabindex="-1"></a>X_train, X_test, y_train, y_test <span class="op">=</span> train_test_split(features, target, test_size<span class="op">=</span><span class="fl">0.2</span>, random_state<span class="op">=</span><span class="dv">0</span>, shuffle<span class="op">=</span><span class="va">True</span>, stratify<span class="op">=</span>target)</span></code></pre>
</div>

</div>
</section><section><h2 class="section-heading" id="build-an-architecture-from-scratch-or-choose-a-pretrained-model">4. Build an architecture from scratch or choose a pretrained
model<a class="anchor" aria-label="anchor" href="#build-an-architecture-from-scratch-or-choose-a-pretrained-model"></a></h2>
<hr class="half-width"><div class="section level3">
<h3 id="keras-for-neural-networks">Keras for neural networks<a class="anchor" aria-label="anchor" href="#keras-for-neural-networks"></a></h3>
<p>Keras is a machine learning framework with ease of use as one of its
main features. It is part of the tensorflow python package and can be
imported using <code>from tensorflow import keras</code>.</p>
<p>Keras includes functions, classes and definitions to define deep
learning models, cost functions and optimizers (optimizers are used to
train a model).</p>
<p>Before we move on to the next section of the workflow we need to make
sure we have Keras imported. We do this as follows:</p>
<div class="codewrapper sourceCode" id="cb12">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" tabindex="-1"></a><span class="im">from</span> tensorflow <span class="im">import</span> keras</span></code></pre>
</div>
<p>For this episode it is useful if everyone gets the same results from
their training. Keras uses a random number generator at certain points
during its execution. Therefore we will need to set two random seeds,
one for numpy and one for tensorflow:</p>
<div class="codewrapper sourceCode" id="cb13">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" tabindex="-1"></a><span class="im">from</span> numpy.random <span class="im">import</span> seed</span>
<span id="cb13-2"><a href="#cb13-2" tabindex="-1"></a>seed(<span class="dv">1</span>)</span>
<span id="cb13-3"><a href="#cb13-3" tabindex="-1"></a>keras.utils.set_random_seed(<span class="dv">2</span>)</span></code></pre>
</div>
</div>
<div class="section level3">
<h3 id="build-a-neural-network-from-scratch">Build a neural network from scratch<a class="anchor" aria-label="anchor" href="#build-a-neural-network-from-scratch"></a></h3>
<p>Now we will build a neural network from scratch, which is
surprisingly straightforward using Keras.</p>
<p>With Keras you compose a neural network by creating layers and
linking them together. For now we will only use one type of layer called
a fully connected or Dense layer. In Keras this is defined by the
<code>keras.layers.Dense</code> class.</p>
<p>A dense layer has a number of neurons, which is a parameter you can
choose when you create the layer. When connecting the layer to its input
and output layers every neuron in the dense layer gets an edge
(i.e. connection) to <strong><em>all</em></strong> of the input neurons
and <strong><em>all</em></strong> of the output neurons. The hidden
layer in the image in the introduction of this episode is a Dense
layer.</p>
<p>The input in Keras also gets special treatment. Keras automatically
calculates the number of inputs and outputs a layer needs and therefore
how many edges need to be created. This means we need to inform Keras
how big our input is going to be. We do this by instantiating a
<code>keras.Input</code> class and telling it how big our input is, i.e.
the number of columns it contains.</p>
<div class="codewrapper sourceCode" id="cb14">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" tabindex="-1"></a>inputs <span class="op">=</span> keras.Input(shape<span class="op">=</span>(X_train.shape[<span class="dv">1</span>],))</span></code></pre>
</div>
<p>We store a reference to this input class in a variable so we can pass
it to the creation of our hidden layer. Creating the hidden layer can
then be done as follows:</p>
<div class="codewrapper sourceCode" id="cb15">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb15-1"><a href="#cb15-1" tabindex="-1"></a>hidden_layer <span class="op">=</span> keras.layers.Dense(<span class="dv">10</span>, activation<span class="op">=</span><span class="st">"relu"</span>)(inputs)</span></code></pre>
</div>
<p>The instantiation here has 2 parameters and a seemingly strange
combination of parentheses, so let us take a closer look. The first
parameter <code>10</code> is the number of neurons we want in this
layer, this is one of the hyperparameters of our system and needs to be
chosen carefully. We will get back to this in the section on refining
the model.</p>
<p>The second parameter is the activation function to use. We choose
<code>relu</code> which returns 0 for inputs that are 0 and below and
the identity function (returning the same value) for inputs above 0.
This is a commonly used activation function in deep neural networks that
is proven to work well.</p>
<p>Next we see an extra set of parentheses with inputs in them. This
means that after creating an instance of the Dense layer we call it as
if it was a function. This tells the Dense layer to connect the layer
passed as a parameter, in this case the inputs.</p>
<p>Finally we store a reference in the <code>hidden_layer</code>
variable so we can pass it to the output layer in a minute.</p>
<p>Now we create another layer that will be our output layer. Again we
use a Dense layer and so the call is very similar to the previous
one.</p>
<div class="codewrapper sourceCode" id="cb16">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" tabindex="-1"></a>output_layer <span class="op">=</span> keras.layers.Dense(<span class="dv">3</span>, activation<span class="op">=</span><span class="st">"softmax"</span>)(hidden_layer)</span></code></pre>
</div>
<p>Because we chose the one-hot encoding, we use three neurons for the
output layer.</p>
<p>The <code>softmax</code> activation ensures that the three output
neurons produce values in the range (0, 1) and they sum to 1. We can
interpret this as a kind of ‘probability’ that the sample belongs to a
certain species.</p>
<p>Now that we have defined the layers of our neural network we can
combine them into a Keras model which facilitates training the
network.</p>
<div class="codewrapper sourceCode" id="cb17">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" tabindex="-1"></a>model <span class="op">=</span> keras.Model(inputs<span class="op">=</span>inputs, outputs<span class="op">=</span>output_layer)</span>
<span id="cb17-2"><a href="#cb17-2" tabindex="-1"></a>model.summary()</span></code></pre>
</div>
<p>The model summary here can show you some information about the neural
network we have defined.</p>
<div id="trainable-and-non-trainable-parameters" class="callout">
<div class="callout-square">
<i class="callout-icon" data-feather="bell"></i>
</div>
<div id="trainable-and-non-trainable-parameters" class="callout-inner">
<h3 class="callout-title">Trainable and non-trainable parameters</h3>
<div class="callout-content">
<p>Keras distinguishes between two types of weights, namely:</p>
<ul><li><p>trainable parameters: these are weights of the neurons that are
modified when we train the model in order to minimize our loss function
(we will learn about loss functions shortly!).</p></li>
<li><p>non-trainable parameters: these are weights of the neurons that
are not changed when we train the model. These could be for many reasons
- using a pre-trained model, choice of a particular filter for a
convolutional neural network, and statistical weights for batch
normalization are some examples.</p></li>
</ul><p>If these reasons are not clear right away, don’t worry! In later
episodes of this course, we will touch upon a couple of these
concepts.</p>
</div>
</div>
</div>
<div id="create-the-neural-network" class="callout challenge">
<div class="callout-square">
<i class="callout-icon" data-feather="zap"></i>
</div>
<div id="create-the-neural-network" class="callout-inner">
<h3 class="callout-title">Create the neural network</h3>
<div class="callout-content">
<p>With the code snippets above, we defined a Keras model with 1 hidden
layer with 10 neurons and an output layer with 3 neurons.</p>
<ol style="list-style-type: decimal"><li>How many parameters does the resulting model have?</li>
<li>What happens to the number of parameters if we increase or decrease
the number of neurons in the hidden layer?</li>
</ol><div class="section level4">
<h4 id="optional-keras-sequential-vs-functional-api">(optional) Keras Sequential vs Functional API<a class="anchor" aria-label="anchor" href="#optional-keras-sequential-vs-functional-api"></a></h4>
<p>So far we have used the <a href="https://keras.io/guides/functional_api/" class="external-link">Functional API</a> of
Keras. You can also implement neural networks using <a href="https://keras.io/guides/sequential_model/" class="external-link">the Sequential
model</a>. As you can read in the documentation, the Sequential model is
appropriate for <strong>a plain stack of layers</strong> where each
layer has <strong>exactly one input tensor and one output
tensor</strong>.</p>
<ol start="3" style="list-style-type: decimal"><li>(optional) Use the Sequential model to implement the same
network</li>
</ol></div>
</div>
</div>
</div>
<div id="accordionSolution3" class="accordion challenge-accordion accordion-flush">
<div class="accordion-item">
<button class="accordion-button solution-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#collapseSolution3" aria-expanded="false" aria-controls="collapseSolution3">
  <h4 class="accordion-header" id="headingSolution3"> Show me the solution </h4>
</button>
<div id="collapseSolution3" class="accordion-collapse collapse" aria-labelledby="headingSolution3" data-bs-parent="#accordionSolution3">
<div class="accordion-body">
<p>Have a look at the output of <code>model.summary()</code>:</p>
<div class="codewrapper sourceCode" id="cb18">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb18-1"><a href="#cb18-1" tabindex="-1"></a>model.summary()</span></code></pre>
</div>
<div class="codewrapper">
<h3 class="code-label">OUTPUT<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="output" tabindex="0"><code>Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_1 (InputLayer)         [(None, 4)]               0
_________________________________________________________________
dense (Dense)                (None, 10)                50
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 33
=================================================================
Total params: 83
Trainable params: 83
Non-trainable params: 0
_________________________________________________________________</code></pre>
</div>
<p>The model has 83 trainable parameters. Each of the 10 neurons in the
<code>dense</code> hidden layer is connected to each of the 4
inputs in the input layer resulting in 40 weights that can be trained.
The 10 neurons in the hidden layer are also connected to each of the 3
outputs in the <code>dense_1</code> output layer, resulting in a further
30 weights that can be trained. By default <code>Dense</code> layers in
Keras also contain 1 bias term for each neuron, resulting in a further
10 bias values for the hidden layer and 3 bias terms for the output
layer. <code>40+30+10+3=83</code> trainable parameters.</p>
<p>If you increase or decrease the number of neurons in the hidden layer,
the number of trainable parameters in both the hidden and output layer
increases or decreases accordingly. Each extra
neuron has 4 weights connected to the input layer, 1 bias term, and 3
weights connected to the output layer. So in total 8 extra
parameters.</p>
<p><em>The name in quotes within the string
<code>Model: "model_1"</code> may be different in your view; this detail
is not important.</em></p>
<div class="section level4">
<h4 id="optional-keras-sequential-vs-functional-api-1">(optional) Keras Sequential vs Functional API<a class="anchor" aria-label="anchor" href="#optional-keras-sequential-vs-functional-api-1"></a></h4>
<ol start="3" style="list-style-type: decimal"><li>This implements the same model using the Sequential API:</li>
</ol><div class="codewrapper sourceCode" id="cb20">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb20-1"><a href="#cb20-1" tabindex="-1"></a>model <span class="op">=</span> keras.Sequential(</span>
<span id="cb20-2"><a href="#cb20-2" tabindex="-1"></a>    [</span>
<span id="cb20-3"><a href="#cb20-3" tabindex="-1"></a>        keras.Input(shape<span class="op">=</span>(X_train.shape[<span class="dv">1</span>],)),</span>
<span id="cb20-4"><a href="#cb20-4" tabindex="-1"></a>        keras.layers.Dense(<span class="dv">10</span>, activation<span class="op">=</span><span class="st">"relu"</span>),</span>
<span id="cb20-5"><a href="#cb20-5" tabindex="-1"></a>        keras.layers.Dense(<span class="dv">3</span>, activation<span class="op">=</span><span class="st">"softmax"</span>),</span>
<span id="cb20-6"><a href="#cb20-6" tabindex="-1"></a>    ]</span>
<span id="cb20-7"><a href="#cb20-7" tabindex="-1"></a>)</span></code></pre>
</div>
<p>We will use the Functional API for the remainder of this course,
since it is more flexible and more explicit.</p>
</div>
</div>
</div>
</div>
</div>
<div id="how-to-choose-an-architecture" class="callout">
<div class="callout-square">
<i class="callout-icon" data-feather="bell"></i>
</div>
<div id="how-to-choose-an-architecture" class="callout-inner">
<h3 class="callout-title">How to choose an architecture?</h3>
<div class="callout-content">
<p>Even for this small neural network, we had to make a choice on the
number of hidden neurons. Other choices to be made are the number of
layers and type of layers (as we will see later). You might wonder how
you should make these architectural choices. Unfortunately, there are no
clear rules to follow here, and it often boils down to a lot of trial
and error. However, it is recommended to look at what others have done
with similar datasets and problems. Another best practice is to start
with a relatively simple architecture. Once that is running, start to add
layers and tweak the network to see if performance increases.</p>
</div>
</div>
</div>
</div>
<div class="section level3">
<h3 id="choose-a-pretrained-model">Choose a pretrained model<a class="anchor" aria-label="anchor" href="#choose-a-pretrained-model"></a></h3>
<p>If your data and problem are very similar to what others have done,
you can often use a <em>pretrained network</em>. Even if your problem is
different, but the data type is common (for example images), you can use
a pretrained network and finetune it for your problem. A large number of
openly available pretrained networks can be found on <a href="https://huggingface.co/models" class="external-link">Hugging Face</a> (especially LLMs),
<a href="https://monai.io/" class="external-link">MONAI</a> (medical imaging), the <a href="https://modelzoo.co/" class="external-link">Model Zoo</a>, <a href="https://pytorch.org/hub/" class="external-link">pytorch hub</a> or <a href="https://www.tensorflow.org/hub/" class="external-link">tensorflow hub</a>.</p>
</div>
</section><section><h2 class="section-heading" id="choose-a-loss-function-and-optimizer">5. Choose a loss function and optimizer<a class="anchor" aria-label="anchor" href="#choose-a-loss-function-and-optimizer"></a></h2>
<hr class="half-width"><p>We have now designed a neural network that in theory we should be
able to train to classify Penguins. However, we first need to select an
appropriate loss function that we will use during training. This loss
function tells the training algorithm how wrong, or how ‘far away’ from
the true value the predicted value is.</p>
<p>For the one-hot encoding that we selected earlier a suitable loss
function is the Categorical Crossentropy loss. In Keras this is
implemented in the <code>keras.losses.CategoricalCrossentropy</code>
class. This loss function works well in combination with the
<code>softmax</code> activation function we chose earlier. The
Categorical Crossentropy works by comparing the probabilities that the
neural network predicts with ‘true’ probabilities that we generated
using the one-hot encoding. This is a measure for how close the
distribution of the three neural network outputs corresponds to the
distribution of the three values in the one-hot encoding. It is lower if
the distributions are more similar.</p>
<p>For more information on the available loss functions in Keras you can
check the <a href="https://www.tensorflow.org/api_docs/python/tf/keras/losses" class="external-link">documentation</a>.</p>
<p>Next we need to choose which optimizer to use and, if this optimizer
has parameters, what values to use for those. Furthermore, we need to
specify how many times to show the training samples to the
optimizer.</p>
<p>Once more, Keras gives us plenty of choices all of which have their
own pros and cons, but for now let us go with the widely used <a href="https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam" class="external-link">Adam
optimizer</a>. Adam has a number of parameters, but the default values
work well for most problems. So we will use it with its default
parameters.</p>
<p>Combining this with the loss function we decided on earlier we can
now compile the model using <code>model.compile</code>. Compiling the
model prepares it to start the training.</p>
<div class="codewrapper sourceCode" id="cb21">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb21-1"><a href="#cb21-1" tabindex="-1"></a>model.<span class="bu">compile</span>(optimizer<span class="op">=</span><span class="st">'adam'</span>, loss<span class="op">=</span>keras.losses.CategoricalCrossentropy())</span></code></pre>
</div>
</section><section><h2 class="section-heading" id="train-model">6. Train model<a class="anchor" aria-label="anchor" href="#train-model"></a></h2>
<hr class="half-width"><p>We are now ready to train the model.</p>
<p>Training the model is done using the <code>fit</code> method. It
takes the input data and target data as inputs and it has several other
parameters for certain options of the training. Here we only set a
different number of <code>epochs</code>. One training epoch means that
every sample in the training data has been shown to the neural network
and used to update its parameters.</p>
<div class="codewrapper sourceCode" id="cb22">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb22-1"><a href="#cb22-1" tabindex="-1"></a>history <span class="op">=</span> model.fit(X_train, y_train, epochs<span class="op">=</span><span class="dv">100</span>)</span></code></pre>
</div>
<p>The fit method returns a history object that has a history attribute
with the training loss and potentially other metrics per training epoch.
It can be very insightful to plot the training loss to see how the
training progresses. Using seaborn we can do this as follows:</p>
<div class="codewrapper sourceCode" id="cb23">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb23-1"><a href="#cb23-1" tabindex="-1"></a>sns.lineplot(x<span class="op">=</span>history.epoch, y<span class="op">=</span>history.history[<span class="st">'loss'</span>])</span></code></pre>
</div>
<figure><img src="fig/02_training_curve.png" title="Training Curve" alt="Training loss curve of the neural network training which depicts exponential decrease in loss before a plateau from ~10 epochs" class="figure mx-auto d-block"></figure><p>This plot can be used to identify whether the training is well
configured or whether there are problems that need to be addressed.</p>
<div id="the-training-curve" class="callout challenge">
<div class="callout-square">
<i class="callout-icon" data-feather="zap"></i>
</div>
<div class="callout-inner">
<h3 class="callout-title">The Training Curve</h3>
<div class="callout-content">
<p>Look at the training curve we have just made.</p>
<ol style="list-style-type: decimal"><li>How does the training progress?
<ul><li>Does the training loss increase or decrease?</li>
<li>Does it change quickly or slowly?</li>
<li>Does the graph look very jittery?</li>
</ul></li>
<li>Do you think the resulting trained network will work well on the
test set?</li>
</ol><p>When the training process does not go well:</p>
<ol start="3" style="list-style-type: decimal"><li>(optional) Something went wrong here during training. What could be
the problem, and how do you see that in the training curve? Also compare
the range on the y-axis with the previous training curve. <img src="fig/02_bad_training_history_1.png" title="Training Curve Gone Wrong" alt="Very jittery training curve with the loss value jumping back and forth between 2 and 4. The range of the y-axis is from 2 to 4, whereas in the previous training curve it was from 0 to 2. The loss seems to decrease a little bit, but not as much as compared to the previous plot where it dropped to almost 0. The minimum loss in the end is somewhere around 2." class="figure"></li>
</ol></div>
</div>
</div>
<div id="accordionSolution4" class="accordion challenge-accordion accordion-flush">
<div class="accordion-item">
<button class="accordion-button solution-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#collapseSolution4" aria-expanded="false" aria-controls="collapseSolution4">
  <h4 class="accordion-header" id="headingSolution4"> Show me the solution </h4>
</button>
<div id="collapseSolution4" class="accordion-collapse collapse" aria-labelledby="headingSolution4" data-bs-parent="#accordionSolution4">
<div class="accordion-body">
<ol style="list-style-type: decimal"><li>The training loss decreases quickly. It drops in a smooth line with
little jitter. This is ideal for a training curve.</li>
<li>The results of the training give very little information on its
performance on a test set. You should be careful not to use it as an
indication of a well trained network.</li>
<li>(optional) The loss does not go down at all, or only very slightly.
This means that the model is not learning anything. It could be that
something went wrong in the data preparation (for example the labels are
not attached to the right features). In addition, the graph is very
jittery. This means that for every update step, the weights in the
network are updated in such a way that the loss sometimes increases a
lot and sometimes decreases a lot. This could indicate that the weights
are updated too much at every learning step and you need a smaller
learning rate (we will go into more details on this in the next
episode). Or there is a high variation in the data, leading the
optimizer to change the weights in different directions at every
learning step. This could be addressed by presenting more data at every
learning step (or in other words increasing the batch size). In this
case the graph was created by training on nonsense data, so this is a
training curve for a problem where nothing can be learned really.</li>
</ol><p>We will take a closer look at training curves in the next episode.
Some of the concepts touched upon here will also be further explained
there.</p>
</div>
</div>
</div>
</div>
</section><section><h2 class="section-heading" id="perform-a-predictionclassification">7. Perform a prediction/classification<a class="anchor" aria-label="anchor" href="#perform-a-predictionclassification"></a></h2>
<hr class="half-width"><p>Now that we have a trained neural network, we can use it to predict
new samples of penguins using the <code>predict</code> function.</p>
<p>We will use the neural network to predict the species of the test set
using the <code>predict</code> function. We will be using this
prediction in the next step to measure the performance of our trained
network. This will return a <code>numpy</code> matrix, which we convert
to a pandas dataframe to easily see the labels.</p>
<div class="codewrapper sourceCode" id="cb24">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb24-1"><a href="#cb24-1" tabindex="-1"></a>y_pred <span class="op">=</span> model.predict(X_test)</span>
<span id="cb24-2"><a href="#cb24-2" tabindex="-1"></a>prediction <span class="op">=</span> pd.DataFrame(y_pred, columns<span class="op">=</span>target.columns)</span>
<span id="cb24-3"><a href="#cb24-3" tabindex="-1"></a>prediction</span></code></pre>
</div>
<table class="table"><tbody><tr class="odd"><td align="right">0</td>
<td align="right">0.304484</td>
<td align="right">0.192893</td>
<td align="right">0.502623</td>
</tr><tr class="even"><td align="right">1</td>
<td align="right">0.527107</td>
<td align="right">0.095888</td>
<td align="right">0.377005</td>
</tr><tr class="odd"><td align="right">2</td>
<td align="right">0.373989</td>
<td align="right">0.195604</td>
<td align="right">0.430406</td>
</tr><tr class="even"><td align="right">3</td>
<td align="right">0.493643</td>
<td align="right">0.154104</td>
<td align="right">0.352253</td>
</tr><tr class="odd"><td align="right">4</td>
<td align="right">0.309051</td>
<td align="right">0.308646</td>
<td align="right">0.382303</td>
</tr><tr class="even"><td align="right">…</td>
<td align="right">…</td>
<td align="right">…</td>
<td align="right">…</td>
</tr><tr class="odd"><td align="right">64</td>
<td align="right">0.406074</td>
<td align="right">0.191430</td>
<td align="right">0.402496</td>
</tr><tr class="even"><td align="right">65</td>
<td align="right">0.645621</td>
<td align="right">0.077174</td>
<td align="right">0.277204</td>
</tr><tr class="odd"><td align="right">66</td>
<td align="right">0.356284</td>
<td align="right">0.185958</td>
<td align="right">0.457758</td>
</tr><tr class="even"><td align="right">67</td>
<td align="right">0.393868</td>
<td align="right">0.159575</td>
<td align="right">0.446557</td>
</tr><tr class="odd"><td align="right">68</td>
<td align="right">0.509837</td>
<td align="right">0.144219</td>
<td align="right">0.345943</td>
</tr></tbody></table><p>Remember that the output of the network uses the <code>softmax</code>
activation function and has three outputs, one for each species. This
dataframe shows this nicely.</p>
<p>We now need to transform this output to one penguin species per
sample. We can do this by looking for the index of highest valued output
and converting that to the corresponding species. Pandas dataframes have
the <code>idxmax</code> function, which will do exactly that.</p>
<div class="codewrapper sourceCode" id="cb25">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb25-1"><a href="#cb25-1" tabindex="-1"></a>predicted_species <span class="op">=</span> prediction.idxmax(axis<span class="op">=</span><span class="st">"columns"</span>)</span>
<span id="cb25-2"><a href="#cb25-2" tabindex="-1"></a>predicted_species</span></code></pre>
</div>
<div class="codewrapper">
<h3 class="code-label">OUTPUT<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="output" tabindex="0"><code>0     Gentoo
1     Adelie
2     Gentoo
3     Adelie
4     Gentoo
      ...
64    Adelie
65    Adelie
66    Gentoo
67    Gentoo
68    Adelie
Length: 69, dtype: object</code></pre>
</div>

</section><section><h2 class="section-heading" id="measuring-performance">8. Measuring performance<a class="anchor" aria-label="anchor" href="#measuring-performance"></a></h2>
<hr class="half-width"><p>Now that we have a trained neural network it is important to assess
how well it performs. We want to know how well it will perform in a
realistic prediction scenario. Measuring performance will also come back
when refining the model.</p>
<p>We have created a test set (i.e. y_test) during the data preparation
stage which we will use now to create a confusion matrix.</p>
<div class="section level3">
<h3 id="confusion-matrix">Confusion matrix<a class="anchor" aria-label="anchor" href="#confusion-matrix"></a></h3>
<p>With the predicted species we can now create a confusion matrix and
display it using seaborn. To create a confusion matrix we will use
another convenient function from sklearn called
<code>confusion_matrix</code>. This function takes as a first parameter
the true labels of the test set. We can get these by using the
<code>idxmax</code> method on the y_test dataframe. The second parameter
is the predicted labels, which we computed above.</p>
<div class="codewrapper sourceCode" id="cb27">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb27-1"><a href="#cb27-1" tabindex="-1"></a><span class="im">from</span> sklearn.metrics <span class="im">import</span> confusion_matrix</span>
<span id="cb27-2"><a href="#cb27-2" tabindex="-1"></a></span>
<span id="cb27-3"><a href="#cb27-3" tabindex="-1"></a>true_species <span class="op">=</span> y_test.idxmax(axis<span class="op">=</span><span class="st">"columns"</span>)</span>
<span id="cb27-4"><a href="#cb27-4" tabindex="-1"></a></span>
<span id="cb27-5"><a href="#cb27-5" tabindex="-1"></a>matrix <span class="op">=</span> confusion_matrix(true_species, predicted_species)</span>
<span id="cb27-6"><a href="#cb27-6" tabindex="-1"></a><span class="bu">print</span>(matrix)</span></code></pre>
</div>
<div class="codewrapper">
<h3 class="code-label">OUTPUT<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="output" tabindex="0"><code>[[22  0  8]
 [ 5  0  9]
 [ 6  0 19]]</code></pre>
</div>
<p>Unfortunately, this matrix is not immediately understandable. It’s not
clear which column and which row corresponds to which species. So let’s
convert it to a pandas dataframe with its index and columns set to the
species as follows:</p>
<div class="codewrapper sourceCode" id="cb29">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb29-1"><a href="#cb29-1" tabindex="-1"></a><span class="co"># Convert to a pandas dataframe</span></span>
<span id="cb29-2"><a href="#cb29-2" tabindex="-1"></a>confusion_df <span class="op">=</span> pd.DataFrame(matrix, index<span class="op">=</span>y_test.columns.values, columns<span class="op">=</span>y_test.columns.values)</span>
<span id="cb29-3"><a href="#cb29-3" tabindex="-1"></a></span>
<span id="cb29-4"><a href="#cb29-4" tabindex="-1"></a><span class="co"># Set the names of the x and y axis, this helps with the readability of the heatmap.</span></span>
<span id="cb29-5"><a href="#cb29-5" tabindex="-1"></a>confusion_df.index.name <span class="op">=</span> <span class="st">'True Label'</span></span>
<span id="cb29-6"><a href="#cb29-6" tabindex="-1"></a>confusion_df.columns.name <span class="op">=</span> <span class="st">'Predicted Label'</span></span>
<span id="cb29-7"><a href="#cb29-7" tabindex="-1"></a>confusion_df.head()</span></code></pre>
</div>
<p>We can then use the <code>heatmap</code> function from seaborn to
create a nice visualization of the confusion matrix. The
<code>annot=True</code> parameter here will put the numbers from the
confusion matrix in the heatmap.</p>
<div class="codewrapper sourceCode" id="cb30">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb30-1"><a href="#cb30-1" tabindex="-1"></a>sns.heatmap(confusion_df, annot<span class="op">=</span><span class="va">True</span>)</span></code></pre>
</div>
<figure><img src="fig/confusion_matrix.png" title="Confusion Matrix" alt="Confusion matrix of the test set with high accuracy for Adelie and Gentoo classification and no correctly predicted Chinstrap" class="figure mx-auto d-block"></figure><div id="confusion-matrix-1" class="callout challenge">
<div class="callout-square">
<i class="callout-icon" data-feather="zap"></i>
</div>
<div class="callout-inner">
<h3 class="callout-title">Confusion Matrix</h3>
<div class="callout-content">
<p>Measure the performance of the neural network you trained and
visualize a confusion matrix.</p>
<ul><li>Did the neural network perform well on the test set?</li>
<li>Did you expect this from the training loss you saw?</li>
<li>What could we do to improve the performance?</li>
</ul></div>
</div>
</div>
<div id="accordionSolution5" class="accordion challenge-accordion accordion-flush">
<div class="accordion-item">
<button class="accordion-button solution-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#collapseSolution5" aria-expanded="false" aria-controls="collapseSolution5">
  <h4 class="accordion-header" id="headingSolution5"> Show me the solution </h4>
</button>
<div id="collapseSolution5" class="accordion-collapse collapse" aria-labelledby="headingSolution5" data-bs-parent="#accordionSolution5">
<div class="accordion-body">
<p>The confusion matrix shows that the predictions for Adelie and Gentoo
are decent, but could be improved. However, Chinstrap is not predicted
ever. The training loss was very low, so from that perspective this may
be surprising. But this illustrates very well why a test set is
important when training neural networks. We can try many things to
improve the performance from here. One of the first things we can try is
to balance the dataset better. Other options include: changing the
network architecture or changing the training parameters.</p>
<p>Note that the outcome you have might be slightly different from what
is shown in this tutorial.</p>
</div>
</div>
</div>
</div>
</div>
</section><section><h2 class="section-heading" id="refine-the-model">9. Refine the model<a class="anchor" aria-label="anchor" href="#refine-the-model"></a></h2>
<hr class="half-width"><p>As we discussed before the design and training of a neural network
comes with many hyperparameter and model architecture choices. We will
go into more depth of these choices in later episodes. For now it is
important to realize that the parameters we chose were somewhat
arbitrary and more careful consideration needs to be taken to pick
hyperparameter values.</p>
</section><section><h2 class="section-heading" id="share-model">10. Share model<a class="anchor" aria-label="anchor" href="#share-model"></a></h2>
<hr class="half-width"><p>It is very useful to be able to use the trained neural network at a
later stage without having to retrain it. This can be done by using the
<code>save</code> method of the model. It takes a string as a parameter
which is the path of a directory where the model is stored.</p>
<div class="codewrapper sourceCode" id="cb31">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb31-1"><a href="#cb31-1" tabindex="-1"></a>model.save(<span class="st">'my_first_model'</span>)</span></code></pre>
</div>
<p>This saved model can be loaded again by using the
<code>load_model</code> method as follows:</p>
<div class="codewrapper sourceCode" id="cb32">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb32-1"><a href="#cb32-1" tabindex="-1"></a>pretrained_model <span class="op">=</span> keras.models.load_model(<span class="st">'my_first_model'</span>)</span></code></pre>
</div>
<p>This loaded model can be used as before to predict.</p>
<div class="codewrapper sourceCode" id="cb33">
<h3 class="code-label">PYTHON<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="sourceCode python" tabindex="0"><code class="sourceCode python"><span id="cb33-1"><a href="#cb33-1" tabindex="-1"></a><span class="co"># use the pretrained model here</span></span>
<span id="cb33-2"><a href="#cb33-2" tabindex="-1"></a>y_pretrained_pred <span class="op">=</span> pretrained_model.predict(X_test)</span>
<span id="cb33-3"><a href="#cb33-3" tabindex="-1"></a>pretrained_prediction <span class="op">=</span> pd.DataFrame(y_pretrained_pred, columns<span class="op">=</span>target.columns.values)</span>
<span id="cb33-4"><a href="#cb33-4" tabindex="-1"></a></span>
<span id="cb33-5"><a href="#cb33-5" tabindex="-1"></a><span class="co"># idxmax will select the column for each row with the highest value</span></span>
<span id="cb33-6"><a href="#cb33-6" tabindex="-1"></a>pretrained_predicted_species <span class="op">=</span> pretrained_prediction.idxmax(axis<span class="op">=</span><span class="st">"columns"</span>)</span>
<span id="cb33-7"><a href="#cb33-7" tabindex="-1"></a><span class="bu">print</span>(pretrained_predicted_species)</span></code></pre>
</div>
<div class="codewrapper">
<h3 class="code-label">OUTPUT<i aria-hidden="true" data-feather="chevron-left"></i><i aria-hidden="true" data-feather="chevron-right"></i>
</h3>
<pre class="output" tabindex="0"><code>0     Adelie
1     Gentoo
2     Adelie
3     Gentoo
4     Gentoo
      ...
64    Gentoo
65    Gentoo
66    Adelie
67    Adelie
68    Gentoo
Length: 69, dtype: object</code></pre>
</div>
<div id="keypoints1" class="callout keypoints">
<div class="callout-square">
<i class="callout-icon" data-feather="key"></i>
</div>
<div class="callout-inner">
<h3 class="callout-title">Key Points</h3>
<div class="callout-content">
<ul><li>The deep learning workflow is a useful tool to structure your
approach, it helps to make sure you do not forget any important
steps.</li>
<li>Exploring the data is an important step to familiarize yourself with
the problem and to help you determine the relevant inputs and
outputs.</li>
<li>One-hot encoding is a preprocessing step to prepare labels for
classification in Keras.</li>
<li>A fully connected layer is a layer which has connections to all
neurons in the previous and subsequent layers.</li>
<li>keras.layers.Dense is an implementation of a fully connected layer,
you can set the number of neurons in the layer and the activation
function used.</li>
<li>To train a neural network with Keras we need to first define the
network using layers and the Model class. Then we can train it using the
model.fit function.</li>
<li>Plotting the loss curve can be used to identify and troubleshoot the
training process.</li>
<li>The loss curve on the training set does not provide any information
on how well a network performs in a real setting.</li>
<li>Creating a confusion matrix with results from a test set gives
better insight into the network’s performance.</li>
</ul></div>
</div>
</div>
<!--
Place links that you need to refer to multiple times across pages here. Delete
any links that you are not going to use.
 -->
</section></div> <!-- / div.lesson-content -->
    </main><!-- / main#main-content.main-content --><nav class="bottom-pagination mx-md-4" aria-label="Previous and Next Chapter"><div class="d-block d-sm-block d-md-none">
        <a class="chapter-link" href="1-introduction.html"><i aria-hidden="true" class="small-arrow" data-feather="arrow-left"></i>Previous</a>
        <a class="chapter-link float-end" href="3-monitor-the-model.html">Next<i aria-hidden="true" class="small-arrow" data-feather="arrow-right"></i></a>
      </div>
      <!-- content for large screens -->
      <div class="d-none d-sm-none d-md-block">
        <a class="chapter-link" href="1-introduction.html" rel="prev">
          <i aria-hidden="true" class="small-arrow" data-feather="arrow-left"></i>
          Previous: Introduction
        </a>
        <a class="chapter-link float-end" href="3-monitor-the-model.html" rel="next">
          Next: Monitor the training...
          <i aria-hidden="true" class="small-arrow" data-feather="arrow-right"></i>
        </a>
      </div>
    </nav></div> <!-- / div.primary-content.col-xs-12 -->
<!-- END:   inst/pkgdown/templates/content-instructor.html-->

      </div><!--/div.row-->
      		<footer class="row footer mx-md-3"><hr><div class="col-md-6">
        <p>This lesson is subject to the <a href="CODE_OF_CONDUCT.html">Code of Conduct</a></p>
        <p>

        <a href="https://github.com/carpentries-incubator/deep-learning-intro/edit/main/episodes/2-keras.Rmd" class="external-link">Edit on GitHub</a>

	
        | <a href="https://github.com/carpentries-incubator/deep-learning-intro/blob/main/CONTRIBUTING.md" class="external-link">Contributing</a>
        | <a href="https://github.com/carpentries-incubator/deep-learning-intro/" class="external-link">Source</a></p>
				<p><a href="https://github.com/carpentries-incubator/deep-learning-intro/blob/main/CITATION.cff" class="external-link">Cite</a> | <a href="mailto:team@carpentries.org">Contact</a> | <a href="https://carpentries.org/about/" class="external-link">About</a></p>
			</div>
			<div class="col-md-6">

        <p>Materials licensed under <a href="LICENSE.html">CC-BY 4.0</a> by the authors</p>

        <p>Template licensed under <a href="https://creativecommons.org/licenses/by-sa/4.0/" class="external-link">CC-BY 4.0</a> by <a href="https://carpentries.org/" class="external-link">The Carpentries</a></p>
        <p>Built with <a href="https://github.com/carpentries/sandpaper/tree/0.16.10" class="external-link">sandpaper (0.16.10)</a>, <a href="https://github.com/carpentries/pegboard/tree/0.7.7" class="external-link">pegboard (0.7.7)</a>, and <a href="https://github.com/carpentries/varnish/tree/1.0.5" class="external-link">varnish (1.0.5)</a></p>
			</div>
		</footer></div> <!-- / div.container -->
	<div id="to-top">
		<a href="#top">
      <i class="search-icon" data-feather="arrow-up" role="img" aria-label="Back To Top"></i><br><!-- <span class="d-none d-sm-none d-md-none d-lg-none d-xl-block">Back</span> To Top --><span class="d-none d-sm-none d-md-none d-lg-none d-xl-block">Back</span> To Top
		</a>
	</div>
  <script type="application/ld+json">
    {
  "@context": "https://schema.org",
  "@type": "TrainingMaterial",
  "@id": "https://carpentries-incubator.github.io/deep-learning-intro/2-keras.html",
  "inLanguage": "en",
  "dct:conformsTo": "https://bioschemas.org/profiles/TrainingMaterial/1.0-RELEASE",
  "description": "A Carpentries Lesson teaching foundational data and coding skills to researchers worldwide",
  "keywords": "deep learning, keras, lesson, The Carpentries, neural networks",
  "name": "Classification by a neural network using Keras",
  "creativeWorkStatus": "active",
  "url": "https://carpentries-incubator.github.io/deep-learning-intro/2-keras.html",
  "identifier": "https://carpentries-incubator.github.io/deep-learning-intro/2-keras.html",
  "dateCreated": "2020-10-17",
  "dateModified": "2024-12-03",
  "datePublished": "2024-12-03"
}

  </script><script>
		feather.replace();
	</script></body></html><!-- END:   inst/pkgdown/templates/layout.html-->