<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Tutorials — N2D2 documentation</title>
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script src="_static/jquery.js"></script>
<script src="_static/underscore.js"></script>
<script src="_static/doctools.js"></script>
<script src="_static/language_data.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>
<link rel="author" title="About these documents" href="about.html" />
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="Obtain ONNX models" href="onnx_convert.html" />
<link rel="prev" title="Performance evaluation tools" href="perfs_tools.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home" alt="Documentation Home"> N2D2
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Introduction:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="intro.html">Presentation</a></li>
<li class="toctree-l1"><a class="reference internal" href="about.html">About N2D2-IP</a></li>
<li class="toctree-l1"><a class="reference internal" href="simus.html">Performing simulations</a></li>
<li class="toctree-l1"><a class="reference internal" href="perfs_tools.html">Performance evaluation tools</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Tutorials</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#learning-deep-neural-networks-tips-and-tricks">Learning deep neural networks: tips and tricks</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#choose-the-learning-solver">Choose the learning solver</a></li>
<li class="toctree-l3"><a class="reference internal" href="#choose-the-learning-hyper-parameters">Choose the learning hyper-parameters</a></li>
<li class="toctree-l3"><a class="reference internal" href="#convergence-and-normalization">Convergence and normalization</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#building-a-classifier-neural-network">Building a classifier neural network</a></li>
<li class="toctree-l2"><a class="reference internal" href="#building-a-segmentation-neural-network">Building a segmentation neural network</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#faces-detection">Faces detection</a></li>
<li class="toctree-l3"><a class="reference internal" href="#gender-recognition">Gender recognition</a></li>
<li class="toctree-l3"><a class="reference internal" href="#rois-extraction">ROIs extraction</a></li>
<li class="toctree-l3"><a class="reference internal" href="#data-visualization">Data visualization</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#transcoding-a-learned-network-in-spike-coding">Transcoding a learned network in spike-coding</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#render-the-network-compatible-with-spike-simulations">Render the network compatible with spike simulations</a></li>
<li class="toctree-l3"><a class="reference internal" href="#configure-spike-coding-parameters">Configure spike-coding parameters</a></li>
</ul>
</li>
</ul>
</li>
</ul>
<p class="caption"><span class="caption-text">ONNX Import:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="onnx_convert.html">Obtain ONNX models</a></li>
<li class="toctree-l1"><a class="reference internal" href="onnx_import.html">Import ONNX models</a></li>
<li class="toctree-l1"><a class="reference internal" href="onnx_transfer.html">Train from ONNX models</a></li>
</ul>
<p class="caption"><span class="caption-text">Quantization and Export:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="quant_post.html">Post-training quantization</a></li>
<li class="toctree-l1"><a class="reference internal" href="quant_qat.html">[NEW] Quantization-Aware Training</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_CPP.html">Export: C++</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_CPP_STM32.html">Export: C++/STM32</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_TensorRT.html">Export: TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_DNeuro.html">Export: DNeuro</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_ONNX.html">Export: ONNX</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_legacy.html">Export: other / legacy</a></li>
</ul>
<p class="caption"><span class="caption-text">INI File Interface:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="ini_intro.html">Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_databases.html">Databases</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_data_analysis.html">Stimuli data analysis</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_environment.html">Stimuli provider (Environment)</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_layers.html">Network Layers</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_target.html">Targets (outputs & losses)</a></li>
<li class="toctree-l1"><a class="reference internal" href="adversarial.html">Adversarial module</a></li>
</ul>
<p class="caption"><span class="caption-text">Python API:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="containers.html">Containers</a></li>
<li class="toctree-l1"><a class="reference internal" href="cells.html">Cells</a></li>
<li class="toctree-l1"><a class="reference internal" href="databases.html">Databases</a></li>
<li class="toctree-l1"><a class="reference internal" href="stimuliprovider.html">StimuliProvider</a></li>
<li class="toctree-l1"><a class="reference internal" href="deepnet.html">DeepNet</a></li>
</ul>
<p class="caption"><span class="caption-text">C++ API / Developer:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="dev_intro.html">Introduction</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">N2D2</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home"></a> »</li>
<li>Tutorials</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/tuto.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="tutorials">
<h1>Tutorials<a class="headerlink" href="#tutorials" title="Permalink to this headline">¶</a></h1>
<div class="section" id="learning-deep-neural-networks-tips-and-tricks">
<h2>Learning deep neural networks: tips and tricks<a class="headerlink" href="#learning-deep-neural-networks-tips-and-tricks" title="Permalink to this headline">¶</a></h2>
<div class="section" id="choose-the-learning-solver">
<h3>Choose the learning solver<a class="headerlink" href="#choose-the-learning-solver" title="Permalink to this headline">¶</a></h3>
<p>Generally, you should use the SGD solver with momentum (a typical momentum
value is 0.9). It generalizes better, often significantly
better, than adaptive methods like Adam
<a class="bibtex reference internal" href="#arxiv170508292w" id="id1">[WilsonRoelofsStern+17]</a>.</p>
<p>Adaptive solvers, like Adam, may be used for fast exploration and
prototyping, thanks to their fast convergence.</p>
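<p>As a minimal sketch (reusing the <code class="docutils literal notranslate"><span class="pre">SGDSolver</span></code> options shown later in this section; the section and layer names are illustrative), a momentum SGD configuration attached to a layer could look like:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>[conv1]
; ... layer definition ...
ConfigSection=common.config

[common.config]
; SGD with momentum: the recommended, well-generalizing default
Solvers.LearningRate=0.01
Solvers.Momentum=0.9
Solvers.Decay=0.0001
</pre></div>
</div>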
</div>
<div class="section" id="choose-the-learning-hyper-parameters">
<h3>Choose the learning hyper-parameters<a class="headerlink" href="#choose-the-learning-hyper-parameters" title="Permalink to this headline">¶</a></h3>
<p>You can use the <code class="docutils literal notranslate"><span class="pre">-find-lr</span></code> option available in the <code class="docutils literal notranslate"><span class="pre">n2d2</span></code> executable
to automatically find the best learning rate for a given neural network.</p>
<p>Usage example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">./</span><span class="n">n2d2</span> <span class="n">model</span><span class="o">.</span><span class="n">ini</span> <span class="o">-</span><span class="n">find</span><span class="o">-</span><span class="n">lr</span> <span class="mi">10000</span>
</pre></div>
</div>
<p>This command starts from a very low learning rate (1.0e-6) and increases
it exponentially to reach the maximum value (10.0) after 10000 steps, as
shown in figure [fig:findLrRange]. The loss change during this phase is
then plotted as a function of the learning rate, as shown in figure
[fig:findLr].</p>
<div class="figure align-default" id="id4">
<img alt="Exponential increase of the learning rate over the specified number of iterations, equals to the number of steps divided by the batch size (here: 24)." src="_images/find_lr-range.png" />
<p class="caption"><span class="caption-text">Exponential increase of the learning rate over the specified number
of iterations, equals to the number of steps divided by the batch
size (here: 24).</span><a class="headerlink" href="#id4" title="Permalink to this image">¶</a></p>
</div>
<div class="figure align-default" id="id5">
<img alt="Loss change as a function of the learning rate." src="_images/find_lr.png" />
<p class="caption"><span class="caption-text">Loss change as a function of the learning rate.</span><a class="headerlink" href="#id5" title="Permalink to this image">¶</a></p>
</div>
<p>Note that in N2D2, the learning rate is automatically normalized by the
global batch size (<span class="math notranslate nohighlight">\(N \times \text{IterationSize}\)</span>)
for the <code class="docutils literal notranslate"><span class="pre">SGDSolver</span></code>. A simple linear scaling rule is used, as
recommended in <a class="bibtex reference internal" href="#dblp-journals-corr-goyaldgnwktjh17" id="id2">[GDollarG+17]</a>.
The effective learning rate <span class="math notranslate nohighlight">\(\alpha_{\text{eff}}\)</span> applied for
the parameter update is therefore:</p>
<div class="math notranslate nohighlight">
\[\alpha_{\text{eff}} = \frac{\alpha}{N \times \text{IterationSize}} \quad \text{with } \alpha = \text{LearningRate}\]</div>
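<p>As a worked example (values purely illustrative), with <code class="docutils literal notranslate"><span class="pre">LearningRate</span></code> = 0.01, a batch size <span class="math notranslate nohighlight">\(N = 128\)</span> and <code class="docutils literal notranslate"><span class="pre">IterationSize</span></code> = 1, the effective learning rate is:</p>
<div class="math notranslate nohighlight">
\[\alpha_{\text{eff}} = \frac{0.01}{128 \times 1} \approx 7.8 \times 10^{-5}\]</div>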
<p>Typical values for the <code class="docutils literal notranslate"><span class="pre">SGDSolver</span></code> are:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Solvers</span><span class="o">.</span><span class="n">LearningRate</span><span class="o">=</span><span class="mf">0.01</span>
<span class="n">Solvers</span><span class="o">.</span><span class="n">Decay</span><span class="o">=</span><span class="mf">0.0001</span>
<span class="n">Solvers</span><span class="o">.</span><span class="n">Momentum</span><span class="o">=</span><span class="mf">0.9</span>
</pre></div>
</div>
</div>
<div class="section" id="convergence-and-normalization">
<h3>Convergence and normalization<a class="headerlink" href="#convergence-and-normalization" title="Permalink to this headline">¶</a></h3>
<p>Deep networks (> 30 layers) and especially residual networks usually
don’t converge without normalization. Indeed, batch normalization is
almost always used. <em>ZeroInit</em> is a method that can be used to overcome
this issue without normalization <a class="bibtex reference internal" href="#zhang2018residual" id="id3">[ZDM19]</a>.</p>
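<p>As an illustrative sketch only (layer names are arbitrary and the exact cell options should be checked in the Network Layers documentation), inserting a batch-normalization layer after a convolution in an INI model typically looks like:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>[conv1]
Input=sp
Type=Conv
KernelWidth=3
KernelHeight=3
NbOutputs=16
; No activation here: the non-linearity is applied after the normalization
ActivationFunction=Linear

[bn1]
Input=conv1
; Type=BatchNorm: see the Network Layers section for the exact cell options
Type=BatchNorm
NbOutputs=[conv1]NbOutputs
ActivationFunction=Rectifier
</pre></div>
</div>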
</div>
</div>
<div class="section" id="building-a-classifier-neural-network">
<h2>Building a classifier neural network<a class="headerlink" href="#building-a-classifier-neural-network" title="Permalink to this headline">¶</a></h2>
<p>For this tutorial, we will use the classical MNIST handwritten digit
dataset. A driver module already exists for this dataset, named
<code class="docutils literal notranslate"><span class="pre">MNIST_IDX_Database</span></code>.</p>
<p>To instantiate it, just add the following lines in a new INI file:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[database]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">MNIST_IDX_Database</span>
<span class="na">Validation</span><span class="o">=</span><span class="s">0.2 ; Use 20\% of the dataset for validation</span>
</pre></div>
</div>
<p>In order to create a neural network, we first need to define its input,
which is declared with a <code class="docutils literal notranslate"><span class="pre">[sp]</span></code> section (<em>sp</em> for <em>StimuliProvider</em>).
In this section, we configure the size of the input and the batch size:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[sp]</span>
<span class="na">SizeX</span><span class="o">=</span><span class="s">32</span>
<span class="na">SizeY</span><span class="o">=</span><span class="s">32</span>
<span class="na">BatchSize</span><span class="o">=</span><span class="s">128</span>
</pre></div>
</div>
<p>We can also add pre-processing transformations to the <em>StimuliProvider</em>,
knowing that the final data size after transformations must match the
size declared in the <code class="docutils literal notranslate"><span class="pre">[sp]</span></code> section. Here, we must rescale the MNIST
28x28 images to match the 32x32 network input size.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[sp.Transformation_1]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">RescaleTransformation</span>
<span class="na">Width</span><span class="o">=</span><span class="s">[sp]SizeX</span>
<span class="na">Height</span><span class="o">=</span><span class="s">[sp]SizeY</span>
</pre></div>
</div>
<p>Next, we declare the neural network layers. In this example, we
reproduce the well-known LeNet network. The first layer is a 5x5
convolutional layer, with 6 channels. Since there is only one input
channel, there will be only 6 convolution kernels in this layer.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[conv1]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">sp</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Conv</span>
<span class="na">KernelWidth</span><span class="o">=</span><span class="s">5</span>
<span class="na">KernelHeight</span><span class="o">=</span><span class="s">5</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">6</span>
</pre></div>
</div>
<p>The next layer is a 2x2 MAX pooling layer, with a stride of 2
(non-overlapping MAX pooling).</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[pool1]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">conv1</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Pool</span>
<span class="na">PoolWidth</span><span class="o">=</span><span class="s">2</span>
<span class="na">PoolHeight</span><span class="o">=</span><span class="s">2</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">[conv1]NbOutputs</span>
<span class="na">Stride</span><span class="o">=</span><span class="s">2</span>
<span class="na">Pooling</span><span class="o">=</span><span class="s">Max</span>
<span class="na">Mapping.Size</span><span class="o">=</span><span class="s">1 ; One to one connection between input and output channels</span>
</pre></div>
</div>
<p>The next layer is a 5x5 convolutional layer with 16 channels.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[conv2]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">pool1</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Conv</span>
<span class="na">KernelWidth</span><span class="o">=</span><span class="s">5</span>
<span class="na">KernelHeight</span><span class="o">=</span><span class="s">5</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">16</span>
</pre></div>
</div>
<p>Note that in LeNet, the <code class="docutils literal notranslate"><span class="pre">[conv2]</span></code> layer is not fully connected to the
pooling layer. In N2D2, a custom mapping can be defined for each input
connection. The connection of <span class="math notranslate nohighlight">\(n\)</span>-th output map to the inputs is
defined by the <span class="math notranslate nohighlight">\(n\)</span>-th column of the matrix below, where the rows
correspond to the inputs.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">Mapping(pool1)</span><span class="o">=</span><span class="s">\</span>
<span class="na">1 0 0 0 1 1 1 0 0 1 1 1 1 0 1 1 \</span>
<span class="na">1 1 0 0 0 1 1 1 0 0 1 1 1 1 0 1 \</span>
<span class="na">1 1 1 0 0 0 1 1 1 0 0 1 0 1 1 1 \</span>
<span class="na">0 1 1 1 0 0 1 1 1 1 0 0 1 0 1 1 \</span>
<span class="na">0 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 \</span>
<span class="na">0 0 0 1 1 1 0 0 1 1 1 1 0 1 1 1</span>
</pre></div>
</div>
<p>Another MAX pooling and convolution layer follow:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[pool2]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">conv2</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Pool</span>
<span class="na">PoolWidth</span><span class="o">=</span><span class="s">2</span>
<span class="na">PoolHeight</span><span class="o">=</span><span class="s">2</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">[conv2]NbOutputs</span>
<span class="na">Stride</span><span class="o">=</span><span class="s">2</span>
<span class="na">Pooling</span><span class="o">=</span><span class="s">Max</span>
<span class="na">Mapping.Size</span><span class="o">=</span><span class="s">1</span>
<span class="k">[conv3]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">pool2</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Conv</span>
<span class="na">KernelWidth</span><span class="o">=</span><span class="s">5</span>
<span class="na">KernelHeight</span><span class="o">=</span><span class="s">5</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">120</span>
</pre></div>
</div>
<p>The network ends with two fully-connected layers of 84 and 10
neurons respectively:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[fc1]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">conv3</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Fc</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">84</span>
<span class="k">[fc2]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">fc1</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Fc</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">10</span>
</pre></div>
</div>
<p>Finally, we use a softmax layer to obtain output classification
probabilities and compute the loss function.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[softmax]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">fc2</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Softmax</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">[fc2]NbOutputs</span>
<span class="na">WithLoss</span><span class="o">=</span><span class="s">1</span>
</pre></div>
</div>
<p>In order to tell N2D2 to compute the error and the classification score
on this softmax layer, one must attach an N2D2 <em>Target</em> to this layer,
with a section with the same name suffixed with <code class="docutils literal notranslate"><span class="pre">.Target</span></code>:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[softmax.Target]</span>
</pre></div>
</div>
<p>By default, the activation function for the convolution and the
fully-connected layers is the hyperbolic tangent. Because the <code class="docutils literal notranslate"><span class="pre">[fc2]</span></code>
layer is fed to a softmax, it should not have any activation function.
We can specify it by adding the following line in the <code class="docutils literal notranslate"><span class="pre">[fc2]</span></code> section:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[fc2]</span>
<span class="na">...</span>
<span class="na">ActivationFunction</span><span class="o">=</span><span class="s">Linear</span>
</pre></div>
</div>
<p>In order to further improve the network’s performance, several things
can be done:</p>
<p><strong>Use ReLU activation functions.</strong> In order to do so, just add the
following in the <code class="docutils literal notranslate"><span class="pre">[conv1]</span></code>, <code class="docutils literal notranslate"><span class="pre">[conv2]</span></code>, <code class="docutils literal notranslate"><span class="pre">[conv3]</span></code> and <code class="docutils literal notranslate"><span class="pre">[fc1]</span></code>
layer sections:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">ActivationFunction</span><span class="o">=</span><span class="s">Rectifier</span>
</pre></div>
</div>
<p>For the ReLU activation function to be effective, the weights must be
initialized carefully, in order to avoid dead units that would be stuck
in the <span class="math notranslate nohighlight">\(]-\infty,0]\)</span> output range before the ReLU function. In
N2D2, one can use a custom <code class="docutils literal notranslate"><span class="pre">WeightsFiller</span></code> for the weights
initialization. For the ReLU activation function, a popular and
efficient filler is the so-called <code class="docutils literal notranslate"><span class="pre">XavierFiller</span></code> (see the
[par:XavierFiller] section for more information):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">WeightsFiller</span><span class="o">=</span><span class="s">XavierFiller</span>
</pre></div>
</div>
<p><strong>Use dropout layers.</strong> Dropout is highly effective to improve the
network generalization capacity. Here is an example of a dropout layer
inserted between the <code class="docutils literal notranslate"><span class="pre">[fc1]</span></code> and <code class="docutils literal notranslate"><span class="pre">[fc2]</span></code> layers:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[fc1]</span>
<span class="na">...</span>
<span class="k">[fc1.drop]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">fc1</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Dropout</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">[fc1]NbOutputs</span>
<span class="k">[fc2]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">fc1.drop ; Replaces "Input=fc1"</span>
<span class="na">...</span>
</pre></div>
</div>
<p><strong>Tune the learning parameters.</strong> You may want to tune the learning rate
and other learning parameters depending on the learning problem at hand.
In order to do so, you can add a configuration section that can be
common (or not) to all the layers. Here is an example of a configuration
section:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[conv1]</span>
<span class="na">...</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">common.config</span>
<span class="k">[...]</span>
<span class="na">...</span>
<span class="k">[common.config]</span>
<span class="na">NoBias</span><span class="o">=</span><span class="s">1</span>
<span class="na">WeightsSolver.LearningRate</span><span class="o">=</span><span class="s">0.05</span>
<span class="na">WeightsSolver.Decay</span><span class="o">=</span><span class="s">0.0005</span>
<span class="na">Solvers.LearningRatePolicy</span><span class="o">=</span><span class="s">StepDecay</span>
<span class="na">Solvers.LearningRateStepSize</span><span class="o">=</span><span class="s">[sp]_EpochSize</span>
<span class="na">Solvers.LearningRateDecay</span><span class="o">=</span><span class="s">0.993</span>
<span class="na">Solvers.Clamping</span><span class="o">=</span><span class="s">-1.0:1.0</span>
</pre></div>
</div>
<p>For more details on the configuration parameters for the <code class="docutils literal notranslate"><span class="pre">Solver</span></code>, see
section [sec:WeightSolvers].</p>
<p><strong>Add input distortion.</strong> See for example the
<code class="docutils literal notranslate"><span class="pre">DistortionTransformation</span></code> (section [par:DistortionTransformation]).</p>
<p>The complete INI model corresponding to this tutorial can be found in
<em>models/LeNet.ini</em>.</p>
<p>In order to use CUDA/GPU accelerated learning, the default layer model
should be switched to <code class="docutils literal notranslate"><span class="pre">Frame_CUDA</span></code>. You can enable this model by
adding the following line at the top of the INI file (before the first
section):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">DefaultModel</span><span class="o">=</span><span class="s">Frame_CUDA</span>
</pre></div>
</div>
</div>
<div class="section" id="building-a-segmentation-neural-network">
<h2>Building a segmentation neural network<a class="headerlink" href="#building-a-segmentation-neural-network" title="Permalink to this headline">¶</a></h2>
<p>In this tutorial, we will learn how to do image segmentation with N2D2.
As an example, we will implement a face detection and gender recognition
neural network, using the IMDB-WIKI dataset.</p>
<p>First, we need to instantiate the built-in N2D2 driver for the IMDB-WIKI
dataset:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[database]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">IMDBWIKI_Database</span>
<span class="na">WikiSet</span><span class="o">=</span><span class="s">1 ; Use the WIKI part of the dataset</span>
<span class="na">IMDBSet</span><span class="o">=</span><span class="s">0 ; Don't use the IMDB part (less accurate annotation)</span>
<span class="na">Learn</span><span class="o">=</span><span class="s">0.90</span>
<span class="na">Validation</span><span class="o">=</span><span class="s">0.05</span>
<span class="na">DefaultLabel</span><span class="o">=</span><span class="s">background ; Label for pixels outside any ROI (default is no label, pixels are ignored)</span>
</pre></div>
</div>
<p>We must specify a default label for the background, because we want to
learn to differentiate faces from the background (and not simply ignore
the background during learning).</p>
<p>The network input is then declared:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[sp]</span>
<span class="na">SizeX</span><span class="o">=</span><span class="s">480</span>
<span class="na">SizeY</span><span class="o">=</span><span class="s">360</span>
<span class="na">BatchSize</span><span class="o">=</span><span class="s">48</span>
<span class="na">CompositeStimuli</span><span class="o">=</span><span class="s">1</span>
</pre></div>
</div>
<p>In order to work with segmented data, i.e. data with bounding box
annotations or pixel-wise annotations (as opposed to a single label per
data), one must enable the <code class="docutils literal notranslate"><span class="pre">CompositeStimuli</span></code> option in the <code class="docutils literal notranslate"><span class="pre">[sp]</span></code>
section.</p>
<p>We can then perform various operations on the data before feeding it to
the network, such as converting the 3-channel RGB input images
to single-channel grayscale images:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[sp.Transformation-1]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">ChannelExtractionTransformation</span>
<span class="na">CSChannel</span><span class="o">=</span><span class="s">Gray</span>
</pre></div>
</div>
<p>We then only need to rescale the images to match the network’s input size. This
can be done using a <code class="docutils literal notranslate"><span class="pre">RescaleTransformation</span></code>, followed by a
<code class="docutils literal notranslate"><span class="pre">PadCropTransformation</span></code> if one wants to keep the images’ aspect ratio.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[sp.Transformation-2]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">RescaleTransformation</span>
<span class="na">Width</span><span class="o">=</span><span class="s">[sp]SizeX</span>
<span class="na">Height</span><span class="o">=</span><span class="s">[sp]SizeY</span>
<span class="na">KeepAspectRatio</span><span class="o">=</span><span class="s">1 ; Keep images aspect ratio</span>
<span class="c1">; Required to ensure all the images are the same size</span>
<span class="k">[sp.Transformation-3]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">PadCropTransformation</span>
<span class="na">Width</span><span class="o">=</span><span class="s">[sp]SizeX</span>
<span class="na">Height</span><span class="o">=</span><span class="s">[sp]SizeY</span>
</pre></div>
</div>
<p>A common additional operation to extend the learning set is to apply a
random horizontal flip to the images. This can be achieved with the
following <code class="docutils literal notranslate"><span class="pre">FlipTransformation</span></code>:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[sp.OnTheFlyTransformation-4]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">FlipTransformation</span>
<span class="na">RandomHorizontalFlip</span><span class="o">=</span><span class="s">1</span>
<span class="na">ApplyTo</span><span class="o">=</span><span class="s">LearnOnly ; Apply this transformation only on the learning set</span>
</pre></div>
</div>
<p>Note that this is an <em>on-the-fly</em> transformation, meaning it cannot be
cached and is re-executed every time even for the same stimuli. We also
apply this transformation only on the learning set, with the <code class="docutils literal notranslate"><span class="pre">ApplyTo</span></code>
option.</p>
<p>Next, the neural network can be described:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[conv1.1]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">sp</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Conv</span>
<span class="na">...</span>
<span class="k">[pool1]</span>
<span class="na">...</span>
<span class="k">[...]</span>
<span class="na">...</span>
<span class="k">[fc2]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">drop1</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Conv</span>
<span class="na">...</span>
<span class="k">[drop2]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">fc2</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Dropout</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">[fc2]NbOutputs</span>
</pre></div>
</div>
<p>A full network description can be found in the <em>IMDBWIKI.ini</em> file in
the <em>models</em> directory of N2D2. It is a fully convolutional network.</p>
<p>Here we will focus on the output layers required to detect the faces and
classify their gender. We start from the <code class="docutils literal notranslate"><span class="pre">[drop2]</span></code> layer, which has
128 channels of size 60x45.</p>
<div class="section" id="faces-detection">
<h3>Faces detection<a class="headerlink" href="#faces-detection" title="Permalink to this headline">¶</a></h3>
<p>We first want to add an output stage for face detection. It is a
1x1 convolutional layer with a single 60x45 output map. For each output
pixel, this layer outputs the probability that the pixel belongs to a
face.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[fc3.face]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">drop2</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Conv</span>
<span class="na">KernelWidth</span><span class="o">=</span><span class="s">1</span>
<span class="na">KernelHeight</span><span class="o">=</span><span class="s">1</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">1</span>
<span class="na">Stride</span><span class="o">=</span><span class="s">1</span>
<span class="na">ActivationFunction</span><span class="o">=</span><span class="s">LogisticWithLoss</span>
<span class="na">WeightsFiller</span><span class="o">=</span><span class="s">XavierFiller</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">common.config ; Same solver options that the other layers</span>
</pre></div>
</div>
<p>In order to do so, the activation function of this layer must be of type
<code class="docutils literal notranslate"><span class="pre">LogisticWithLoss</span></code>.</p>
<p>We must also tell N2D2 to compute the error and the classification score
on this layer, by attaching an N2D2 <em>Target</em> to it, with
a section with the same name suffixed with <code class="docutils literal notranslate"><span class="pre">.Target</span></code>:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[fc3.face.Target]</span>
<span class="na">LabelsMapping</span><span class="o">=</span><span class="s">\${N2D2_MODELS}/IMDBWIKI_target_face.dat</span>
<span class="c1">; Visualization parameters</span>
<span class="na">NoDisplayLabel</span><span class="o">=</span><span class="s">0</span>
<span class="na">LabelsHueOffset</span><span class="o">=</span><span class="s">90</span>
</pre></div>
</div>
<p>In this <em>Target</em>, we must specify how the dataset annotations are mapped
to the layer’s output. This can be done in a separate file using the
<code class="docutils literal notranslate"><span class="pre">LabelsMapping</span></code> parameter. Here, since the output layer has a single
output per pixel, the target value can only be 0 or 1. A target value of
-1 means that this output is ignored (no error back-propagated). Since
the only annotations in the IMDB-WIKI dataset are faces, the mapping
described in the <em>IMDBWIKI_target_face.dat</em> file is easy:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="c1"># background</span>
<span class="n">background</span> <span class="mi">0</span>
<span class="c1"># padding (*) is ignored (-1)</span>
<span class="o">*</span> <span class="o">-</span><span class="mi">1</span>
<span class="c1"># not background = face</span>
<span class="n">default</span> <span class="mi">1</span>
</pre></div>
</div>
</div>
<div class="section" id="gender-recognition">
<h3>Gender recognition<a class="headerlink" href="#gender-recognition" title="Permalink to this headline">¶</a></h3>
<p>We can also add a second output stage for gender recognition. Like
before, it would be a 1x1 convolutional layer with a single 60x45 output
map. But here, for each output pixel, this layer would output the
probability that the pixel represents a female face.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[fc3.gender]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">drop2</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Conv</span>
<span class="na">KernelWidth</span><span class="o">=</span><span class="s">1</span>
<span class="na">KernelHeight</span><span class="o">=</span><span class="s">1</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">1</span>
<span class="na">Stride</span><span class="o">=</span><span class="s">1</span>
<span class="na">ActivationFunction</span><span class="o">=</span><span class="s">LogisticWithLoss</span>
<span class="na">WeightsFiller</span><span class="o">=</span><span class="s">XavierFiller</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">common.config</span>
</pre></div>
</div>
<p>The output layer is therefore identical to the face output layer, but
the target mapping is different. For the target mapping, the idea is
simply to ignore all pixels not belonging to a face and assign the
target 0 to male pixels and the target 1 to female pixels.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[fc3.gender.Target]</span>
<span class="na">LabelsMapping</span><span class="o">=</span><span class="s">\${N2D2_MODELS}/IMDBWIKI_target_gender.dat</span>
<span class="c1">; Only display gender probability for pixels detected as face pixels</span>
<span class="na">MaskLabelTarget</span><span class="o">=</span><span class="s">fc3.face.Target</span>
<span class="na">MaskedLabel</span><span class="o">=</span><span class="s">1</span>
</pre></div>
</div>
<p>The content of the <em>IMDBWIKI_target_gender.dat</em> file would therefore
look like:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># background
# ?-* (unknown gender)
# padding
default -1
# male gender
M-? 0 # unknown age
M-0 0
M-1 0
M-2 0
...
M-98 0
M-99 0
# female gender
F-? 1 # unknown age
F-0 1
F-1 1
F-2 1
...
F-98 1
F-99 1
</pre></div>
</div>
</div>
<div class="section" id="rois-extraction">
<h3>ROIs extraction<a class="headerlink" href="#rois-extraction" title="Permalink to this headline">¶</a></h3>
<p>The next step is to extract the detected face ROIs and assign to each
ROI the most probable gender. To this end, we can first set a detection
threshold, in terms of probability, to select face pixels. In the
following, the threshold is fixed at a 75% face probability:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[post.Transformation-thres]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">fc3.face</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Transformation</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">1</span>
<span class="na">Transformation</span><span class="o">=</span><span class="s">ThresholdTransformation</span>
<span class="na">Operation</span><span class="o">=</span><span class="s">ToZero</span>
<span class="na">Threshold</span><span class="o">=</span><span class="s">0.75</span>
</pre></div>
</div>
<p>We can then assign a target of type <code class="docutils literal notranslate"><span class="pre">TargetROIs</span></code> to this layer, which
will automatically create the bounding boxes using a segmentation
algorithm.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[post.Transformation-thres.Target-face]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">TargetROIs</span>
<span class="na">MinOverlap</span><span class="o">=</span><span class="s">0.33 ; Min. overlap fraction to match the ROI to an annotation</span>
<span class="na">FilterMinWidth</span><span class="o">=</span><span class="s">5 ; Min. ROI width</span>
<span class="na">FilterMinHeight</span><span class="o">=</span><span class="s">5 ; Min. ROI height</span>
<span class="na">FilterMinAspectRatio</span><span class="o">=</span><span class="s">0.5 ; Min. ROI aspect ratio</span>
<span class="na">FilterMaxAspectRatio</span><span class="o">=</span><span class="s">1.5 ; Max. ROI aspect ratio</span>
<span class="na">LabelsMapping</span><span class="o">=</span><span class="s">\${N2D2_MODELS}/IMDBWIKI_target_face.dat</span>
</pre></div>
</div>
<p>In order to assign a gender to the extracted ROIs, the above target must
be modified to:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[post.Transformation-thres.Target-gender]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">TargetROIs</span>
<span class="na">ROIsLabelTarget</span><span class="o">=</span><span class="s">fc3.gender.Target</span>
<span class="na">MinOverlap</span><span class="o">=</span><span class="s">0.33</span>
<span class="na">FilterMinWidth</span><span class="o">=</span><span class="s">5</span>
<span class="na">FilterMinHeight</span><span class="o">=</span><span class="s">5</span>
<span class="na">FilterMinAspectRatio</span><span class="o">=</span><span class="s">0.5</span>
<span class="na">FilterMaxAspectRatio</span><span class="o">=</span><span class="s">1.5</span>
<span class="na">LabelsMapping</span><span class="o">=</span><span class="s">\${N2D2_MODELS}/IMDBWIKI_target_gender.dat</span>
</pre></div>
</div>
<p>Here, we use the <code class="docutils literal notranslate"><span class="pre">fc3.gender.Target</span></code> target to determine the most
probable gender of the ROI.</p>
</div>
<div class="section" id="data-visualization">
<h3>Data visualization<a class="headerlink" href="#data-visualization" title="Permalink to this headline">¶</a></h3>
<p>For each <em>Target</em> in the network, a corresponding folder is created in
the simulation directory, which contains learning, validation and test
confusion matrices. The output estimation of the network for each
stimulus is also generated automatically for the test dataset and can be
visualized with the <em>./test.py</em> helper tool. An example is shown in
figure [fig:targetvisu].</p>
<div class="figure align-default" id="id6">
<img alt="Example of the target visualization helper tool." src="_images/target_visu.png" />
<p class="caption"><span class="caption-text">Example of the target visualization helper tool.</span><a class="headerlink" href="#id6" title="Permalink to this image">¶</a></p>
</div>
</div>
</div>
<div class="section" id="transcoding-a-learned-network-in-spike-coding">
<h2>Transcoding a learned network in spike-coding<a class="headerlink" href="#transcoding-a-learned-network-in-spike-coding" title="Permalink to this headline">¶</a></h2>
<p>N2D2 embeds an event-based simulator (historically known as ‘Xnet’) and
allows transcoding a whole DNN into a spike-coding version and evaluating
the resulting spiking neural network’s performance. In this tutorial, we
will transcode the LeNet network described in section
[sec:BuildingClassifierNN].</p>
<div class="section" id="render-the-network-compatible-with-spike-simulations">
<h3>Render the network compatible with spike simulations<a class="headerlink" href="#render-the-network-compatible-with-spike-simulations" title="Permalink to this headline">¶</a></h3>
<p>The first step is to specify that we want to use a transcode model
(allowing both formal and spike simulation of the same network), by
changing the <code class="docutils literal notranslate"><span class="pre">DefaultModel</span></code> to:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">DefaultModel</span><span class="o">=</span><span class="s">Transcode_CUDA</span>
</pre></div>
</div>
<p>In order to perform spike simulations, the input of the network must be
of type <em>Environment</em>, which is a derived class of <em>StimuliProvider</em>
that adds spike coding support. In the INI model file, it is therefore
necessary to replace the <code class="docutils literal notranslate"><span class="pre">[sp]</span></code> section by an <code class="docutils literal notranslate"><span class="pre">[env]</span></code> section and
replace all references to <code class="docutils literal notranslate"><span class="pre">sp</span></code> with <code class="docutils literal notranslate"><span class="pre">env</span></code>.</p>
<p>Note that these changes have at this point no impact at all on the
formal coding simulations. The beginning of the INI file should be:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">DefaultModel</span><span class="o">=</span><span class="s">!\color{red}{Transcode\_CUDA}!</span>
<span class="c1">; Database</span>
<span class="k">[database]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">MNIST_IDX_Database</span>
<span class="na">Validation</span><span class="o">=</span><span class="s">0.2 ; Use 20% of the dataset for validation</span>
<span class="c1">; Environment</span>
<span class="k">[!\color{red}{env}!]</span>
<span class="na">SizeX</span><span class="o">=</span><span class="s">32</span>
<span class="na">SizeY</span><span class="o">=</span><span class="s">32</span>
<span class="na">BatchSize</span><span class="o">=</span><span class="s">128</span>
<span class="k">[env.Transformation_1]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">RescaleTransformation</span>
<span class="na">Width</span><span class="o">=</span><span class="s">[!\color{red}{env}!]SizeX</span>
<span class="na">Height</span><span class="o">=</span><span class="s">[!\color{red}{env}!]SizeY</span>
<span class="k">[conv1]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">!\color{red}{env}!</span>
<span class="na">...</span>
</pre></div>
</div>
<p>The dropout layer has no equivalent in spike-coding inference and must
be removed:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">...</span>
<span class="na">!\color{red}{\st{[fc1.drop]}}!</span>
<span class="na">!\color{red}{\st{Input</span><span class="o">=</span><span class="s">fc1}}!</span>
<span class="na">!\color{red}{\st{Type</span><span class="o">=</span><span class="s">Dropout}}!</span>
<span class="na">!\color{red}{\st{NbOutputs</span><span class="o">=</span><span class="s">[fc1]NbOutputs}}!</span>
<span class="k">[fc2]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">fc1!\color{red}{\st{.drop}}!</span>
<span class="na">...</span>
</pre></div>
</div>
<p>The softmax layer has no equivalent in spike-coding inference and must
be removed as well. The <em>Target</em> must therefore be attached to
<code class="docutils literal notranslate"><span class="pre">[fc2]</span></code>:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">...</span>
<span class="na">!\color{red}{\st{[softmax]}}!</span>
<span class="na">!\color{red}{\st{Input</span><span class="o">=</span><span class="s">fc2}}!</span>
<span class="na">!\color{red}{\st{Type</span><span class="o">=</span><span class="s">Softmax}}!</span>
<span class="na">!\color{red}{\st{NbOutputs</span><span class="o">=</span><span class="s">[fc2]NbOutputs}}!</span>
<span class="na">!\color{red}{\st{WithLoss</span><span class="o">=</span><span class="s">1}}!</span>
<span class="na">!\color{red}{\st{[softmax.Target]}}!</span>
<span class="k">[fc2.Target]</span>
<span class="na">...</span>
</pre></div>
</div>
<p>The network is now compatible with spike-coding simulations. However, we
did not specify at this point how to translate the input stimuli data
into spikes, nor the spiking neuron parameters (threshold value, leak
time constant…).</p>
</div>
<div class="section" id="configure-spike-coding-parameters">
<h3>Configure spike-coding parameters<a class="headerlink" href="#configure-spike-coding-parameters" title="Permalink to this headline">¶</a></h3>
<p>The first step is to configure how the input stimuli data must be coded
into spikes. To this end, we must attach a configuration section to the
<em>Environment</em>. Here, we specify a periodic coding with random initial
jitter, with a mean period ranging from 10 ns to 100 µs:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">...</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">env.config</span>
<span class="k">[env.config]</span>
<span class="c1">; Spike-based computing</span>
<span class="na">StimulusType</span><span class="o">=</span><span class="s">JitteredPeriodic</span>
<span class="na">PeriodMin</span><span class="o">=</span><span class="s">1,000,000 ; unit = fs</span>
<span class="na">PeriodMeanMin</span><span class="o">=</span><span class="s">10,000,000 ; unit = fs</span>
<span class="na">PeriodMeanMax</span><span class="o">=</span><span class="s">100,000,000,000 ; unit = fs</span>
<span class="na">PeriodRelStdDev</span><span class="o">=</span><span class="s">0.0</span>
</pre></div>
</div>
<p>The next step is to specify the neuron parameters, which are common
to all layers and can therefore be specified in the <code class="docutils literal notranslate"><span class="pre">[common.config]</span></code>
section. In N2D2, the base spike-coding layers use a Leaky
Integrate-and-Fire (LIF) neuron model. By default, the leak time
constant is zero, resulting in simple Integrate-and-Fire (IF) neurons.</p>
<p>Here we simply specify that the neuron threshold must be unity (1.0),
that the threshold is only positive and that there is no incoming
synaptic delay:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">...</span>
<span class="c1">; Spike-based computing</span>
<span class="na">Threshold</span><span class="o">=</span><span class="s">1.0</span>
<span class="na">BipolarThreshold</span><span class="o">=</span><span class="s">0</span>
<span class="na">IncomingDelay</span><span class="o">=</span><span class="s">0</span>
</pre></div>
</div>
<p>Finally, we can limit the number of spikes required for the computation
of each stimulus by adding a decision delta threshold at the output
layer:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">...</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">common.config,fc2.config</span>
<span class="k">[fc2.Target]</span>
<span class="k">[fc2.config]</span>
<span class="c1">; Spike-based computing</span>
<span class="na">TerminateDelta</span><span class="o">=</span><span class="s">4</span>
<span class="na">BipolarThreshold</span><span class="o">=</span><span class="s">1</span>
</pre></div>
</div>
<p>The complete INI model corresponding to this tutorial can be found in
<em>models/LeNet_Spike.ini</em>.</p>
<p>Here is a summary of the steps required to reproduce the whole
experiment:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">./</span><span class="n">n2d2</span> <span class="s2">"\$N2D2_MODELS/LeNet.ini"</span> <span class="o">-</span><span class="n">learn</span> <span class="mi">6000000</span> <span class="o">-</span><span class="n">log</span> <span class="mi">100000</span>
<span class="o">./</span><span class="n">n2d2</span> <span class="s2">"\$N2D2_MODELS/LeNet_Spike.ini"</span> <span class="o">-</span><span class="n">test</span>
</pre></div>
</div>
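<p>The first command learns the formal (frame-based) LeNet network for
6,000,000 steps, logging intermediate results every 100,000 steps; the
second command then runs the test set through the spike-coded description
of the same network, which reuses the weights learned in the first step.</p>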
<p>The final recognition rate reported at the end of the spike inference
should be almost identical to that of the formal-coding network (around
99% for the LeNet network).</p>
<p>Various statistics are available at the end of the spike-coding
simulation in the <em>stats_spike</em> folder and the <em>stats_spike.log</em> file.
Towards the end of <em>stats_spike.log</em>, one can read the following line:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Read</span> <span class="n">events</span> <span class="n">per</span> <span class="n">virtual</span> <span class="n">synapse</span> <span class="n">per</span> <span class="n">pattern</span> <span class="p">(</span><span class="n">average</span><span class="p">):</span> <span class="mf">0.654124</span>
</pre></div>
</div>
<p>This line reports the average number of accumulation operations per
synapse per input stimulus in the network. If this number is below 1.0,
it means that the spiking version of the network is more efficient than
its formal counterpart in terms of total number of operations!</p>
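<p>As an illustrative back-of-the-envelope check (the layer size below is
hypothetical and not taken from this tutorial), a formal fully-connected
layer performs one multiply-accumulate per synapse per stimulus, whereas
the spiking version performs on average one accumulation per read event:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>synapses = 100 * 50                        # hypothetical fully-connected layer
events_per_synapse = 0.654124              # value reported in stats_spike.log

formal_ops = synapses * 1.0                # one MAC per synapse per stimulus
spike_ops = synapses * events_per_synapse  # one accumulation per read event

print(formal_ops, spike_ops)               # 5000.0 vs. ~3270.6 per stimulus
</pre></div>
</div>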
<p id="bibtex-bibliography-tuto-0"><dl class="citation">
<dt class="bibtex label" id="bhalgat2020lsq"><span class="brackets">BLN+20</span></dt>
<dd><p>Yash Bhalgat, Jinwon Lee, Markus Nagel, Tijmen Blankevoort, and Nojun Kwak. Lsq+: improving low-bit quantization through learnable offsets and better initialization. 2020. <a class="reference external" href="https://arxiv.org/abs/2004.09576">arXiv:2004.09576</a>.</p>
</dd>
<dt class="bibtex label" id="cordts2016cityscapes"><span class="brackets">COR+16</span></dt>
<dd><p>Marius Cordts, Mohamed Omran, Sebastian Ramos, Timo Rehfeld, Markus Enzweiler, Rodrigo Benenson, Uwe Franke, Stefan Roth, and Bernt Schiele. The cityscapes dataset for semantic urban scene understanding. In <em>Proc. of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</em>. 2016.</p>
</dd>
<dt class="bibtex label" id="dollar2009"><span class="brackets">DollarWSP09</span></dt>
<dd><p>P. Dollár, C. Wojek, B. Schiele, and P. Perona. Pedestrian detection: a benchmark. In <em>CVPR</em>. 2009.</p>
</dd>
<dt class="bibtex label" id="feifei2004"><span class="brackets">FFFP04</span></dt>
<dd><p>L. Fei-Fei, R. Fergus, and P. Perona. Learning generative visual models from few training examples: an incremental bayesian approach tested on 101 object categories. In <em>IEEE. CVPR 2004, Workshop on Generative-Model Based Vision</em>. 2004.</p>
</dd>
<dt class="bibtex label" id="glorot2010"><span class="brackets">GB10</span></dt>
<dd><p>X. Glorot and Y. Bengio. Understanding the difficulty of training deep feedforward neural networks. In <em>International conference on artificial intelligence and statistics</em>, 249–256. 2010.</p>
</dd>
<dt class="bibtex label" id="dblp-journals-corr-goyaldgnwktjh17"><span class="brackets"><a class="fn-backref" href="#id2">GDollarG+17</a></span></dt>
<dd><p>Priya Goyal, Piotr Dollár, Ross B. Girshick, Pieter Noordhuis, Lukasz Wesolowski, Aapo Kyrola, Andrew Tulloch, Yangqing Jia, and Kaiming He. Accurate, large minibatch SGD: training imagenet in 1 hour. <em>CoRR</em>, 2017. URL: <a class="reference external" href="http://arxiv.org/abs/1706.02677">http://arxiv.org/abs/1706.02677</a>, <a class="reference external" href="https://arxiv.org/abs/1706.02677">arXiv:1706.02677</a>.</p>
</dd>
<dt class="bibtex label" id="graham2014"><span class="brackets">Gra14</span></dt>
<dd><p>Benjamin Graham. Fractional max-pooling. <em>CoRR</em>, 2014.</p>
</dd>
<dt class="bibtex label" id="griffin2007"><span class="brackets">GHP07</span></dt>
<dd><p>Gregory Griffin, Alex Holub, and Pietro Perona. Caltech-256 object category dataset. Technical Report, 2007.</p>
</dd>
<dt class="bibtex label" id="he2015"><span class="brackets">HZRS15</span></dt>
<dd><p>Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. Delving deep into rectifiers: surpassing human-level performance on imagenet classification. In <em>Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV)</em>, ICCV ‘15, 1026–1034. 2015. <a class="reference external" href="https://doi.org/10.1109/ICCV.2015.123">doi:10.1109/ICCV.2015.123</a>.</p>
</dd>
<dt class="bibtex label" id="lstm1997"><span class="brackets">HS97</span></dt>
<dd><p>Sepp Hochreiter and Jürgen Schmidhuber. Long short-term memory. <em>Neural Computation</em>, 9(8):1735–1780, 1997. <a class="reference external" href="https://doi.org/10.1162/neco.1997.9.8.1735">doi:10.1162/neco.1997.9.8.1735</a>.</p>
</dd>
<dt class="bibtex label" id="houben2013"><span class="brackets">HSS+13</span></dt>
<dd><p>Sebastian Houben, Johannes Stallkamp, Jan Salmen, Marc Schlipsing, and Christian Igel. Detection of traffic signs in real-world images: the German Traffic Sign Detection Benchmark. In <em>International Joint Conference on Neural Networks</em>, number 1288. 2013.</p>
</dd>
<dt class="bibtex label" id="ioffe2015"><span class="brackets">IS15</span></dt>
<dd><p>Sergey Ioffe and Christian Szegedy. Batch normalization: accelerating deep network training by reducing internal covariate shift. <em>CoRR</em>, 2015.</p>
</dd>
<dt class="bibtex label" id="jain2010"><span class="brackets">JLM10</span></dt>
<dd><p>Vidit Jain and Erik Learned-Miller. FDDB: a benchmark for face detection in unconstrained settings. Technical Report UM-CS-2010-009, University of Massachusetts, Amherst, 2010.</p>
</dd>
<dt class="bibtex label" id="jin2019efficient"><span class="brackets">JYL19</span></dt>
<dd><p>Qing Jin, Linjie Yang, and Zhenyu Liao. Towards efficient training for neural network quantization. 2019. <a class="reference external" href="https://arxiv.org/abs/1912.10207">arXiv:1912.10207</a>.</p>
</dd>
<dt class="bibtex label" id="kingmab14"><span class="brackets">KB14</span></dt>
<dd><p>Diederik P. Kingma and Jimmy Ba. Adam: A method for stochastic optimization. <em>CoRR</em>, 2014. URL: <a class="reference external" href="http://arxiv.org/abs/1412.6980">http://arxiv.org/abs/1412.6980</a>, <a class="reference external" href="https://arxiv.org/abs/1412.6980">arXiv:1412.6980</a>.</p>
</dd>
<dt class="bibtex label" id="krizhevsky2009"><span class="brackets">Kri09</span></dt>
<dd><p>Alex Krizhevsky. Learning multiple layers of features from tiny images. Technical Report, 2009.</p>
</dd>
<dt class="bibtex label" id="lecun1998"><span class="brackets">LBBH98</span></dt>
<dd><p>Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. Gradient-based learning applied to document recognition. In <em>Proceedings of the IEEE</em>, volume 86, 2278–2324. 1998.</p>
</dd>
<dt class="bibtex label" id="lockhart2011"><span class="brackets">LWX+11</span></dt>
<dd><p>Jeffrey W. Lockhart, Gary M. Weiss, Jack C. Xue, Shaun T. Gallagher, Andrew B. Grosner, and Tony T. Pulickal. Design considerations for the wisdm smart phone-based sensor mining architecture. In <em>Proceedings of the Fifth International Workshop on Knowledge Discovery from Sensor Data</em>, SensorKDD ‘11, 25–33. New York, NY, USA, 2011. ACM. URL: <a class="reference external" href="http://doi.acm.org/10.1145/2003653.2003656">http://doi.acm.org/10.1145/2003653.2003656</a>, <a class="reference external" href="https://doi.org/10.1145/2003653.2003656">doi:10.1145/2003653.2003656</a>.</p>
</dd>
<dt class="bibtex label" id="rakotomamonjy2014"><span class="brackets">RG14</span></dt>
<dd><p>A. Rakotomamonjy and G. Gasso. Histogram of gradients of time-frequency representations for audio scene detection. Technical Report, 2014.</p>
</dd>
<dt class="bibtex label" id="ilsvrc15"><span class="brackets">RDS+15</span></dt>
<dd><p>Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy, Aditya Khosla, Michael Bernstein, Alexander C. Berg, and Li Fei-Fei. ImageNet Large Scale Visual Recognition Challenge. <em>International Journal of Computer Vision (IJCV)</em>, 115(3):211–252, 2015. <a class="reference external" href="https://doi.org/10.1007/s11263-015-0816-y">doi:10.1007/s11263-015-0816-y</a>.</p>
</dd>
<dt class="bibtex label" id="srivastava2014"><span class="brackets">SHK+12</span></dt>
<dd><p>Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, and Ruslan Salakhutdinov. Dropout: a simple way to prevent neural networks from overfitting. <em>Journal of Machine Learning Research</em>, 15:1929–1958, 2014.</p>
</dd>
<dt class="bibtex label" id="stallkamp2012"><span class="brackets">SSSI12</span></dt>
<dd><p>J. Stallkamp, M. Schlipsing, J. Salmen, and C. Igel. Man vs. computer: benchmarking machine learning algorithms for traffic sign recognition. <em>Neural Networks</em>, 2012. <a class="reference external" href="https://doi.org/10.1016/j.neunet.2012.02.016">doi:10.1016/j.neunet.2012.02.016</a>.</p>
</dd>
<dt class="bibtex label" id="dota"><span class="brackets">XBD+17</span></dt>
<dd><p>Gui-Song Xia, Xiang Bai, Jian Ding, Zhen Zhu, Serge J. Belongie, Jiebo Luo, Mihai Datcu, Marcello Pelillo, and Liangpei Zhang. DOTA: A large-scale dataset for object detection in aerial images. <em>CoRR</em>, 2017. URL: <a class="reference external" href="http://arxiv.org/abs/1711.10398">http://arxiv.org/abs/1711.10398</a>, <a class="reference external" href="https://arxiv.org/abs/1711.10398">arXiv:1711.10398</a>.</p>
</dd>
<dt class="bibtex label" id="zhang2018residual"><span class="brackets"><a class="fn-backref" href="#id3">ZDM19</a></span></dt>
<dd><p>Hongyi Zhang, Yann N. Dauphin, and Tengyu Ma. Residual learning without normalization via better initialization. In <em>International Conference on Learning Representations</em>. 2019. URL: <a class="reference external" href="https://openreview.net/forum?id=H1gsz30cKX">https://openreview.net/forum?id=H1gsz30cKX</a>.</p>
</dd>
<dt class="bibtex label" id="lucey2010"><span class="brackets">LuceyCohnKanade+10</span></dt>
<dd><p>P. Lucey, J. F. Cohn, T. Kanade, J. Saragih, Z. Ambadar, and I. Matthews. The extended cohn-kanade dataset (ck+): a complete dataset for action unit and emotion-specified expression. In <em>2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition - Workshops</em>, 94–101. June 2010. <a class="reference external" href="https://doi.org/10.1109/CVPRW.2010.5543262">doi:10.1109/CVPRW.2010.5543262</a>.</p>
</dd>
<dt class="bibtex label" id="speechcommandsv2"><span class="brackets">Warden18</span></dt>
<dd><p>P. Warden. Speech Commands: A Dataset for Limited-Vocabulary Speech Recognition. <em>ArXiv e-prints</em>, April 2018. URL: <a class="reference external" href="https://arxiv.org/abs/1804.03209">https://arxiv.org/abs/1804.03209</a>, <a class="reference external" href="https://arxiv.org/abs/1804.03209">arXiv:1804.03209</a>.</p>
</dd>
<dt class="bibtex label" id="arxiv170508292w"><span class="brackets"><a class="fn-backref" href="#id1">WilsonRoelofsStern+17</a></span></dt>
<dd><p>Ashia C. Wilson, Rebecca Roelofs, Mitchell Stern, Nathan Srebro, and Benjamin Recht. The Marginal Value of Adaptive Gradient Methods in Machine Learning. <em>arXiv e-prints</em>, May 2017. <a class="reference external" href="https://arxiv.org/abs/1705.08292">arXiv:1705.08292</a>.</p>
</dd>
</dl>
</p>
</div>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="onnx_convert.html" class="btn btn-neutral float-right" title="Obtain ONNX models" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="perfs_tools.html" class="btn btn-neutral float-left" title="Performance evaluation tools" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
© Copyright 2019, CEA LIST
</p>
</div>