<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>[NEW] Quantization-Aware Training — N2D2 documentation</title>
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script src="_static/jquery.js"></script>
<script src="_static/underscore.js"></script>
<script src="_static/doctools.js"></script>
<script src="_static/language_data.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>
<link rel="author" title="About these documents" href="about.html" />
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="Export: C++" href="export_CPP.html" />
<link rel="prev" title="Post-training quantization" href="quant_post.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home" alt="Documentation Home"> N2D2
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Introduction:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="intro.html">Presentation</a></li>
<li class="toctree-l1"><a class="reference internal" href="about.html">About N2D2-IP</a></li>
<li class="toctree-l1"><a class="reference internal" href="simus.html">Performing simulations</a></li>
<li class="toctree-l1"><a class="reference internal" href="perfs_tools.html">Performance evaluation tools</a></li>
<li class="toctree-l1"><a class="reference internal" href="tuto.html">Tutorials</a></li>
</ul>
<p class="caption"><span class="caption-text">ONNX Import:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="onnx_convert.html">Obtain ONNX models</a></li>
<li class="toctree-l1"><a class="reference internal" href="onnx_import.html">Import ONNX models</a></li>
<li class="toctree-l1"><a class="reference internal" href="onnx_transfer.html">Train from ONNX models</a></li>
</ul>
<p class="caption"><span class="caption-text">Quantization and Export:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="quant_post.html">Post-training quantization</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">[NEW] Quantization-Aware Training</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#getting-started">Getting Started</a></li>
<li class="toctree-l2"><a class="reference internal" href="#cell-quantizer-definition">Cell Quantizer Definition</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#lsq">LSQ</a></li>
<li class="toctree-l3"><a class="reference internal" href="#sat">SAT</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#activation-quantizer-definition">Activation Quantizer Definition</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#id3">LSQ</a></li>
<li class="toctree-l3"><a class="reference internal" href="#id5">SAT</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#layer-compatibility-table">Layer compatibility table</a></li>
<li class="toctree-l2"><a class="reference internal" href="#tutorial">Tutorial</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#onnx-model-resnet-18-example-ini-file">ONNX model : ResNet-18 Example - INI File</a></li>
<li class="toctree-l3"><a class="reference internal" href="#onnx-model-resnet-18-example-python">ONNX model : ResNet-18 Example - Python</a></li>
<li class="toctree-l3"><a class="reference internal" href="#hand-made-model-lenet-example-ini-file">Hand-Made model : LeNet Example - INI File</a></li>
<li class="toctree-l3"><a class="reference internal" href="#hand-made-model-lenet-example-python">Hand-Made model : LeNet Example - Python</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#results">Results</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#training-time-performances">Training Time Performances</a></li>
<li class="toctree-l3"><a class="reference internal" href="#mobilenet-v1">MobileNet-v1</a></li>
<li class="toctree-l3"><a class="reference internal" href="#mobilenet-v2">MobileNet-v2</a></li>
<li class="toctree-l3"><a class="reference internal" href="#resnet">ResNet</a></li>
<li class="toctree-l3"><a class="reference internal" href="#inception-v1">Inception-v1</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="export_CPP.html">Export: C++</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_CPP_STM32.html">Export: C++/STM32</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_TensorRT.html">Export: TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_DNeuro.html">Export: DNeuro</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_ONNX.html">Export: ONNX</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_legacy.html">Export: other / legacy</a></li>
</ul>
<p class="caption"><span class="caption-text">INI File Interface:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="ini_intro.html">Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_databases.html">Databases</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_data_analysis.html">Stimuli data analysis</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_environment.html">Stimuli provider (Environment)</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_layers.html">Network Layers</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_target.html">Targets (outputs & losses)</a></li>
<li class="toctree-l1"><a class="reference internal" href="adversarial.html">Adversarial module</a></li>
</ul>
<p class="caption"><span class="caption-text">Python API:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="containers.html">Containers</a></li>
<li class="toctree-l1"><a class="reference internal" href="cells.html">Cells</a></li>
<li class="toctree-l1"><a class="reference internal" href="databases.html">Databases</a></li>
<li class="toctree-l1"><a class="reference internal" href="stimuliprovider.html">StimuliProvider</a></li>
<li class="toctree-l1"><a class="reference internal" href="deepnet.html">DeepNet</a></li>
</ul>
<p class="caption"><span class="caption-text">C++ API / Developer:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="dev_intro.html">Introduction</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">N2D2</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home"></a> »</li>
<li>[NEW] Quantization-Aware Training</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/quant_qat.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="new-quantization-aware-training">
<h1>[NEW] Quantization-Aware Training<a class="headerlink" href="#new-quantization-aware-training" title="Permalink to this headline">¶</a></h1>
<p><strong>N2D2-IP only: available upon request.</strong></p>
<div class="section" id="getting-started">
<h2>Getting Started<a class="headerlink" href="#getting-started" title="Permalink to this headline">¶</a></h2>
<p>N2D2 provides a complete design environment for a wide range of quantization modes. These modes are implemented as a set of integrated, highly modular blocks. N2D2 implements a per-layer quantization scheme that can differ at
each level of the neural network. This high granularity makes it possible to search for the best implementation depending on the
hardware constraints. Moreover, to achieve the best performance, N2D2 implements the latest state-of-the-art quantization methods, summarized in the figure below. Each dot represents one DNN (from the MobileNet or ResNet family), quantized with the number of bits indicated beside it.</p>
<div class="figure align-default">
<img alt="QAT state-of-the-art." src="_images/qat_sota.png" />
</div>
<p>The user can leverage the high modularity of our set of quantizer blocks and simply choose the method that best fits the initial requirements, computation resources and time-to-market strategy.
For example, the <code class="docutils literal notranslate"><span class="pre">LSQ</span></code> method only needs a limited number of training epochs to quantize a model,
while the <code class="docutils literal notranslate"><span class="pre">SAT</span></code> method requires more training epochs but currently gives the best quantization performance.
In addition, the final objectives can be expressed in terms of different user requirements, depending on the compression capability of the targeted hardware.
Depending on these objectives, we can consider different quantization schemes:</p>
<dl class="simple">
<dt>Weights-Only Quantization</dt><dd><p>In this quantization scheme, only the weights are discretized to fit in a limited set of possible states. Activations
are not impacted.
Let’s say we want to evaluate the performance of our model with 4-bit weights in the convolution layers. N2D2 natively provides
the ability to add a quantizer module, with no need to import a new package or to modify any source code. We then
just need to specify the <code class="docutils literal notranslate"><span class="pre">QWeight</span></code> type and <code class="docutils literal notranslate"><span class="pre">QWeight.Range</span></code> for the step-level discretization.</p>
</dd>
</dl>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">...</span>
<span class="na">QWeight</span><span class="o">=</span><span class="s">SAT ; Quantization Method can be ``LSQ`` or ``SAT``</span>
<span class="na">QWeight.Range</span><span class="o">=</span><span class="s">15 ; Range is set to ``15`` step level, can be represented as a 4-bits word</span>
<span class="na">...</span>
</pre></div>
</div>
<p>Example of fake-quantized weights on 4 bits / 15 levels:</p>
<div class="figure align-default">
<img alt="Weights Quantization in fake quantization on 15 levels." src="_images/qat_weights_fakeQ.png" />
</div>
<dl>
<dt>Mixed Weights-Activations Quantization</dt><dd><p>In this quantization scheme, both activations and weights are quantized, possibly at different step levels. For layers that have a non-linear activation function and learnable parameters, such as <code class="docutils literal notranslate"><span class="pre">Fc</span></code> and <code class="docutils literal notranslate"><span class="pre">Conv</span></code>, we first specify <code class="docutils literal notranslate"><span class="pre">QWeight</span></code> in the same way as in the Weights-Only quantization mode.</p>
<p>Let’s now say that we want to evaluate the performance of our model with activations quantized to 3 bits.
In a similar manner as for the <code class="docutils literal notranslate"><span class="pre">QWeight</span></code> quantizer, we specify the activation quantizer <code class="docutils literal notranslate"><span class="pre">QAct</span></code> for all layers that have a non-linear activation function. The method itself, here <code class="docutils literal notranslate"><span class="pre">QAct=SAT</span></code>, ensures the non-linearity of the activation function.</p>
</dd>
</dl>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">...</span>
<span class="na">ActivationFunction</span><span class="o">=</span><span class="s">Linear</span>
<span class="na">QAct</span><span class="o">=</span><span class="s">SAT ; Quantization Method can be ``LSQ`` or ``SAT``</span>
<span class="na">QAct.Range</span><span class="o">=</span><span class="s">7 ; Range is set to ``7`` step level, can be represented as a 3-bits word</span>
<span class="na">...</span>
</pre></div>
</div>
<p>Example of an activation feature map quantized to 4 bits / 15 levels:</p>
<div class="figure align-default">
<img alt="4-bits Quantized Activation Feature Map ." src="_images/qat_fm_4b.png" />
</div>
<dl class="simple">
<dt>Integer-Only Quantization</dt><dd><p>Activations and weights are represented as integers during the learning phase, which is one step beyond classical fake quantization. In practice,
exploiting a weights-only quantization scheme or fake quantization on hardware components is not straightforward. The Integer-Only
quantization mode fills this void and makes it possible to exploit QAT independently of the targeted hardware architecture. Most
common programmable architectures, such as CPUs, GPUs and DSPs, can implement it without additional burden.
In addition, hardware implementations such as HLS or RTL descriptions natively support low-precision integer operators.
In this mode, we replace the default quantization mode of the weights as follows:</p>
</dd>
</dl>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">...</span>
<span class="na">QWeight.Mode</span><span class="o">=</span><span class="s">Integer ; Can be ``Default`` (fake-quantization) mode or ``Integer``(true integer) mode</span>
<span class="na">...</span>
</pre></div>
</div>
<p>Example of full-integer weights on 4 bits / 15 levels:</p>
<div class="figure align-default">
<img alt="Weights Quantization in integer mode on 15 levels." src="_images/qat_weights_integer.png" />
</div>
</div>
<div class="section" id="cell-quantizer-definition">
<h2>Cell Quantizer Definition<a class="headerlink" href="#cell-quantizer-definition" title="Permalink to this headline">¶</a></h2>
<p>N2D2 implements a cell quantizer block for discretizing weights and biases at training time. This cell quantizer block
is totally transparent to the user. The quantization phase of the learnable parameters requires intensive operations
to adapt the distribution of the full-precision weights and to adapt the gradient. In addition, the implementation
can become highly memory-hungry, which can make it hard to train a complex model on a single GPU without specific treatment (gradient accumulation, etc.).
That is why N2D2 merges the different operations into dedicated CUDA or CPU kernels, allowing efficient utilization
of the available computation resources.</p>
<p>Overview of the cell quantizer implementation :</p>
<div class="figure align-default">
<img alt="Cell Quantizer Functional Block." src="_images/qat_cell_flow.png" />
</div>
<p>The common set of parameters for any kind of cell quantizer:</p>
<table class="docutils align-default">
<colgroup>
<col style="width: 30%" />
<col style="width: 70%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Option [default value]</p></th>
<th class="head"><p>Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight</span></code></p></td>
<td><p>Quantization method can be <code class="docutils literal notranslate"><span class="pre">SAT</span></code> or <code class="docutils literal notranslate"><span class="pre">LSQ</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight.Range</span></code> [<code class="docutils literal notranslate"><span class="pre">255</span></code>]</p></td>
<td><p>Quantization range: can be <code class="docutils literal notranslate"><span class="pre">1</span></code> for binary, <code class="docutils literal notranslate"><span class="pre">255</span></code> for 8 bits, etc.</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight.Solver</span></code> [<code class="docutils literal notranslate"><span class="pre">SGD</span></code>]</p></td>
<td><p>Solver type for the learnable quantization parameters; can be <code class="docutils literal notranslate"><span class="pre">SGD</span></code> or <code class="docutils literal notranslate"><span class="pre">ADAM</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight.Mode</span></code> [<code class="docutils literal notranslate"><span class="pre">Default</span></code>]</p></td>
<td><p>Quantization mode; can be <code class="docutils literal notranslate"><span class="pre">Default</span></code> or <code class="docutils literal notranslate"><span class="pre">Integer</span></code></p></td>
</tr>
</tbody>
</table>
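<p>As an illustration, these common options can be combined in a cell definition as follows (a minimal sketch; the section name and values are placeholders):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>[conv1]
...
QWeight=SAT          ; Quantization method, can be SAT or LSQ
QWeight.Range=255    ; 255 step levels = 8-bit weights (default)
QWeight.Solver=SGD   ; Solver for the learnable quantization parameters
QWeight.Mode=Default ; Fake-quantization mode (use Integer for integer-only)
...
</pre></div>
</div>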
<div class="section" id="lsq">
<h3>LSQ<a class="headerlink" href="#lsq" title="Permalink to this headline">¶</a></h3>
<p>The Learned Step Size Quantization (LSQ) method is tailored to learn the optimal quantization step size parameters in parallel with the network weights.
As described in <a class="bibtex reference internal" href="tuto.html#bhalgat2020lsq" id="id1">[BLN+20]</a>, LSQ tries to estimate and scale the task loss gradient at each weight and activation layer’s quantizer step size,
such that it can be learned in conjunction with the other network parameters. This method can be initialized using the weights from a pre-trained
full-precision model.</p>
<table class="docutils align-default">
<colgroup>
<col style="width: 41%" />
<col style="width: 59%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Option [default value]</p></th>
<th class="head"><p>Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight.StepSize</span></code> [<code class="docutils literal notranslate"><span class="pre">100</span></code>]</p></td>
<td><p>Initial value of the learnable StepSize parameter</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight.StepOptInitStepSize</span></code> [<code class="docutils literal notranslate"><span class="pre">true</span></code>]</p></td>
<td><p>If <code class="docutils literal notranslate"><span class="pre">true</span></code>, initialize StepSize from the first batch variance</p></td>
</tr>
</tbody>
</table>
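<p>For example, an LSQ weight quantizer can be configured as follows (an illustrative sketch using the options above):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>...
QWeight=LSQ
QWeight.Range=255             ; 255 step levels = 8-bit weights
QWeight.StepSize=100          ; Initial value of the learnable step size
QWeight.StepOptInitStepSize=1 ; Initialize StepSize from the first batch variance
...
</pre></div>
</div>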
</div>
<div class="section" id="sat">
<h3>SAT<a class="headerlink" href="#sat" title="Permalink to this headline">¶</a></h3>
<p>Scale-Adjusted Training (SAT) <a class="bibtex reference internal" href="tuto.html#jin2019efficient" id="id2">[JYL19]</a> is one of the most promising solutions. The authors proposed SAT as a simple yet effective technique with which the rules of
efficient training are maintained, so that performance can be boosted and low-precision models can even surpass their
full-precision counterparts in some cases. This method exploits the DoReFa scheme for weight quantization.</p>
<table class="docutils align-default">
<colgroup>
<col style="width: 31%" />
<col style="width: 69%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Option [default value]</p></th>
<th class="head"><p>Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight.ApplyQuantization</span></code> [<code class="docutils literal notranslate"><span class="pre">true</span></code>]</p></td>
<td><p>Use <code class="docutils literal notranslate"><span class="pre">true</span></code> to enable quantization; if <code class="docutils literal notranslate"><span class="pre">false</span></code>, parameters are clamped to [-1.0, 1.0]</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight.ApplyScaling</span></code> [<code class="docutils literal notranslate"><span class="pre">false</span></code>]</p></td>
<td><p>Use <code class="docutils literal notranslate"><span class="pre">true</span></code> to scale the parameters as described in the SAT paper</p></td>
</tr>
</tbody>
</table>
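<p>For example, the first (clamping-only) training step of the SAT method can be configured as follows (a sketch; the same options are used in the tutorial below):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>...
QWeight=SAT
QWeight.ApplyQuantization=0 ; Clamp-only mode, no discretization yet
QWeight.ApplyScaling=0      ; Enable (1) to scale the parameters as in the SAT paper
...
</pre></div>
</div>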
<p>Example of clamped weights when <code class="docutils literal notranslate"><span class="pre">QWeight.ApplyQuantization=false</span></code>:</p>
<div class="figure align-default">
<img alt="Weights Full-Precision clamped." src="_images/qat_weights_Clamp.png" />
</div>
</div>
</div>
<div class="section" id="activation-quantizer-definition">
<h2>Activation Quantizer Definition<a class="headerlink" href="#activation-quantizer-definition" title="Permalink to this headline">¶</a></h2>
<p>N2D2 implements an activation quantizer block to discretize activations at training time. The activation quantizer block
is totally transparent to the user. The quantization phase of the activations requires intensive operations
to learn the parameters that rescale the histogram of the full-precision activations at training time. In addition, the implementation can become highly memory-hungry, which can make it hard to train a complex model on a single GPU without specific treatment (gradient accumulation, etc.).
That is why N2D2 merges the different operations into dedicated CUDA or CPU kernels, allowing efficient utilization
of the available computing resources.</p>
<p>Overview of the activation quantizer implementation:</p>
<div class="figure align-default">
<img alt="Activation Quantizer Functionnal Block." src="_images/qat_act_flow.png" />
</div>
<p>The common set of parameters for any kind of activation quantizer:</p>
<table class="docutils align-default">
<colgroup>
<col style="width: 30%" />
<col style="width: 70%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Option [default value]</p></th>
<th class="head"><p>Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QAct</span></code></p></td>
<td><p>Quantization method can be <code class="docutils literal notranslate"><span class="pre">SAT</span></code> or <code class="docutils literal notranslate"><span class="pre">LSQ</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">QAct.Range</span></code> [<code class="docutils literal notranslate"><span class="pre">255</span></code>]</p></td>
<td><p>Quantization range: can be <code class="docutils literal notranslate"><span class="pre">1</span></code> for binary, <code class="docutils literal notranslate"><span class="pre">255</span></code> for 8 bits, etc.</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QAct.Solver</span></code> [<code class="docutils literal notranslate"><span class="pre">SGD</span></code>]</p></td>
<td><p>Solver type for the learnable quantization parameters; can be <code class="docutils literal notranslate"><span class="pre">SGD</span></code> or <code class="docutils literal notranslate"><span class="pre">ADAM</span></code></p></td>
</tr>
</tbody>
</table>
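<p>As an illustration, an activation quantizer can be attached to a layer or base block as follows (a minimal sketch; the full version appears in the tutorial below, where the solver is configured through <code class="docutils literal notranslate"><span class="pre">QActSolver</span></code>):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>[ReluQ_def]
ActivationFunction=Linear ; The quantizer integrates its own non-linearity
QAct=SAT                  ; Quantization method, can be SAT or LSQ
QAct.Range=255            ; 255 step levels = 8-bit activations (default)
QActSolver=SGD            ; Solver for the learnable quantization parameters
</pre></div>
</div>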
<div class="section" id="id3">
<h3>LSQ<a class="headerlink" href="#id3" title="Permalink to this headline">¶</a></h3>
<p>The Learned Step Size Quantization (LSQ) method is tailored to learn the optimal quantization step size parameters in parallel with the network weights.
As described in <a class="bibtex reference internal" href="tuto.html#bhalgat2020lsq" id="id4">[BLN+20]</a>, LSQ tries to estimate and scale the task loss gradient at each weight and activation layer’s quantizer step size,
such that it can be learned in conjunction with the other network parameters. This method can be initialized using the weights from a pre-trained full-precision model.</p>
<table class="docutils align-default">
<colgroup>
<col style="width: 41%" />
<col style="width: 59%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Option [default value]</p></th>
<th class="head"><p>Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QAct.StepSize</span></code> [<code class="docutils literal notranslate"><span class="pre">100</span></code>]</p></td>
<td><p>Initial value of the learnable StepSize parameter</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">QAct.StepOptInitStepSize</span></code> [<code class="docutils literal notranslate"><span class="pre">true</span></code>]</p></td>
<td><p>If <code class="docutils literal notranslate"><span class="pre">true</span></code>, initialize StepSize from the first batch variance</p></td>
</tr>
</tbody>
</table>
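<p>A corresponding LSQ activation quantizer configuration might look as follows (illustrative sketch):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>...
QAct=LSQ
QAct.Range=255             ; 255 step levels = 8-bit activations
QAct.StepSize=100          ; Initial value of the learnable step size
QAct.StepOptInitStepSize=1 ; Initialize StepSize from the first batch variance
...
</pre></div>
</div>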
</div>
<div class="section" id="id5">
<h3>SAT<a class="headerlink" href="#id5" title="Permalink to this headline">¶</a></h3>
<p>Scale-Adjusted Training (SAT) <a class="bibtex reference internal" href="tuto.html#jin2019efficient" id="id6">[JYL19]</a> is one of the most promising solutions. The authors proposed SAT as a simple yet effective technique with which the rules of
efficient training are maintained, so that performance can be boosted and low-precision models can even surpass their
full-precision counterparts in some cases.
This method exploits the CG-PACT scheme for activation quantization, a boosted version of PACT for low-precision quantization.</p>
<table class="docutils align-default">
<colgroup>
<col style="width: 41%" />
<col style="width: 59%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Option [default value]</p></th>
<th class="head"><p>Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QAct.Alpha</span></code> [<code class="docutils literal notranslate"><span class="pre">8.0</span></code>]</p></td>
<td><p>Initial value of the learnable alpha parameter</p></td>
</tr>
</tbody>
</table>
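<p>For example, for 4-bit activations with the default initial alpha value (a sketch):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>...
QAct=SAT
QAct.Range=15  ; 15 step levels = 4-bit activations
QAct.Alpha=8.0 ; Initial value of the learnable alpha parameter
...
</pre></div>
</div>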
</div>
</div>
<div class="section" id="layer-compatibility-table">
<h2>Layer compatibility table<a class="headerlink" href="#layer-compatibility-table" title="Permalink to this headline">¶</a></h2>
<p>Here we describe the compatibility table as a function of the quantization mode. The <code class="docutils literal notranslate"><span class="pre">Cell</span></code> column indicates layers that fully support
quantizing their learnable parameters during the training phase. The <code class="docutils literal notranslate"><span class="pre">Activation</span></code> column indicates layers that support an activation quantizer on their
output feature map. An additional <code class="docutils literal notranslate"><span class="pre">Integer</span> <span class="pre">Core</span></code> column indicates layers that can be represented without any full-precision
operators at inference time. Of course, their input must come from quantized activations.</p>
<table class="docutils align-default">
<colgroup>
<col style="width: 23%" />
<col style="width: 29%" />
<col style="width: 26%" />
<col style="width: 23%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head" rowspan="2"><p>Layer
compatibility
table</p></th>
<th class="head" colspan="3"><p>Quantization Mode</p></th>
</tr>
<tr class="row-even"><th class="head"><p>Cell (parameters)</p></th>
<th class="head"><p>Activation</p></th>
<th class="head"><p>Integer Core</p></th>
</tr>
</thead>
<tbody>
<tr class="row-odd"><td><p>Activation</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Anchor</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>BatchNorm*</p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Conv</p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>Deconv</p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>ElemWise</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>Fc</p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>FMP</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>LRN</p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>LSTM</p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>ObjectDet</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Padding</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>Pool</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Proposal</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>Reshape</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Resize</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>ROIPooling</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>RP</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>Scaling</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Softmax</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>Threshold</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Transformation</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>Transpose</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Unpool</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
</tbody>
</table>
<p><em>BatchNorm cell parameters are not directly quantized during the training phase. N2D2 provides a unique approach
to absorb the trained parameters as integers within the integer-only representation of
the network during a fusion phase. This method is guaranteed to incur no loss of application
performance.</em></p>
</div>
<div class="section" id="tutorial">
<h2>Tutorial<a class="headerlink" href="#tutorial" title="Permalink to this headline">¶</a></h2>
<div class="section" id="onnx-model-resnet-18-example-ini-file">
<h3>ONNX model : ResNet-18 Example - INI File<a class="headerlink" href="#onnx-model-resnet-18-example-ini-file" title="Permalink to this headline">¶</a></h3>
<p>In this example we show how to quantize the <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> ONNX model with 4-bit weights and 4-bit activations using the <code class="docutils literal notranslate"><span class="pre">SAT</span></code> quantization method.
We start from the <code class="docutils literal notranslate"><span class="pre">resnet18v1.onnx</span></code> file, which you can pick up at <a class="reference external" href="https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet18v1/resnet18v1.onnx">https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet18v1/resnet18v1.onnx</a>.
You can also download it with the N2D2 script <code class="docutils literal notranslate"><span class="pre">N2D2/tools/install_onnx_models.py</span></code>, which automatically installs a set of pre-trained
ONNX models under your <code class="docutils literal notranslate"><span class="pre">N2D2_MODELS</span></code> system path.</p>
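<p>For example, assuming a standard Python environment, the script can be invoked as follows (an illustrative command; adapt the path to your checkout):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>python N2D2/tools/install_onnx_models.py
</pre></div>
</div>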
<p>Alternatively, you can start from the <code class="docutils literal notranslate"><span class="pre">.ini</span></code> file located at <code class="docutils literal notranslate"><span class="pre">N2D2/models/ONNX/resnet-18-v1-onnx.ini</span></code> and modify it directly, or create an empty
<code class="docutils literal notranslate"><span class="pre">resnet18-v1.ini</span></code> file in your simulation folder and copy/paste all the following INI instructions into it.</p>
<p>In this example you will also need to know the ONNX cell names of your graph. We recommend opening the ONNX graph in a graph viewer
such as Netron (<a class="reference external" href="https://lutzroeder.github.io/netron/">https://lutzroeder.github.io/netron/</a>).</p>
<p>In this example we demonstrate how to apply the <code class="docutils literal notranslate"><span class="pre">SAT</span></code> quantization procedure to the <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> ONNX model. The first step of the procedure consists
of training <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> on the <code class="docutils literal notranslate"><span class="pre">ImageNet</span></code> database with clamped weights.</p>
<p>First of all, we instantiate the dataset driver and the pre-processing / data augmentation functions:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">DefaultModel</span><span class="o">=</span><span class="s">Frame_CUDA</span>
<span class="c1">;ImageNet dataset</span>
<span class="k">[database]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">ILSVRC2012_Database</span>
<span class="na">RandomPartitioning</span><span class="o">=</span><span class="s">1</span>
<span class="na">Learn</span><span class="o">=</span><span class="s">1.0</span>
<span class="c1">;Standard image resolution for ImageNet, batchsize=128</span>
<span class="k">[sp]</span>
<span class="na">SizeX</span><span class="o">=</span><span class="s">224</span>
<span class="na">SizeY</span><span class="o">=</span><span class="s">224</span>
<span class="na">NbChannels</span><span class="o">=</span><span class="s">3</span>
<span class="na">BatchSize</span><span class="o">=</span><span class="s">128</span>
<span class="k">[sp.Transformation-1]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">ColorSpaceTransformation</span>
<span class="na">ColorSpace</span><span class="o">=</span><span class="s">RGB</span>
<span class="k">[sp.Transformation-2]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">RangeAffineTransformation</span>
<span class="na">FirstOperator</span><span class="o">=</span><span class="s">Divides</span>
<span class="na">FirstValue</span><span class="o">=</span><span class="s">255.0</span>
<span class="k">[sp.Transformation-3]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">RandomResizeCropTransformation</span>
<span class="na">Width</span><span class="o">=</span><span class="s">224</span>
<span class="na">Height</span><span class="o">=</span><span class="s">224</span>
<span class="na">ScaleMin</span><span class="o">=</span><span class="s">0.2</span>
<span class="na">ScaleMax</span><span class="o">=</span><span class="s">1.0</span>
<span class="na">RatioMin</span><span class="o">=</span><span class="s">0.75</span>
<span class="na">RatioMax</span><span class="o">=</span><span class="s">1.33</span>
<span class="na">ApplyTo</span><span class="o">=</span><span class="s">LearnOnly</span>
<span class="k">[sp.Transformation-4]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">RescaleTransformation</span>
<span class="na">Width</span><span class="o">=</span><span class="s">256</span>
<span class="na">Height</span><span class="o">=</span><span class="s">256</span>
<span class="na">KeepAspectRatio</span><span class="o">=</span><span class="s">1</span>
<span class="na">ResizeToFit</span><span class="o">=</span><span class="s">0</span>
<span class="na">ApplyTo</span><span class="o">=</span><span class="s">NoLearn</span>
<span class="k">[sp.Transformation-5]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">PadCropTransformation</span>
<span class="na">Width</span><span class="o">=</span><span class="s">[sp.Transformation-4]Width</span>
<span class="na">Height</span><span class="o">=</span><span class="s">[sp.Transformation-4]Height</span>
<span class="na">ApplyTo</span><span class="o">=</span><span class="s">NoLearn</span>
<span class="k">[sp.Transformation-6]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">SliceExtractionTransformation</span>
<span class="na">Width</span><span class="o">=</span><span class="s">[sp]SizeX</span>
<span class="na">Height</span><span class="o">=</span><span class="s">[sp]SizeY</span>
<span class="na">OffsetX</span><span class="o">=</span><span class="s">16</span>
<span class="na">OffsetY</span><span class="o">=</span><span class="s">16</span>
<span class="na">ApplyTo</span><span class="o">=</span><span class="s">NoLearn</span>
<span class="k">[sp.OnTheFlyTransformation-7]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">FlipTransformation</span>
<span class="na">ApplyTo</span><span class="o">=</span><span class="s">LearnOnly</span>
<span class="na">RandomHorizontalFlip</span><span class="o">=</span><span class="s">1</span>
</pre></div>
</div>
<p>Now that the dataset driver and the pre-processing are defined, we can focus on the neural network configuration.
In our example we decide to quantize all convolution and fully-connected layers.
A base block common to all convolution layers can be defined in the <em>.ini</em> file. This specific base block uses <code class="docutils literal notranslate"><span class="pre">onnx:Conv_def</span></code>, which
overwrites the native definition of all convolution layers defined in the ONNX file.
This base block is used to set the quantization parameters, such as the weight bit range, the scaling mode and the quantization mode, as well as the solver configuration.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[onnx:Conv_def]</span>
<span class="na">QWeight</span><span class="o">=</span><span class="s">SAT</span>
<span class="na">QWeight.ApplyScaling</span><span class="o">=</span><span class="s">0 ; No scaling needed because each conv is followed by batch-normalization layers</span>
<span class="na">QWeight.ApplyQuantization</span><span class="o">=</span><span class="s">0 ; Only clamp mode for the 1st step</span>
<span class="na">WeightsFiller</span><span class="o">=</span><span class="s">XavierFiller ; Specific filler for SAT method</span>
<span class="na">WeightsFiller.VarianceNorm</span><span class="o">=</span><span class="s">FanOut ; Specific filler for SAT method</span>
<span class="na">WeightsFiller.Scaling</span><span class="o">=</span><span class="s">1.0 ; Specific filler for SAT method</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">conv.config ; Config for conv parameters</span>
<span class="k">[conv.config]</span>
<span class="na">NoBias</span><span class="o">=</span><span class="s">1 ; No bias needed because each conv is followed by batch-normalization layers</span>
<span class="na">Solvers.LearningRatePolicy</span><span class="o">=</span><span class="s">CosineDecay ; Can be different Policy following your problem, recommended with SAT method</span>
<span class="na">Solvers.LearningRate</span><span class="o">=</span><span class="s">0.05 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.Momentum</span><span class="o">=</span><span class="s">0.9 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.Decay</span><span class="o">=</span><span class="s">0.00004 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.MaxIterations</span><span class="o">=</span><span class="s">192175050; For 150-epoch on ImageNet 1 epoch = 1281167 samples, 150 epoch = 1281167*150 samples</span>
<span class="na">Solvers.IterationSize</span><span class="o">=</span><span class="s">2 ;Our physical batch size is set to 128, iteration size is set to 2 because we want a batchsize of 256</span>
</pre></div>
</div>
<p>A base block common to all fully-connected layers can be defined in the <em>.ini</em> file. This specific base block uses <code class="docutils literal notranslate"><span class="pre">onnx:Fc_def</span></code>, which
overwrites the native definition of all fully-connected layers defined in the ONNX file.
This base block is used to set the quantization parameters, such as the weight bit range, the scaling mode and the quantization mode, as well as the solver configuration.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[onnx:Fc_def]</span>
<span class="na">QWeight</span><span class="o">=</span><span class="s">SAT</span>
<span class="na">QWeight.ApplyScaling</span><span class="o">=</span><span class="s">1 ; Scaling needed for Full-Connected</span>
<span class="na">QWeight.ApplyQuantization</span><span class="o">=</span><span class="s">0 ; Only clamp mode for the 1st step</span>
<span class="na">WeightsFiller</span><span class="o">=</span><span class="s">XavierFiller ; Specific filler for SAT method</span>
<span class="na">WeightsFiller.VarianceNorm</span><span class="o">=</span><span class="s">FanOut ; Specific filler for SAT method</span>
<span class="na">WeightsFiller.Scaling</span><span class="o">=</span><span class="s">1.0 ; Specific filler for SAT method</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">fc.config ; Config for conv parameters</span>
<span class="k">[fc.config]</span>
<span class="na">NoBias</span><span class="o">=</span><span class="s">0 ; Bias needed for fully-connected</span>
<span class="na">Solvers.LearningRatePolicy</span><span class="o">=</span><span class="s">CosineDecay ; Can be different Policy following your problem, recommended with SAT method</span>
<span class="na">Solvers.LearningRate</span><span class="o">=</span><span class="s">0.05 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.Momentum</span><span class="o">=</span><span class="s">0.9 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.Decay</span><span class="o">=</span><span class="s">0.00004 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.MaxIterations</span><span class="o">=</span><span class="s">192175050; For 150-epoch on ImageNet 1 epoch = 1281167 samples, 150 epoch = 1281167*150 samples</span>
<span class="na">Solvers.IterationSize</span><span class="o">=</span><span class="s">2 ;Our physical batch size is set to 128, iteration size is set to 2 because we want a batch size of 256</span>
</pre></div>
</div>
<p>A base block common to all batch-normalization layers can be defined in the <em>.ini</em> file. This specific base block uses <code class="docutils literal notranslate"><span class="pre">onnx:BatchNorm_def</span></code>, which
overwrites the native definition of all batch-normalization layers defined in the ONNX file.
Here we simply define the hyper-parameters of the batch-normalization layers.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[onnx:BatchNorm_def]</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">bn_train.config</span>
<span class="k">[bn_train.config]</span>
<span class="na">Solvers.LearningRatePolicy</span><span class="o">=</span><span class="s">CosineDecay ; Can be different Policy following your problem, recommended with SAT method</span>
<span class="na">Solvers.LearningRate</span><span class="o">=</span><span class="s">0.05 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.Momentum</span><span class="o">=</span><span class="s">0.9 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.Decay</span><span class="o">=</span><span class="s">0.00004 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.MaxIterations</span><span class="o">=</span><span class="s">192175050; For 150-epoch on ImageNet 1 epoch = 1281167 samples, 150 epoch = 1281167*150 samples</span>
<span class="na">Solvers.IterationSize</span><span class="o">=</span><span class="s">2 ;Our physical batchsize is set to 128, iterationsize is set to 2 because we want a batchsize of 256</span>
</pre></div>
</div>
<p>Then we describe the <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> topology directly from the ONNX file that you previously installed in your simulation folder:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[onnx]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">sp</span>
<span class="na">Type</span><span class="o">=</span><span class="s">ONNX</span>
<span class="na">File</span><span class="o">=</span><span class="s">resnet18v1.onnx</span>
<span class="na">ONNX_init</span><span class="o">=</span><span class="s">0 ; For SAT method we need to initialize from clamped weights or dedicated filler</span>
<span class="k">[soft1]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">resnetv15_dense0_fwd</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Softmax</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">1000</span>
<span class="na">WithLoss</span><span class="o">=</span><span class="s">1</span>
<span class="k">[soft1.Target]</span>
</pre></div>
</div>
<p>Now that your <code class="docutils literal notranslate"><span class="pre">resnet18-v1.ini</span></code> file is set up in your simulation folder, you just have to run the learning phase to clamp the weights
with the command:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">./</span><span class="n">n2d2</span> <span class="n">resnet18</span><span class="o">-</span><span class="n">v1</span><span class="o">.</span><span class="n">ini</span> <span class="o">-</span><span class="n">learn</span><span class="o">-</span><span class="n">epoch</span> <span class="mi">150</span> <span class="o">-</span><span class="n">valid</span><span class="o">-</span><span class="n">metric</span> <span class="n">Precision</span>
</pre></div>
</div>
<p>This command runs the learning phase over 150 epochs on the <code class="docutils literal notranslate"><span class="pre">ImageNet</span></code> dataset.
The final test accuracy should reach at least 70%.</p>
<p>Next, copy the parameters from the weights folder to another location,
for example a <em>weights_clamped</em> folder.</p>
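<p>Assuming the default output folder names (an illustrative command; adapt the paths to your setup):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>cp -r weights weights_clamped
</pre></div>
</div>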
<p>Congratulations! Your <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> model now has clamped weights! You can check the results
in your <em>weights_clamped</em> folder.
Now that your <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> model has clamped weights, you can play with it and try different quantization modes.</p>
<p>In addition, if you also want to quantize the <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> activations, you need to create a specific base block in your
<code class="docutils literal notranslate"><span class="pre">resnet-18-v1.ini</span></code> file in the following way:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[ReluQ_def]</span>
<span class="na">ActivationFunction</span><span class="o">=</span><span class="s">Linear ; No more need Relu because SAT quantizer integrates it's own non-linear activation</span>
<span class="na">QAct</span><span class="o">=</span><span class="s">SAT ; SAT quantization method</span>
<span class="na">QAct.Range</span><span class="o">=</span><span class="s">15 ; Range=15 for 4-bits quantization model</span>
<span class="na">QActSolver</span><span class="o">=</span><span class="s">SGD ; Specify SGD solver for learned alpha parameter</span>
<span class="na">QActSolver.LearningRatePolicy</span><span class="o">=</span><span class="s">CosineDecay ; Can be different Policy following your problem, recommended with SAT method</span>
<span class="na">QActSolver.LearningRate</span><span class="o">=</span><span class="s">0.05 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">QActSolver.Momentum</span><span class="o">=</span><span class="s">0.9 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">QActSolver.Decay</span><span class="o">=</span><span class="s">0.00004 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">QActSolver.MaxIterations</span><span class="o">=</span><span class="s">192175050; For 150-epoch on ImageNet 1 epoch = 1281167 samples, 150 epoch = 1281167*150 samples</span>
<span class="na">QActSolver.IterationSize</span><span class="o">=</span><span class="s">2 ;Our physical batch size is set to 128, iteration size is set to 2 because we want a batchsize of 256</span>
</pre></div>
</div>
<p>This base block will be used to override all the <code class="docutils literal notranslate"><span class="pre">Rectifier</span></code> activation functions of the ONNX model.
To identify the names of the different activation functions, you can use the Netron tool:</p>
<div class="figure align-default">
<img alt="Relu Name." src="_images/qat_netron_r.png" />
</div>
<p>We then override all the activation functions of the model with our previously described activation quantizer:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">[resnetv15_relu0_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage1_relu0_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage1_activation0]ReluQ_def</span>
<span class="na">[resnetv15_stage1_relu1_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage1_activation1]ReluQ_def</span>
<span class="na">[resnetv15_stage2_relu0_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage2_activation0]ReluQ_def</span>
<span class="na">[resnetv15_stage2_relu1_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage2_activation1]ReluQ_def</span>
<span class="na">[resnetv15_stage3_relu0_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage3_activation0]ReluQ_def</span>
<span class="na">[resnetv15_stage3_relu1_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage3_activation1]ReluQ_def</span>
<span class="na">[resnetv15_stage4_relu0_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage4_activation0]ReluQ_def</span>
<span class="na">[resnetv15_stage4_relu1_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage4_activation1]ReluQ_def</span>
</pre></div>
</div>
<p>Now that the activation quantization mode is set, we focus on quantizing the weight parameters.
For example, to also quantize the weights in a 4-bit range, set the convolution base block parameters
as follows:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[onnx:Conv_def]</span>
<span class="na">...</span>
<span class="na">QWeight.ApplyQuantization</span><span class="o">=</span><span class="s">1 ; Set to 1 for quantization mode</span>
<span class="na">QWeight.Range</span><span class="o">=</span><span class="s">15 ; Conv is now quantized in 4-bits range (2^4 - 1)</span>
<span class="na">...</span>
</pre></div>
</div>
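<p>The rule for <code class="docutils literal notranslate"><span class="pre">QWeight.Range</span></code> is the one given in the comments: for <em>b</em>-bit quantization, Range = 2<sup>b</sup> - 1, hence 15 for 4 bits and 255 for 8 bits.</p>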
<p>In the same manner, you can modify the fully-connected base block:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[onnx:Fc_def]</span>
<span class="na">...</span>
<span class="na">QWeight.ApplyQuantization</span><span class="o">=</span><span class="s">1 ; Set to 1 for quantization mode</span>
<span class="na">QWeight.Range</span><span class="o">=</span><span class="s">15 ; Fc is now quantized in 4-bits range (2^4 - 1)</span>
<span class="na">...</span>
</pre></div>
</div>
<p>As a common practice in quantization-aware training, the first and last layers are quantized to 8 bits.
In ResNet-18 the first layer is a convolution layer, so we have to override its quantization range specifically.</p>
<p>We first identify the name of the first layer with the Netron tool:</p>
<div class="figure align-default">
<img alt="First Conv Cell Name." src="_images/qat_netron_conv_name.png" />
</div>
<p>We then override the range of the first convolution layer of the <code class="docutils literal notranslate"><span class="pre">resnet18v1.onnx</span></code> model:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">[resnetv15_conv0_fwd]onnx:Conv_def</span>
<span class="na">QWeight.Range</span><span class="o">=</span><span class="s">255 ;resnetv15_conv0_fwd is now quantized in 8-bits range (2^8 - 1)</span>
</pre></div>
</div>
<p>In the same way, we override the range of the last fully-connected layer to 8 bits:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">[resnetv15_dense0_fwd]onnx:Fc_def</span>
<span class="na">QWeight.Range</span><span class="o">=</span><span class="s">255 ;resnetv15_dense0_fwd is now quantized in 8-bits range (2^8 - 1)</span>
</pre></div>
</div>
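<p>Note that these per-layer sections only restate the parameter that changes; everything else (quantizer, solvers, and so on) is inherited from the <code class="docutils literal notranslate"><span class="pre">onnx:Conv_def</span></code> and <code class="docutils literal notranslate"><span class="pre">onnx:Fc_def</span></code> base blocks.</p>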
<p>Now that your modified <code class="docutils literal notranslate"><span class="pre">resnet-18-v1.ini</span></code> file is ready, you just have to run a learning phase with the same hyperparameters,
using transfer learning from the previously clamped weights,
with this command:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">./</span><span class="n">n2d2</span> <span class="n">resnet</span><span class="o">-</span><span class="mi">18</span><span class="o">-</span><span class="n">v1</span><span class="o">.</span><span class="n">ini</span> <span class="o">-</span><span class="n">learn</span><span class="o">-</span><span class="n">epoch</span> <span class="mi">150</span> <span class="o">-</span><span class="n">w</span> <span class="n">weights_clamped</span> <span class="o">-</span><span class="n">valid</span><span class="o">-</span><span class="n">metric</span> <span class="n">Precision</span>
</pre></div>
</div>
<p>This command will run the learning phase over 150 epochs with the <code class="docutils literal notranslate"><span class="pre">Imagenet</span></code> dataset.
The final test accuracy should reach at least 70%.</p>
<p>Congratulations! Your <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> model now has its weight parameters and activations quantized to 4 bits!</p>
</div>
<div class="section" id="onnx-model-resnet-18-example-python">
<h3>ONNX model : ResNet-18 Example - Python<a class="headerlink" href="#onnx-model-resnet-18-example-python" title="Permalink to this headline">¶</a></h3>
<p>Coming soon.</p>
</div>
<div class="section" id="hand-made-model-lenet-example-ini-file">
<h3>Hand-Made model : LeNet Example - INI File<a class="headerlink" href="#hand-made-model-lenet-example-ini-file" title="Permalink to this headline">¶</a></h3>
<p>One can apply the <code class="docutils literal notranslate"><span class="pre">SAT</span></code> quantization methodology to a chosen deep neural network by adding the right parameters to the
<code class="docutils literal notranslate"><span class="pre">.ini</span></code> file. Here we show how to configure the <code class="docutils literal notranslate"><span class="pre">.ini</span></code> file to correctly apply the SAT quantization.
In this example we apply the SAT quantization procedure to a hand-made LeNet model. The first step of the procedure consists
of training <code class="docutils literal notranslate"><span class="pre">LeNet</span></code> on the <code class="docutils literal notranslate"><span class="pre">MNIST</span></code> database with clamped weights.</p>
<p>We recommend creating an empty <code class="docutils literal notranslate"><span class="pre">LeNet.ini</span></code> file in your simulation folder and copying/pasting all the following <code class="docutils literal notranslate"><span class="pre">ini</span></code> blocks
into it.</p>
<p>First of all, we describe the <code class="docutils literal notranslate"><span class="pre">MNIST</span></code> dataset driver and the pre-processing used for data augmentation at training and test time:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="c1">; Frame_CUDA for GPU and Frame for CPU</span>
<span class="na">DefaultModel</span><span class="o">=</span><span class="s">Frame_CUDA</span>
<span class="c1">; MNIST Driver Database Instantiation</span>
<span class="k">[database]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">MNIST_IDX_Database</span>
<span class="na">RandomPartitioning</span><span class="o">=</span><span class="s">1</span>
<span class="c1">; Environment Description , batch=256</span>
<span class="k">[env]</span>
<span class="na">SizeX</span><span class="o">=</span><span class="s">32</span>
<span class="na">SizeY</span><span class="o">=</span><span class="s">32</span>
<span class="na">BatchSize</span><span class="o">=</span><span class="s">256</span>
<span class="k">[env.Transformation_0]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">RescaleTransformation</span>
<span class="na">Width</span><span class="o">=</span><span class="s">32</span>
<span class="na">Height</span><span class="o">=</span><span class="s">32</span>
</pre></div>
</div>
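<p>If you want additional augmentation at training time only, N2D2 also supports on-the-fly transformations. A minimal sketch, assuming the <code class="docutils literal notranslate"><span class="pre">DistortionTransformation</span></code> parameters used in N2D2&#8217;s stock MNIST examples:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>; Applied on the fly, to the learning set only
[env.OnTheFlyTransformation_0]
Type=DistortionTransformation
ApplyTo=LearnOnly
ElasticGaussianSize=21
ElasticSigma=6.0
ElasticScaling=36.0
Scaling=10.0
Rotation=10.0
</pre></div>
</div>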
<p>In our example we decide to quantize all convolution and fully-connected layers.
A base block common to all convolution layers can be defined in the <em>.ini</em> file. This base block is used to set the quantization parameters, such as the weights bit range, the scaling mode and the quantization mode, as well as the solver configuration.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[Conv_def]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Conv</span>
<span class="na">ActivationFunction</span><span class="o">=</span><span class="s">Linear</span>
<span class="na">QWeight</span><span class="o">=</span><span class="s">SAT</span>
<span class="na">QWeight.ApplyScaling</span><span class="o">=</span><span class="s">0 ; No scaling needed because each conv is followed by batch-normalization layers</span>
<span class="na">QWeight.ApplyQuantization</span><span class="o">=</span><span class="s">0 ; Only clamp mode for the 1st step</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">common.config</span>
<span class="k">[common.config]</span>
<span class="na">NoBias</span><span class="o">=</span><span class="s">1</span>
<span class="na">Solvers.LearningRate</span><span class="o">=</span><span class="s">0.05</span>
<span class="na">Solvers.LearningRatePolicy</span><span class="o">=</span><span class="s">None</span>
<span class="na">Solvers.Momentum</span><span class="o">=</span><span class="s">0.0</span>
<span class="na">Solvers.Decay</span><span class="o">=</span><span class="s">0.0</span>
</pre></div>
</div>
<p>A base block common to all fully-connected layers can be defined in the <em>.ini</em> file.
This base block is used to set the quantization parameters, such as the weights bit range, the scaling mode and the quantization mode, as well as the solver configuration.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[Fc_def]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Fc</span>
<span class="na">ActivationFunction</span><span class="o">=</span><span class="s">Linear</span>
<span class="na">QWeight</span><span class="o">=</span><span class="s">SAT</span>
<span class="na">QWeight.ApplyScaling</span><span class="o">=</span><span class="s">1 ; Scaling needed because for Full-Conncted</span>
<span class="na">QWeight.ApplyQuantization</span><span class="o">=</span><span class="s">0 ; Only clamp mode for the 1st step</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">common.config</span>
</pre></div>
</div>
<p>A base block common to all batch-normalization layers can be defined in the <em>.ini</em> file.
This base block is used to set the activation quantization parameters, such as the activations bit range and the quantization mode, as well as the solver configuration.
In this first step, batch-normalization activations are not quantized yet. We simply define a typical batch-normalization layer with <code class="docutils literal notranslate"><span class="pre">Rectifier</span></code> as the
non-linear activation function.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[Bn_def]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">BatchNorm</span>
<span class="na">ActivationFunction</span><span class="o">=</span><span class="s">Rectifier</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">bn.config</span>
<span class="k">[bn.config]</span>
<span class="na">Solvers.LearningRate</span><span class="o">=</span><span class="s">0.05</span>
<span class="na">Solvers.LearningRatePolicy</span><span class="o">=</span><span class="s">None</span>
<span class="na">Solvers.Momentum</span><span class="o">=</span><span class="s">0.0</span>
<span class="na">Solvers.Decay</span><span class="o">=</span><span class="s">0.0</span>
</pre></div>
</div>
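<p>For reference, in the later quantization step this same base block would receive an activation quantizer, analogous to the <code class="docutils literal notranslate"><span class="pre">ReluQ_def</span></code> block of the ResNet example above. A sketch, assuming 4-bit activations (Range=15):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>[Bn_def]
Type=BatchNorm
ActivationFunction=Linear ; the SAT quantizer integrates its own non-linearity
QAct=SAT
QAct.Range=15 ; Range=15 for a 4-bit quantization of the activations
QActSolver=SGD
ConfigSection=bn.config
</pre></div>
</div>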
<p>Finally, we describe the full backbone of the <code class="docutils literal notranslate"><span class="pre">LeNet</span></code> topology:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">[conv1] Conv_def</span>
<span class="na">Input</span><span class="o">=</span><span class="s">env</span>
<span class="na">KernelWidth</span><span class="o">=</span><span class="s">5</span>
<span class="na">KernelHeight</span><span class="o">=</span><span class="s">5</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">6</span>
<span class="na">[bn1] Bn_def</span>
<span class="na">Input</span><span class="o">=</span><span class="s">conv1</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">[conv1]NbOutputs</span>
<span class="c1">; Non-overlapping max pooling P2</span>
<span class="k">[pool1]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">bn1</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Pool</span>
<span class="na">PoolWidth</span><span class="o">=</span><span class="s">2</span>
<span class="na">PoolHeight</span><span class="o">=</span><span class="s">2</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">6</span>
<span class="na">Stride</span><span class="o">=</span><span class="s">2</span>
<span class="na">Pooling</span><span class="o">=</span><span class="s">Max</span>
<span class="na">Mapping.Size</span><span class="o">=</span><span class="s">1</span>
<span class="na">[conv2] Conv_def</span>
<span class="na">Input</span><span class="o">=</span><span class="s">pool1</span>
<span class="na">KernelWidth</span><span class="o">=</span><span class="s">5</span>
<span class="na">KernelHeight</span><span class="o">=</span><span class="s">5</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">16</span>
<span class="na">[bn2] Bn_def</span>
<span class="na">Input</span><span class="o">=</span><span class="s">conv2</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">[conv2]NbOutputs</span>
<span class="k">[pool2]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">bn2</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Pool</span>
<span class="na">PoolWidth</span><span class="o">=</span><span class="s">2</span>
<span class="na">PoolHeight</span><span class="o">=</span><span class="s">2</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">16</span>
<span class="na">Stride</span><span class="o">=</span><span class="s">2</span>
<span class="na">Pooling</span><span class="o">=</span><span class="s">Max</span>
<span class="na">Mapping.Size</span><span class="o">=</span><span class="s">1</span>
<span class="na">[conv3] Conv_def</span>
<span class="na">Input</span><span class="o">=</span><span class="s">pool2</span>
<span class="na">KernelWidth</span><span class="o">=</span><span class="s">5</span>
<span class="na">KernelHeight</span><span class="o">=</span><span class="s">5</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">120</span>
<span class="na">[bn3]Bn_def</span>