<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>[NEW] Quantization-Aware Training — N2D2 documentation</title>
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script src="_static/jquery.js"></script>
<script src="_static/underscore.js"></script>
<script src="_static/doctools.js"></script>
<script src="_static/language_data.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>
<link rel="author" title="About these documents" href="about.html" />
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="Export: C++" href="export_CPP.html" />
<link rel="prev" title="Post-training quantization" href="quant_post.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home" alt="Documentation Home"> N2D2
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Introduction:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="intro.html">Presentation</a></li>
<li class="toctree-l1"><a class="reference internal" href="about.html">About N2D2-IP</a></li>
<li class="toctree-l1"><a class="reference internal" href="simus.html">Performing simulations</a></li>
<li class="toctree-l1"><a class="reference internal" href="perfs_tools.html">Performance evaluation tools</a></li>
<li class="toctree-l1"><a class="reference internal" href="tuto.html">Tutorials</a></li>
</ul>
<p class="caption"><span class="caption-text">ONNX Import:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="onnx_convert.html">Obtain ONNX models</a></li>
<li class="toctree-l1"><a class="reference internal" href="onnx_import.html">Import ONNX models</a></li>
<li class="toctree-l1"><a class="reference internal" href="onnx_transfer.html">Train from ONNX models</a></li>
</ul>
<p class="caption"><span class="caption-text">Quantization and Export:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="quant_post.html">Post-training quantization</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">[NEW] Quantization-Aware Training</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#getting-started">Getting Started</a></li>
<li class="toctree-l2"><a class="reference internal" href="#cell-quantizer-definition">Cell Quantizer Definition</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#lsq">LSQ</a></li>
<li class="toctree-l3"><a class="reference internal" href="#sat">SAT</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#activation-quantizer-definition">Activation Quantizer Definition</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#id3">LSQ</a></li>
<li class="toctree-l3"><a class="reference internal" href="#id5">SAT</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#layer-compatibility-table">Layer compatibility table</a></li>
<li class="toctree-l2"><a class="reference internal" href="#tutorial">Tutorial</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#onnx-model-resnet-18-example-ini-file">ONNX model : ResNet-18 Example - INI File</a></li>
<li class="toctree-l3"><a class="reference internal" href="#onnx-model-resnet-18-example-python">ONNX model : ResNet-18 Example - Python</a></li>
<li class="toctree-l3"><a class="reference internal" href="#hand-made-model-lenet-example-ini-file">Hand-Made model : LeNet Example - INI File</a></li>
<li class="toctree-l3"><a class="reference internal" href="#hand-made-model-lenet-example-python">Hand-Made model : LeNet Example - Python</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#results">Results</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#training-time-performances">Training Time Performances</a></li>
<li class="toctree-l3"><a class="reference internal" href="#mobilenet-v1">MobileNet-v1</a></li>
<li class="toctree-l3"><a class="reference internal" href="#mobilenet-v2">MobileNet-v2</a></li>
<li class="toctree-l3"><a class="reference internal" href="#resnet">ResNet</a></li>
<li class="toctree-l3"><a class="reference internal" href="#inception-v1">Inception-v1</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="export_CPP.html">Export: C++</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_CPP_STM32.html">Export: C++/STM32</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_TensorRT.html">Export: TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_DNeuro.html">Export: DNeuro</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_ONNX.html">Export: ONNX</a></li>
<li class="toctree-l1"><a class="reference internal" href="export_legacy.html">Export: other / legacy</a></li>
</ul>
<p class="caption"><span class="caption-text">INI File Interface:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="ini_intro.html">Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_databases.html">Databases</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_data_analysis.html">Stimuli data analysis</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_environment.html">Stimuli provider (Environment)</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_layers.html">Network Layers</a></li>
<li class="toctree-l1"><a class="reference internal" href="ini_target.html">Targets (outputs & losses)</a></li>
<li class="toctree-l1"><a class="reference internal" href="adversarial.html">Adversarial module</a></li>
</ul>
<p class="caption"><span class="caption-text">Python API:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="containers.html">Containers</a></li>
<li class="toctree-l1"><a class="reference internal" href="cells.html">Cells</a></li>
<li class="toctree-l1"><a class="reference internal" href="databases.html">Databases</a></li>
<li class="toctree-l1"><a class="reference internal" href="stimuliprovider.html">StimuliProvider</a></li>
<li class="toctree-l1"><a class="reference internal" href="deepnet.html">DeepNet</a></li>
</ul>
<p class="caption"><span class="caption-text">C++ API / Developer:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="dev_intro.html">Introduction</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">N2D2</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home"></a> »</li>
<li>[NEW] Quantization-Aware Training</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/quant_qat.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="new-quantization-aware-training">
<h1>[NEW] Quantization-Aware Training<a class="headerlink" href="#new-quantization-aware-training" title="Permalink to this headline">¶</a></h1>
<p><strong>N2D2-IP only: available upon request.</strong></p>
<div class="section" id="getting-started">
<h2>Getting Started<a class="headerlink" href="#getting-started" title="Permalink to this headline">¶</a></h2>
<p>N2D2 provides a complete design environment for a wide range of quantization modes. These modes are implemented as a set of integrated, highly modular blocks. N2D2 implements a per-layer quantization scheme that can differ at
each level of the neural network. This high granularity makes it possible to search for the best implementation depending on the
hardware constraints. Moreover, to achieve the best performance, N2D2 implements the latest state-of-the-art quantization methods, summarized in the figure below. Each dot represents one DNN (from the MobileNet or ResNet family), quantized with the number of bits indicated beside it.</p>
<div class="figure align-default">
<img alt="QAT state-of-the-art." src="_images/qat_sota.png" />
</div>
<p>The user can leverage the high modularity of our set of quantizer blocks and simply choose the method that best fits the initial requirements, computation resources and time-to-market strategy.
For example, the <code class="docutils literal notranslate"><span class="pre">LSQ</span></code> method only needs a limited number of training epochs to quantize a model,
while the <code class="docutils literal notranslate"><span class="pre">SAT</span></code> method requires more training epochs but currently gives the best quantization performance.
In addition, the final objectives can be expressed in terms of different user requirements, depending on the compression capability of the targeted hardware.
Depending on these objectives, we can consider different quantization schemes:</p>
<dl class="simple">
<dt>Weights-Only Quantization</dt><dd><p>In this quantization scheme, only the weights are discretized to fit in a limited set of possible states. Activations
are not impacted.
Let’s say we want to evaluate the performance of our model with 4-bit weights in the convolution layers. N2D2 natively provides
the ability to add a quantizer module, with no need to import a new package or to modify any source code. We then
just need to specify the <code class="docutils literal notranslate"><span class="pre">QWeight</span></code> type and <code class="docutils literal notranslate"><span class="pre">QWeight.Range</span></code> for the step-level discretization.</p>
</dd>
</dl>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">...</span>
<span class="na">QWeight</span><span class="o">=</span><span class="s">SAT ; Quantization Method can be ``LSQ`` or ``SAT``</span>
<span class="na">QWeight.Range</span><span class="o">=</span><span class="s">15 ; Range is set to ``15`` step level, can be represented as a 4-bits word</span>
<span class="na">...</span>
</pre></div>
</div>
<p>Example of fake-quantized weights on 4 bits / 15 levels:</p>
<div class="figure align-default">
<img alt="Weights Quantization in fake quantization on 15 levels." src="_images/qat_weights_fakeQ.png" />
</div>
<dl>
<dt>Mixed Weights-Activations Quantization</dt><dd><p>In this quantization scheme, both activations and weights are quantized, possibly at different step levels. For layers that have a non-linear activation function and learnable parameters, such as <code class="docutils literal notranslate"><span class="pre">Fc</span></code> and <code class="docutils literal notranslate"><span class="pre">Conv</span></code>, we first specify <code class="docutils literal notranslate"><span class="pre">QWeight</span></code> in the same way as in the Weights-Only quantization mode.</p>
<p>Let’s now say that we want to evaluate the performance of our model with activations quantized to 3 bits.
In a similar manner as for the <code class="docutils literal notranslate"><span class="pre">QWeight</span></code> quantizer, we specify the activation quantizer <code class="docutils literal notranslate"><span class="pre">QAct</span></code> for all layers that have a non-linear activation function. The method itself, here <code class="docutils literal notranslate"><span class="pre">QAct=SAT</span></code>, ensures the non-linearity of the activation function.</p>
</dd>
</dl>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">...</span>
<span class="na">ActivationFunction</span><span class="o">=</span><span class="s">Linear</span>
<span class="na">QAct</span><span class="o">=</span><span class="s">SAT ; Quantization Method can be ``LSQ`` or ``SAT``</span>
<span class="na">QAct.Range</span><span class="o">=</span><span class="s">7 ; Range is set to ``7`` step level, can be represented as a 3-bits word</span>
<span class="na">...</span>
</pre></div>
</div>
<p>Example of an activation feature map quantized to 4 bits / 15 levels:</p>
<div class="figure align-default">
<img alt="4-bits Quantized Activation Feature Map ." src="_images/qat_fm_4b.png" />
</div>
<dl class="simple">
<dt>Integer-Only Quantization</dt><dd><p>Activations and weights are represented as integers during the learning phase, which is one step beyond classical fake quantization. In practice,
exploiting a weights-only quantization scheme or fake quantization on hardware components is not straightforward. The Integer-Only
quantization mode fills this void and makes it possible to exploit QAT independently of the targeted hardware architecture. Most
common programmable architectures, such as CPUs, GPUs and DSPs, can implement it without additional burden.
In addition, hardware implementations such as HLS or RTL descriptions natively support low-precision integer operators.
In this mode, we replace the default quantization mode of the weights as follows:</p>
</dd>
</dl>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">...</span>
<span class="na">QWeight.Mode</span><span class="o">=</span><span class="s">Integer ; Can be ``Default`` (fake-quantization) mode or ``Integer``(true integer) mode</span>
<span class="na">...</span>
</pre></div>
</div>
<p>Example of full-integer weights on 4 bits / 15 levels:</p>
<div class="figure align-default">
<img alt="Weights Quantization in integer mode on 15 levels." src="_images/qat_weights_integer.png" />
</div>
</div>
<div class="section" id="cell-quantizer-definition">
<h2>Cell Quantizer Definition<a class="headerlink" href="#cell-quantizer-definition" title="Permalink to this headline">¶</a></h2>
<p>N2D2 implements a cell quantizer block for discretizing weights and biases at training time. This cell quantizer block
is totally transparent to the user. The quantization phase of the learnable parameters requires intensive operations
to adapt the distribution of the full-precision weights and to adapt the gradient. In addition, the implementation
can become highly memory-hungry, which can make it hard to train a complex model on a single GPU without specific treatment (gradient accumulation, etc.).
That is why N2D2 merges the different operations into dedicated CUDA or CPU kernels, allowing efficient utilization
of the available computation resources.</p>
<p>Overview of the cell quantizer implementation :</p>
<div class="figure align-default">
<img alt="Cell Quantizer Functional Block." src="_images/qat_cell_flow.png" />
</div>
<p>The common set of parameters for any kind of cell quantizer:</p>
<table class="docutils align-default">
<colgroup>
<col style="width: 30%" />
<col style="width: 70%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Option [default value]</p></th>
<th class="head"><p>Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight</span></code></p></td>
<td><p>Quantization method can be <code class="docutils literal notranslate"><span class="pre">SAT</span></code> or <code class="docutils literal notranslate"><span class="pre">LSQ</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight.Range</span></code> [<code class="docutils literal notranslate"><span class="pre">255</span></code>]</p></td>
<td><p>Quantization range: can be <code class="docutils literal notranslate"><span class="pre">1</span></code> for binary, <code class="docutils literal notranslate"><span class="pre">255</span></code> for 8 bits, etc.</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight.Solver</span></code> [<code class="docutils literal notranslate"><span class="pre">SGD</span></code>]</p></td>
<td><p>Solver type for the learnable quantization parameters; can be <code class="docutils literal notranslate"><span class="pre">SGD</span></code> or <code class="docutils literal notranslate"><span class="pre">ADAM</span></code></p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight.Mode</span></code> [<code class="docutils literal notranslate"><span class="pre">Default</span></code>]</p></td>
<td><p>Quantization mode; can be <code class="docutils literal notranslate"><span class="pre">Default</span></code> or <code class="docutils literal notranslate"><span class="pre">Integer</span></code></p></td>
</tr>
</tbody>
</table>
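<p>As an illustration, these common options can be combined in a cell definition as follows (a minimal sketch; the section name and values are placeholders):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>[conv1]
...
QWeight=SAT          ; Quantization method, can be SAT or LSQ
QWeight.Range=255    ; 255 step levels = 8-bit weights (default)
QWeight.Solver=SGD   ; Solver for the learnable quantization parameters
QWeight.Mode=Default ; Fake-quantization mode (use Integer for integer-only)
...
</pre></div>
</div>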
<div class="section" id="lsq">
<h3>LSQ<a class="headerlink" href="#lsq" title="Permalink to this headline">¶</a></h3>
<p>The Learned Step Size Quantization (LSQ) method is tailored to learn the optimal quantization step size parameters in parallel with the network weights.
As described in <a class="bibtex reference internal" href="tuto.html#bhalgat2020lsq" id="id1">[BLN+20]</a>, LSQ tries to estimate and scale the task loss gradient at each weight and activation layer’s quantizer step size,
such that it can be learned in conjunction with the other network parameters. This method can be initialized using the weights from a pre-trained
full-precision model.</p>
<table class="docutils align-default">
<colgroup>
<col style="width: 41%" />
<col style="width: 59%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Option [default value]</p></th>
<th class="head"><p>Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight.StepSize</span></code> [<code class="docutils literal notranslate"><span class="pre">100</span></code>]</p></td>
<td><p>Initial value of the learnable StepSize parameter</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight.StepOptInitStepSize</span></code> [<code class="docutils literal notranslate"><span class="pre">true</span></code>]</p></td>
<td><p>If <code class="docutils literal notranslate"><span class="pre">true</span></code>, initialize StepSize from the first batch variance</p></td>
</tr>
</tbody>
</table>
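<p>For example, an LSQ weight quantizer can be configured as follows (an illustrative sketch using the options above):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>...
QWeight=LSQ
QWeight.Range=255             ; 255 step levels = 8-bit weights
QWeight.StepSize=100          ; Initial value of the learnable step size
QWeight.StepOptInitStepSize=1 ; Initialize StepSize from the first batch variance
...
</pre></div>
</div>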
</div>
<div class="section" id="sat">
<h3>SAT<a class="headerlink" href="#sat" title="Permalink to this headline">¶</a></h3>
<p>Scale-Adjusted Training (SAT) <a class="bibtex reference internal" href="tuto.html#jin2019efficient" id="id2">[JYL19]</a> is one of the most promising solutions. The authors proposed SAT as a simple yet effective technique with which the rules of
efficient training are maintained, so that performance can be boosted and low-precision models can even surpass their
full-precision counterparts in some cases. This method exploits the DoReFa scheme for weight quantization.</p>
<table class="docutils align-default">
<colgroup>
<col style="width: 31%" />
<col style="width: 69%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Option [default value]</p></th>
<th class="head"><p>Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight.ApplyQuantization</span></code> [<code class="docutils literal notranslate"><span class="pre">true</span></code>]</p></td>
<td><p>Use <code class="docutils literal notranslate"><span class="pre">true</span></code> to enable quantization; if <code class="docutils literal notranslate"><span class="pre">false</span></code>, parameters are clamped to [-1.0, 1.0]</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">QWeight.ApplyScaling</span></code> [<code class="docutils literal notranslate"><span class="pre">false</span></code>]</p></td>
<td><p>Use <code class="docutils literal notranslate"><span class="pre">true</span></code> to scale the parameters as described in the SAT paper</p></td>
</tr>
</tbody>
</table>
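<p>For example, the first (clamping-only) training step of the SAT method can be configured as follows (a sketch; the same options are used in the tutorial below):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>...
QWeight=SAT
QWeight.ApplyQuantization=0 ; Clamp-only mode, no discretization yet
QWeight.ApplyScaling=0      ; Enable (1) to scale the parameters as in the SAT paper
...
</pre></div>
</div>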
<p>Example of clamped weights when <code class="docutils literal notranslate"><span class="pre">QWeight.ApplyQuantization=false</span></code>:</p>
<div class="figure align-default">
<img alt="Weights Full-Precision clamped." src="_images/qat_weights_Clamp.png" />
</div>
</div>
</div>
<div class="section" id="activation-quantizer-definition">
<h2>Activation Quantizer Definition<a class="headerlink" href="#activation-quantizer-definition" title="Permalink to this headline">¶</a></h2>
<p>N2D2 implements an activation quantizer block to discretize activations at training time. The activation quantizer block
is totally transparent to the user. The quantization phase of the activations requires intensive operations
to learn the parameters that rescale the histogram of the full-precision activations at training time. In addition, the implementation can become highly memory-hungry, which can make it hard to train a complex model on a single GPU without specific treatment (gradient accumulation, etc.).
That is why N2D2 merges the different operations into dedicated CUDA or CPU kernels, allowing efficient utilization
of the available computing resources.</p>
<p>Overview of the activation quantizer implementation:</p>
<div class="figure align-default">
<img alt="Activation Quantizer Functionnal Block." src="_images/qat_act_flow.png" />
</div>
<p>The common set of parameters for any kind of activation quantizer:</p>
<table class="docutils align-default">
<colgroup>
<col style="width: 30%" />
<col style="width: 70%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Option [default value]</p></th>
<th class="head"><p>Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QAct</span></code></p></td>
<td><p>Quantization method can be <code class="docutils literal notranslate"><span class="pre">SAT</span></code> or <code class="docutils literal notranslate"><span class="pre">LSQ</span></code>.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">QAct.Range</span></code> [<code class="docutils literal notranslate"><span class="pre">255</span></code>]</p></td>
<td><p>Quantization range: can be <code class="docutils literal notranslate"><span class="pre">1</span></code> for binary, <code class="docutils literal notranslate"><span class="pre">255</span></code> for 8 bits, etc.</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QAct.Solver</span></code> [<code class="docutils literal notranslate"><span class="pre">SGD</span></code>]</p></td>
<td><p>Solver type for the learnable quantization parameters; can be <code class="docutils literal notranslate"><span class="pre">SGD</span></code> or <code class="docutils literal notranslate"><span class="pre">ADAM</span></code></p></td>
</tr>
</tbody>
</table>
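<p>As an illustration, an activation quantizer can be attached to a layer or base block as follows (a minimal sketch; the full version appears in the tutorial below, where the solver is configured through <code class="docutils literal notranslate"><span class="pre">QActSolver</span></code>):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>[ReluQ_def]
ActivationFunction=Linear ; The quantizer integrates its own non-linearity
QAct=SAT                  ; Quantization method, can be SAT or LSQ
QAct.Range=255            ; 255 step levels = 8-bit activations (default)
QActSolver=SGD            ; Solver for the learnable quantization parameters
</pre></div>
</div>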
<div class="section" id="id3">
<h3>LSQ<a class="headerlink" href="#id3" title="Permalink to this headline">¶</a></h3>
<p>The Learned Step Size Quantization (LSQ) method is tailored to learn the optimal quantization step size parameters in parallel with the network weights.
As described in <a class="bibtex reference internal" href="tuto.html#bhalgat2020lsq" id="id4">[BLN+20]</a>, LSQ tries to estimate and scale the task loss gradient at each weight and activation layer’s quantizer step size,
such that it can be learned in conjunction with the other network parameters. This method can be initialized using the weights from a pre-trained full-precision model.</p>
<table class="docutils align-default">
<colgroup>
<col style="width: 41%" />
<col style="width: 59%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Option [default value]</p></th>
<th class="head"><p>Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QAct.StepSize</span></code> [<code class="docutils literal notranslate"><span class="pre">100</span></code>]</p></td>
<td><p>Initial value of the learnable StepSize parameter</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">QAct.StepOptInitStepSize</span></code> [<code class="docutils literal notranslate"><span class="pre">true</span></code>]</p></td>
<td><p>If <code class="docutils literal notranslate"><span class="pre">true</span></code>, initialize StepSize from the first batch variance</p></td>
</tr>
</tbody>
</table>
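<p>A corresponding LSQ activation quantizer configuration might look as follows (illustrative sketch):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>...
QAct=LSQ
QAct.Range=255             ; 255 step levels = 8-bit activations
QAct.StepSize=100          ; Initial value of the learnable step size
QAct.StepOptInitStepSize=1 ; Initialize StepSize from the first batch variance
...
</pre></div>
</div>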
</div>
<div class="section" id="id5">
<h3>SAT<a class="headerlink" href="#id5" title="Permalink to this headline">¶</a></h3>
<p>Scale-Adjusted Training (SAT) <a class="bibtex reference internal" href="tuto.html#jin2019efficient" id="id6">[JYL19]</a> is one of the most promising solutions. The authors proposed SAT as a simple yet effective technique with which the rules of
efficient training are maintained, so that performance can be boosted and low-precision models can even surpass their
full-precision counterparts in some cases.
This method exploits the CG-PACT scheme for activation quantization, a boosted version of PACT for low-precision quantization.</p>
<table class="docutils align-default">
<colgroup>
<col style="width: 41%" />
<col style="width: 59%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Option [default value]</p></th>
<th class="head"><p>Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">QAct.Alpha</span></code> [<code class="docutils literal notranslate"><span class="pre">8.0</span></code>]</p></td>
<td><p>Initial value of the learnable alpha parameter</p></td>
</tr>
</tbody>
</table>
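<p>For example, for 4-bit activations with the default initial alpha value (a sketch):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>...
QAct=SAT
QAct.Range=15  ; 15 step levels = 4-bit activations
QAct.Alpha=8.0 ; Initial value of the learnable alpha parameter
...
</pre></div>
</div>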
</div>
</div>
<div class="section" id="layer-compatibility-table">
<h2>Layer compatibility table<a class="headerlink" href="#layer-compatibility-table" title="Permalink to this headline">¶</a></h2>
<p>Here we describe the compatibility table as a function of the quantization mode. The <code class="docutils literal notranslate"><span class="pre">Cell</span></code> column indicates layers that fully support
quantizing their learnable parameters during the training phase. The <code class="docutils literal notranslate"><span class="pre">Activation</span></code> column indicates layers that support an activation quantizer on their
output feature map. An additional <code class="docutils literal notranslate"><span class="pre">Integer</span> <span class="pre">Core</span></code> column indicates layers that can be represented without any full-precision
operators at inference time. Of course, their input must come from quantized activations.</p>
<table class="docutils align-default">
<colgroup>
<col style="width: 23%" />
<col style="width: 29%" />
<col style="width: 26%" />
<col style="width: 23%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head" rowspan="2"><p>Layer
compatibility
table</p></th>
<th class="head" colspan="3"><p>Quantization Mode</p></th>
</tr>
<tr class="row-even"><th class="head"><p>Cell (parameters)</p></th>
<th class="head"><p>Activation</p></th>
<th class="head"><p>Integer Core</p></th>
</tr>
</thead>
<tbody>
<tr class="row-odd"><td><p>Activation</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Anchor</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>BatchNorm*</p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Conv</p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>Deconv</p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>ElemWise</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>Fc</p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>FMP</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>LRN</p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>LSTM</p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>ObjectDet</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Padding</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>Pool</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Proposal</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>Reshape</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Resize</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>ROIPooling</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>RP</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>Scaling</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Softmax</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>Threshold</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Transformation</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-odd"><td><p>Transpose</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
</tr>
<tr class="row-even"><td><p>Unpool</p></td>
<td></td>
<td><p><span class="raw-html"><font color="green"></span> ✓ <span class="raw-html"></font></span></p></td>
<td><p><span class="raw-html"><font color="red"></span> ✗ <span class="raw-html"></font></span></p></td>
</tr>
</tbody>
</table>
<p><em>BatchNorm cell parameters are not directly quantized during the training phase. N2D2 provides a unique approach
to absorb the trained parameters as integers within the integer-only representation of
the network during a fusion phase. This method is guaranteed to incur no loss of application
performance.</em></p>
</div>
<div class="section" id="tutorial">
<h2>Tutorial<a class="headerlink" href="#tutorial" title="Permalink to this headline">¶</a></h2>
<div class="section" id="onnx-model-resnet-18-example-ini-file">
<h3>ONNX model : ResNet-18 Example - INI File<a class="headerlink" href="#onnx-model-resnet-18-example-ini-file" title="Permalink to this headline">¶</a></h3>
<p>In this example we show how to quantize the <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> ONNX model with 4-bit weights and 4-bit activations using the <code class="docutils literal notranslate"><span class="pre">SAT</span></code> quantization method.
We start from the <code class="docutils literal notranslate"><span class="pre">resnet18v1.onnx</span></code> file, which you can pick up at <a class="reference external" href="https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet18v1/resnet18v1.onnx">https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet18v1/resnet18v1.onnx</a>.
You can also download it with the N2D2 script <code class="docutils literal notranslate"><span class="pre">N2D2/tools/install_onnx_models.py</span></code>, which automatically installs a set of pre-trained
ONNX models under your <code class="docutils literal notranslate"><span class="pre">N2D2_MODELS</span></code> system path.</p>
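<p>For example, assuming a standard Python environment, the script can be invoked as follows (an illustrative command; adapt the path to your checkout):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>python N2D2/tools/install_onnx_models.py
</pre></div>
</div>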
<p>Alternatively, you can start from the <code class="docutils literal notranslate"><span class="pre">.ini</span></code> file located at <code class="docutils literal notranslate"><span class="pre">N2D2/models/ONNX/resnet-18-v1-onnx.ini</span></code> and modify it directly, or create an empty
<code class="docutils literal notranslate"><span class="pre">resnet18-v1.ini</span></code> file in your simulation folder and copy/paste all the following INI instructions into it.</p>
<p>In this example you will also need to know the ONNX cell names of your graph. We recommend opening the ONNX graph in a graph viewer
such as Netron (<a class="reference external" href="https://lutzroeder.github.io/netron/">https://lutzroeder.github.io/netron/</a>).</p>
<p>In this example we demonstrate how to apply the <code class="docutils literal notranslate"><span class="pre">SAT</span></code> quantization procedure to the <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> ONNX model. The first step of the procedure consists
of training <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> on the <code class="docutils literal notranslate"><span class="pre">ImageNet</span></code> database with clamped weights.</p>
<p>First of all, we instantiate the dataset driver and the pre-processing / data augmentation functions:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">DefaultModel</span><span class="o">=</span><span class="s">Frame_CUDA</span>
<span class="c1">;ImageNet dataset</span>
<span class="k">[database]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">ILSVRC2012_Database</span>
<span class="na">RandomPartitioning</span><span class="o">=</span><span class="s">1</span>
<span class="na">Learn</span><span class="o">=</span><span class="s">1.0</span>
<span class="c1">;Standard image resolution for ImageNet, batchsize=128</span>
<span class="k">[sp]</span>
<span class="na">SizeX</span><span class="o">=</span><span class="s">224</span>
<span class="na">SizeY</span><span class="o">=</span><span class="s">224</span>
<span class="na">NbChannels</span><span class="o">=</span><span class="s">3</span>
<span class="na">BatchSize</span><span class="o">=</span><span class="s">128</span>
<span class="k">[sp.Transformation-1]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">ColorSpaceTransformation</span>
<span class="na">ColorSpace</span><span class="o">=</span><span class="s">RGB</span>
<span class="k">[sp.Transformation-2]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">RangeAffineTransformation</span>
<span class="na">FirstOperator</span><span class="o">=</span><span class="s">Divides</span>
<span class="na">FirstValue</span><span class="o">=</span><span class="s">255.0</span>
<span class="k">[sp.Transformation-3]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">RandomResizeCropTransformation</span>
<span class="na">Width</span><span class="o">=</span><span class="s">224</span>
<span class="na">Height</span><span class="o">=</span><span class="s">224</span>
<span class="na">ScaleMin</span><span class="o">=</span><span class="s">0.2</span>
<span class="na">ScaleMax</span><span class="o">=</span><span class="s">1.0</span>
<span class="na">RatioMin</span><span class="o">=</span><span class="s">0.75</span>
<span class="na">RatioMax</span><span class="o">=</span><span class="s">1.33</span>
<span class="na">ApplyTo</span><span class="o">=</span><span class="s">LearnOnly</span>
<span class="k">[sp.Transformation-4]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">RescaleTransformation</span>
<span class="na">Width</span><span class="o">=</span><span class="s">256</span>
<span class="na">Height</span><span class="o">=</span><span class="s">256</span>
<span class="na">KeepAspectRatio</span><span class="o">=</span><span class="s">1</span>
<span class="na">ResizeToFit</span><span class="o">=</span><span class="s">0</span>
<span class="na">ApplyTo</span><span class="o">=</span><span class="s">NoLearn</span>
<span class="k">[sp.Transformation-5]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">PadCropTransformation</span>
<span class="na">Width</span><span class="o">=</span><span class="s">[sp.Transformation-4]Width</span>
<span class="na">Height</span><span class="o">=</span><span class="s">[sp.Transformation-4]Height</span>
<span class="na">ApplyTo</span><span class="o">=</span><span class="s">NoLearn</span>
<span class="k">[sp.Transformation-6]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">SliceExtractionTransformation</span>
<span class="na">Width</span><span class="o">=</span><span class="s">[sp]SizeX</span>
<span class="na">Height</span><span class="o">=</span><span class="s">[sp]SizeY</span>
<span class="na">OffsetX</span><span class="o">=</span><span class="s">16</span>
<span class="na">OffsetY</span><span class="o">=</span><span class="s">16</span>
<span class="na">ApplyTo</span><span class="o">=</span><span class="s">NoLearn</span>
<span class="k">[sp.OnTheFlyTransformation-7]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">FlipTransformation</span>
<span class="na">ApplyTo</span><span class="o">=</span><span class="s">LearnOnly</span>
<span class="na">RandomHorizontalFlip</span><span class="o">=</span><span class="s">1</span>
</pre></div>
</div>
<p>Now that the dataset driver and the pre-processing are defined, we can focus on the neural network configuration.
In our example we decide to quantize all convolution and fully-connected layers.
A base block common to all convolution layers can be defined in the <em>.ini</em> file. This specific base block uses <code class="docutils literal notranslate"><span class="pre">onnx:Conv_def</span></code>, which
overwrites the native definition of all convolution layers defined in the ONNX file.
This base block is used to set the quantization parameters, such as the weight bit range, the scaling mode and the quantization mode, as well as the solver configuration.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[onnx:Conv_def]</span>
<span class="na">QWeight</span><span class="o">=</span><span class="s">SAT</span>
<span class="na">QWeight.ApplyScaling</span><span class="o">=</span><span class="s">0 ; No scaling needed because each conv is followed by batch-normalization layers</span>
<span class="na">QWeight.ApplyQuantization</span><span class="o">=</span><span class="s">0 ; Only clamp mode for the 1st step</span>
<span class="na">WeightsFiller</span><span class="o">=</span><span class="s">XavierFiller ; Specific filler for SAT method</span>
<span class="na">WeightsFiller.VarianceNorm</span><span class="o">=</span><span class="s">FanOut ; Specific filler for SAT method</span>
<span class="na">WeightsFiller.Scaling</span><span class="o">=</span><span class="s">1.0 ; Specific filler for SAT method</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">conv.config ; Config for conv parameters</span>
<span class="k">[conv.config]</span>
<span class="na">NoBias</span><span class="o">=</span><span class="s">1 ; No bias needed because each conv is followed by batch-normalization layers</span>
<span class="na">Solvers.LearningRatePolicy</span><span class="o">=</span><span class="s">CosineDecay ; Can be different Policy following your problem, recommended with SAT method</span>
<span class="na">Solvers.LearningRate</span><span class="o">=</span><span class="s">0.05 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.Momentum</span><span class="o">=</span><span class="s">0.9 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.Decay</span><span class="o">=</span><span class="s">0.00004 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.MaxIterations</span><span class="o">=</span><span class="s">192175050; For 150-epoch on ImageNet 1 epoch = 1281167 samples, 150 epoch = 1281167*150 samples</span>
<span class="na">Solvers.IterationSize</span><span class="o">=</span><span class="s">2 ;Our physical batch size is set to 128, iteration size is set to 2 because we want a batchsize of 256</span>
</pre></div>
</div>
<p>A base block common to all fully-connected layers can be defined in the <em>.ini</em> file. This specific base block uses <code class="docutils literal notranslate"><span class="pre">onnx:Fc_def</span></code>, which
overwrites the native definition of all fully-connected layers defined in the ONNX file.
This base block is used to set the quantization parameters, such as the weight bit range, the scaling mode and the quantization mode, as well as the solver configuration.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[onnx:Fc_def]</span>
<span class="na">QWeight</span><span class="o">=</span><span class="s">SAT</span>
<span class="na">QWeight.ApplyScaling</span><span class="o">=</span><span class="s">1 ; Scaling needed for Full-Connected</span>
<span class="na">QWeight.ApplyQuantization</span><span class="o">=</span><span class="s">0 ; Only clamp mode for the 1st step</span>
<span class="na">WeightsFiller</span><span class="o">=</span><span class="s">XavierFiller ; Specific filler for SAT method</span>
<span class="na">WeightsFiller.VarianceNorm</span><span class="o">=</span><span class="s">FanOut ; Specific filler for SAT method</span>
<span class="na">WeightsFiller.Scaling</span><span class="o">=</span><span class="s">1.0 ; Specific filler for SAT method</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">fc.config ; Config for conv parameters</span>
<span class="k">[fc.config]</span>
<span class="na">NoBias</span><span class="o">=</span><span class="s">0 ; Bias needed for fully-connected</span>
<span class="na">Solvers.LearningRatePolicy</span><span class="o">=</span><span class="s">CosineDecay ; Can be different Policy following your problem, recommended with SAT method</span>
<span class="na">Solvers.LearningRate</span><span class="o">=</span><span class="s">0.05 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.Momentum</span><span class="o">=</span><span class="s">0.9 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.Decay</span><span class="o">=</span><span class="s">0.00004 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.MaxIterations</span><span class="o">=</span><span class="s">192175050; For 150-epoch on ImageNet 1 epoch = 1281167 samples, 150 epoch = 1281167*150 samples</span>
<span class="na">Solvers.IterationSize</span><span class="o">=</span><span class="s">2 ;Our physical batch size is set to 128, iteration size is set to 2 because we want a batch size of 256</span>
</pre></div>
</div>
<p>A base block common to all batch-normalization layers can be defined in the <em>.ini</em> file. This specific base block uses <code class="docutils literal notranslate"><span class="pre">onnx:BatchNorm_def</span></code>, which
overwrites the native definition of all batch-normalization layers defined in the ONNX file.
Here we simply define the hyper-parameters of the batch-normalization layers.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[onnx:BatchNorm_def]</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">bn_train.config</span>
<span class="k">[bn_train.config]</span>
<span class="na">Solvers.LearningRatePolicy</span><span class="o">=</span><span class="s">CosineDecay ; Can be different Policy following your problem, recommended with SAT method</span>
<span class="na">Solvers.LearningRate</span><span class="o">=</span><span class="s">0.05 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.Momentum</span><span class="o">=</span><span class="s">0.9 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.Decay</span><span class="o">=</span><span class="s">0.00004 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">Solvers.MaxIterations</span><span class="o">=</span><span class="s">192175050; For 150-epoch on ImageNet 1 epoch = 1281167 samples, 150 epoch = 1281167*150 samples</span>
<span class="na">Solvers.IterationSize</span><span class="o">=</span><span class="s">2 ;Our physical batchsize is set to 128, iterationsize is set to 2 because we want a batchsize of 256</span>
</pre></div>
</div>
<p>Then we describe the <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> topology directly from the ONNX file that you previously installed in your simulation folder:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[onnx]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">sp</span>
<span class="na">Type</span><span class="o">=</span><span class="s">ONNX</span>
<span class="na">File</span><span class="o">=</span><span class="s">resnet18v1.onnx</span>
<span class="na">ONNX_init</span><span class="o">=</span><span class="s">0 ; For SAT method we need to initialize from clamped weights or dedicated filler</span>
<span class="k">[soft1]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">resnetv15_dense0_fwd</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Softmax</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">1000</span>
<span class="na">WithLoss</span><span class="o">=</span><span class="s">1</span>
<span class="k">[soft1.Target]</span>
</pre></div>
</div>
<p>Now that your <code class="docutils literal notranslate"><span class="pre">resnet18-v1.ini</span></code> file is set up in your simulation folder, you just have to run the learning phase to clamp the weights
with the command:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">./</span><span class="n">n2d2</span> <span class="n">resnet18</span><span class="o">-</span><span class="n">v1</span><span class="o">.</span><span class="n">ini</span> <span class="o">-</span><span class="n">learn</span><span class="o">-</span><span class="n">epoch</span> <span class="mi">150</span> <span class="o">-</span><span class="n">valid</span><span class="o">-</span><span class="n">metric</span> <span class="n">Precision</span>
</pre></div>
</div>
<p>This command runs the learning phase over 150 epochs on the <code class="docutils literal notranslate"><span class="pre">ImageNet</span></code> dataset.
The final test accuracy should reach at least 70%.</p>
<p>Next, copy the parameters from the weights folder to another location,
for example a <em>weights_clamped</em> folder.</p>
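<p>Assuming the default output folder names (an illustrative command; adapt the paths to your setup):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>cp -r weights weights_clamped
</pre></div>
</div>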
<p>Congratulations! Your <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> model now has clamped weights! You can check the results
in your <em>weights_clamped</em> folder.
Now that your <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> model has clamped weights, you can play with it and try different quantization modes.</p>
<p>In addition, if you also want to quantize the <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> activations, you need to create a specific base block in your
<code class="docutils literal notranslate"><span class="pre">resnet-18-v1.ini</span></code> file in the following way:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[ReluQ_def]</span>
<span class="na">ActivationFunction</span><span class="o">=</span><span class="s">Linear ; No more need Relu because SAT quantizer integrates it's own non-linear activation</span>
<span class="na">QAct</span><span class="o">=</span><span class="s">SAT ; SAT quantization method</span>
<span class="na">QAct.Range</span><span class="o">=</span><span class="s">15 ; Range=15 for 4-bits quantization model</span>
<span class="na">QActSolver</span><span class="o">=</span><span class="s">SGD ; Specify SGD solver for learned alpha parameter</span>
<span class="na">QActSolver.LearningRatePolicy</span><span class="o">=</span><span class="s">CosineDecay ; Can be different Policy following your problem, recommended with SAT method</span>
<span class="na">QActSolver.LearningRate</span><span class="o">=</span><span class="s">0.05 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">QActSolver.Momentum</span><span class="o">=</span><span class="s">0.9 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">QActSolver.Decay</span><span class="o">=</span><span class="s">0.00004 ; Typical value for batchsize=256 with SAT method</span>
<span class="na">QActSolver.MaxIterations</span><span class="o">=</span><span class="s">192175050; For 150-epoch on ImageNet 1 epoch = 1281167 samples, 150 epoch = 1281167*150 samples</span>
<span class="na">QActSolver.IterationSize</span><span class="o">=</span><span class="s">2 ;Our physical batch size is set to 128, iteration size is set to 2 because we want a batchsize of 256</span>
</pre></div>
</div>
<p>This base block will be used to override all the <code class="docutils literal notranslate"><span class="pre">Rectifier</span></code> activation functions of the ONNX model.
To identify the names of the different activation functions, you can use the Netron tool:</p>
<div class="figure align-default">
<img alt="Relu Name." src="_images/qat_netron_r.png" />
</div>
<p>We then override all the activation functions of the model with our previously described activation quantizer:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">[resnetv15_relu0_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage1_relu0_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage1_activation0]ReluQ_def</span>
<span class="na">[resnetv15_stage1_relu1_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage1_activation1]ReluQ_def</span>
<span class="na">[resnetv15_stage2_relu0_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage2_activation0]ReluQ_def</span>
<span class="na">[resnetv15_stage2_relu1_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage2_activation1]ReluQ_def</span>
<span class="na">[resnetv15_stage3_relu0_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage3_activation0]ReluQ_def</span>
<span class="na">[resnetv15_stage3_relu1_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage3_activation1]ReluQ_def</span>
<span class="na">[resnetv15_stage4_relu0_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage4_activation0]ReluQ_def</span>
<span class="na">[resnetv15_stage4_relu1_fwd]ReluQ_def</span>
<span class="na">[resnetv15_stage4_activation1]ReluQ_def</span>
</pre></div>
</div>
<p>Now that the activation quantization mode is set, we focus on quantizing the weight parameters.
For example, to also quantize the weights in a 4-bit range, set the convolution base block parameters
as follows:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[onnx:Conv_def]</span>
<span class="na">...</span>
<span class="na">QWeight.ApplyQuantization</span><span class="o">=</span><span class="s">1 ; Set to 1 for quantization mode</span>
<span class="na">QWeight.Range</span><span class="o">=</span><span class="s">15 ; Conv is now quantized in 4-bits range (2^4 - 1)</span>
<span class="na">...</span>
</pre></div>
</div>
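<p>The rule for <code class="docutils literal notranslate"><span class="pre">QWeight.Range</span></code> is the one given in the comments: for <em>b</em>-bit quantization, Range = 2<sup>b</sup> - 1, hence 15 for 4 bits and 255 for 8 bits.</p>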
<p>In the same manner, you can modify the fully-connected base block:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[onnx:Fc_def]</span>
<span class="na">...</span>
<span class="na">QWeight.ApplyQuantization</span><span class="o">=</span><span class="s">1 ; Set to 1 for quantization mode</span>
<span class="na">QWeight.Range</span><span class="o">=</span><span class="s">15 ; Fc is now quantized in 4-bits range (2^4 - 1)</span>
<span class="na">...</span>
</pre></div>
</div>
<p>As a common practice in quantization-aware training, the first and last layers are quantized to 8 bits.
In ResNet-18 the first layer is a convolution layer, so we have to override its quantization range specifically.</p>
<p>We first identify the name of the first layer with the Netron tool:</p>
<div class="figure align-default">
<img alt="First Conv Cell Name." src="_images/qat_netron_conv_name.png" />
</div>
<p>We then override the range of the first convolution layer of the <code class="docutils literal notranslate"><span class="pre">resnet18v1.onnx</span></code> model:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">[resnetv15_conv0_fwd]onnx:Conv_def</span>
<span class="na">QWeight.Range</span><span class="o">=</span><span class="s">255 ;resnetv15_conv0_fwd is now quantized in 8-bits range (2^8 - 1)</span>
</pre></div>
</div>
<p>In the same way, we override the range of the last fully-connected layer to 8 bits:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">[resnetv15_dense0_fwd]onnx:Fc_def</span>
<span class="na">QWeight.Range</span><span class="o">=</span><span class="s">255 ;resnetv15_dense0_fwd is now quantized in 8-bits range (2^8 - 1)</span>
</pre></div>
</div>
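<p>Note that these per-layer sections only restate the parameter that changes; everything else (quantizer, solvers, and so on) is inherited from the <code class="docutils literal notranslate"><span class="pre">onnx:Conv_def</span></code> and <code class="docutils literal notranslate"><span class="pre">onnx:Fc_def</span></code> base blocks.</p>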
<p>Now that your modified <code class="docutils literal notranslate"><span class="pre">resnet-18-v1.ini</span></code> file is ready, you just have to run a learning phase with the same hyperparameters,
using transfer learning from the previously clamped weights,
with this command:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">./</span><span class="n">n2d2</span> <span class="n">resnet</span><span class="o">-</span><span class="mi">18</span><span class="o">-</span><span class="n">v1</span><span class="o">.</span><span class="n">ini</span> <span class="o">-</span><span class="n">learn</span><span class="o">-</span><span class="n">epoch</span> <span class="mi">150</span> <span class="o">-</span><span class="n">w</span> <span class="n">weights_clamped</span> <span class="o">-</span><span class="n">valid</span><span class="o">-</span><span class="n">metric</span> <span class="n">Precision</span>
</pre></div>
</div>
<p>This command will run the learning phase over 150 epochs with the <code class="docutils literal notranslate"><span class="pre">Imagenet</span></code> dataset.
The final test accuracy should reach at least 70%.</p>
<p>Congratulations! Your <code class="docutils literal notranslate"><span class="pre">resnet-18-v1</span></code> model now has its weight parameters and activations quantized to 4 bits!</p>
</div>
<div class="section" id="onnx-model-resnet-18-example-python">
<h3>ONNX model : ResNet-18 Example - Python<a class="headerlink" href="#onnx-model-resnet-18-example-python" title="Permalink to this headline">¶</a></h3>
<p>Coming soon.</p>
</div>
<div class="section" id="hand-made-model-lenet-example-ini-file">
<h3>Hand-Made model : LeNet Example - INI File<a class="headerlink" href="#hand-made-model-lenet-example-ini-file" title="Permalink to this headline">¶</a></h3>
<p>One can apply the <code class="docutils literal notranslate"><span class="pre">SAT</span></code> quantization methodology to a chosen deep neural network by adding the right parameters to the
<code class="docutils literal notranslate"><span class="pre">.ini</span></code> file. Here we show how to configure the <code class="docutils literal notranslate"><span class="pre">.ini</span></code> file to correctly apply the SAT quantization.
In this example we apply the SAT quantization procedure to a hand-made LeNet model. The first step of the procedure consists
of training <code class="docutils literal notranslate"><span class="pre">LeNet</span></code> on the <code class="docutils literal notranslate"><span class="pre">MNIST</span></code> database with clamped weights.</p>
<p>We recommend creating an empty <code class="docutils literal notranslate"><span class="pre">LeNet.ini</span></code> file in your simulation folder and copying/pasting all the following <code class="docutils literal notranslate"><span class="pre">ini</span></code> blocks
into it.</p>
<p>First of all, we describe the <code class="docutils literal notranslate"><span class="pre">MNIST</span></code> dataset driver and the pre-processing used for data augmentation at training and test time:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="c1">; Frame_CUDA for GPU and Frame for CPU</span>
<span class="na">DefaultModel</span><span class="o">=</span><span class="s">Frame_CUDA</span>
<span class="c1">; MNIST Driver Database Instantiation</span>
<span class="k">[database]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">MNIST_IDX_Database</span>
<span class="na">RandomPartitioning</span><span class="o">=</span><span class="s">1</span>
<span class="c1">; Environment Description , batch=256</span>
<span class="k">[env]</span>
<span class="na">SizeX</span><span class="o">=</span><span class="s">32</span>
<span class="na">SizeY</span><span class="o">=</span><span class="s">32</span>
<span class="na">BatchSize</span><span class="o">=</span><span class="s">256</span>
<span class="k">[env.Transformation_0]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">RescaleTransformation</span>
<span class="na">Width</span><span class="o">=</span><span class="s">32</span>
<span class="na">Height</span><span class="o">=</span><span class="s">32</span>
</pre></div>
</div>
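<p>If you want additional augmentation at training time only, N2D2 also supports on-the-fly transformations. A minimal sketch, assuming the <code class="docutils literal notranslate"><span class="pre">DistortionTransformation</span></code> parameters used in N2D2&#8217;s stock MNIST examples:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>; Applied on the fly, to the learning set only
[env.OnTheFlyTransformation_0]
Type=DistortionTransformation
ApplyTo=LearnOnly
ElasticGaussianSize=21
ElasticSigma=6.0
ElasticScaling=36.0
Scaling=10.0
Rotation=10.0
</pre></div>
</div>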
<p>In our example we decide to quantize all convolution and fully-connected layers.
A base block common to all convolution layers can be defined in the <em>.ini</em> file. This base block is used to set the quantization parameters, such as the weights bit range, the scaling mode and the quantization mode, as well as the solver configuration.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[Conv_def]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Conv</span>
<span class="na">ActivationFunction</span><span class="o">=</span><span class="s">Linear</span>
<span class="na">QWeight</span><span class="o">=</span><span class="s">SAT</span>
<span class="na">QWeight.ApplyScaling</span><span class="o">=</span><span class="s">0 ; No scaling needed because each conv is followed by batch-normalization layers</span>
<span class="na">QWeight.ApplyQuantization</span><span class="o">=</span><span class="s">0 ; Only clamp mode for the 1st step</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">common.config</span>
<span class="k">[common.config]</span>
<span class="na">NoBias</span><span class="o">=</span><span class="s">1</span>
<span class="na">Solvers.LearningRate</span><span class="o">=</span><span class="s">0.05</span>
<span class="na">Solvers.LearningRatePolicy</span><span class="o">=</span><span class="s">None</span>
<span class="na">Solvers.Momentum</span><span class="o">=</span><span class="s">0.0</span>
<span class="na">Solvers.Decay</span><span class="o">=</span><span class="s">0.0</span>
</pre></div>
</div>
<p>A base block common to all fully-connected layers can be defined in the <em>.ini</em> file.
This base block is used to set the quantization parameters, such as the weights bit range, the scaling mode and the quantization mode, as well as the solver configuration.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[Fc_def]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Fc</span>
<span class="na">ActivationFunction</span><span class="o">=</span><span class="s">Linear</span>
<span class="na">QWeight</span><span class="o">=</span><span class="s">SAT</span>
<span class="na">QWeight.ApplyScaling</span><span class="o">=</span><span class="s">1 ; Scaling needed because for Full-Conncted</span>
<span class="na">QWeight.ApplyQuantization</span><span class="o">=</span><span class="s">0 ; Only clamp mode for the 1st step</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">common.config</span>
</pre></div>
</div>
<p>A base block common to all batch-normalization layers can be defined in the <em>.ini</em> file.
This base block is used to set the activation quantization parameters, such as the activations bit range and the quantization mode, as well as the solver configuration.
In this first step, batch-normalization activations are not quantized yet. We simply define a typical batch-normalization layer with <code class="docutils literal notranslate"><span class="pre">Rectifier</span></code> as the
non-linear activation function.</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="k">[Bn_def]</span>
<span class="na">Type</span><span class="o">=</span><span class="s">BatchNorm</span>
<span class="na">ActivationFunction</span><span class="o">=</span><span class="s">Rectifier</span>
<span class="na">ConfigSection</span><span class="o">=</span><span class="s">bn.config</span>
<span class="k">[bn.config]</span>
<span class="na">Solvers.LearningRate</span><span class="o">=</span><span class="s">0.05</span>
<span class="na">Solvers.LearningRatePolicy</span><span class="o">=</span><span class="s">None</span>
<span class="na">Solvers.Momentum</span><span class="o">=</span><span class="s">0.0</span>
<span class="na">Solvers.Decay</span><span class="o">=</span><span class="s">0.0</span>
</pre></div>
</div>
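<p>For reference, in the later quantization step this same base block would receive an activation quantizer, analogous to the <code class="docutils literal notranslate"><span class="pre">ReluQ_def</span></code> block of the ResNet example above. A sketch, assuming 4-bit activations (Range=15):</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span>[Bn_def]
Type=BatchNorm
ActivationFunction=Linear ; the SAT quantizer integrates its own non-linearity
QAct=SAT
QAct.Range=15 ; Range=15 for a 4-bit quantization of the activations
QActSolver=SGD
ConfigSection=bn.config
</pre></div>
</div>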
<p>Finally, we describe the full backbone of the <code class="docutils literal notranslate"><span class="pre">LeNet</span></code> topology:</p>
<div class="highlight-ini notranslate"><div class="highlight"><pre><span></span><span class="na">[conv1] Conv_def</span>
<span class="na">Input</span><span class="o">=</span><span class="s">env</span>
<span class="na">KernelWidth</span><span class="o">=</span><span class="s">5</span>
<span class="na">KernelHeight</span><span class="o">=</span><span class="s">5</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">6</span>
<span class="na">[bn1] Bn_def</span>
<span class="na">Input</span><span class="o">=</span><span class="s">conv1</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">[conv1]NbOutputs</span>
<span class="c1">; Non-overlapping max pooling P2</span>
<span class="k">[pool1]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">bn1</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Pool</span>
<span class="na">PoolWidth</span><span class="o">=</span><span class="s">2</span>
<span class="na">PoolHeight</span><span class="o">=</span><span class="s">2</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">6</span>
<span class="na">Stride</span><span class="o">=</span><span class="s">2</span>
<span class="na">Pooling</span><span class="o">=</span><span class="s">Max</span>
<span class="na">Mapping.Size</span><span class="o">=</span><span class="s">1</span>
<span class="na">[conv2] Conv_def</span>
<span class="na">Input</span><span class="o">=</span><span class="s">pool1</span>
<span class="na">KernelWidth</span><span class="o">=</span><span class="s">5</span>
<span class="na">KernelHeight</span><span class="o">=</span><span class="s">5</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">16</span>
<span class="na">[bn2] Bn_def</span>
<span class="na">Input</span><span class="o">=</span><span class="s">conv2</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">[conv2]NbOutputs</span>
<span class="k">[pool2]</span>
<span class="na">Input</span><span class="o">=</span><span class="s">bn2</span>
<span class="na">Type</span><span class="o">=</span><span class="s">Pool</span>
<span class="na">PoolWidth</span><span class="o">=</span><span class="s">2</span>
<span class="na">PoolHeight</span><span class="o">=</span><span class="s">2</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">16</span>
<span class="na">Stride</span><span class="o">=</span><span class="s">2</span>
<span class="na">Pooling</span><span class="o">=</span><span class="s">Max</span>
<span class="na">Mapping.Size</span><span class="o">=</span><span class="s">1</span>
<span class="na">[conv3] Conv_def</span>
<span class="na">Input</span><span class="o">=</span><span class="s">pool2</span>
<span class="na">KernelWidth</span><span class="o">=</span><span class="s">5</span>
<span class="na">KernelHeight</span><span class="o">=</span><span class="s">5</span>
<span class="na">NbOutputs</span><span class="o">=</span><span class="s">120</span>
<span class="na">[bn3]Bn_def</span>