From 3d6fb664aac2537b53f7d929cb51d834611c6d0d Mon Sep 17 00:00:00 2001 From: Marco F Date: Tue, 8 Jun 2021 15:35:41 +0200 Subject: [PATCH] Ported to new MFCC --- .gitignore | 1 - MFCC_params_LARGE.h => MFCC_params.h | 8 - MFCC_params_MEDIUM.h | 18 - MFCC_params_SMALL.h | 18 - MFCCmodel.c | 24 +- Makefile | 45 +- MfccConfig.json | 12 + MfccConfig_HighPrec.json | 12 + {at_log => accuracy_log}/MEDIUM.csv | 0 {at_log => accuracy_log}/SMALL.csv | 0 accuracy_log/log_test_large_hp_power.txt | 1721 ++++++++++++ accuracy_log/log_test_large_hp_power_v2.txt | 2376 +++++++++++++++++ accuracy_log/log_test_large_hp_spectr.txt | 3 + accuracy_log/log_test_large_hp_spectr_v2.txt | 27 + accuracy_log/log_test_large_power.txt | 1721 ++++++++++++ accuracy_log/log_test_large_power_v2.txt | 2376 +++++++++++++++++ .../log_test_large_power_v2_norm9.txt | 2349 ++++++++++++++++ accuracy_log/log_test_large_spectr.txt | 14 + accuracy_log/log_test_large_spectr_v2.txt | 27 + .../log_test_large_spectr_v2_norm9.txt | 11 + accuracy_log/log_test_medium_hp_power.txt | 1499 +++++++++++ accuracy_log/log_test_medium_hp_power_v2.txt | 1842 +++++++++++++ accuracy_log/log_test_medium_hp_spectr.txt | 1499 +++++++++++ accuracy_log/log_test_medium_hp_spectr_v2.txt | 1842 +++++++++++++ accuracy_log/log_test_medium_power.txt | 1499 +++++++++++ accuracy_log/log_test_medium_power_v2.txt | 2090 +++++++++++++++ .../log_test_medium_power_v2_norm9.txt | 1850 +++++++++++++ accuracy_log/log_test_medium_spectr.txt | 1499 +++++++++++ accuracy_log/log_test_medium_spectr_v2.txt | 2090 +++++++++++++++ .../log_test_medium_spectr_v2_norm9.txt | 2090 +++++++++++++++ accuracy_log/log_test_small_hp_power.txt | 1457 ++++++++++ accuracy_log/log_test_small_hp_power_v2.txt | 1780 ++++++++++++ accuracy_log/log_test_small_hp_spectr.txt | 1457 ++++++++++ accuracy_log/log_test_small_hp_spectr_v2.txt | 1780 ++++++++++++ accuracy_log/log_test_small_power.txt | 1457 ++++++++++ accuracy_log/log_test_small_power_v2.txt | 2028 ++++++++++++++ accuracy_log/log_test_small_spectr.txt | 1457 ++++++++++ accuracy_log/log_test_small_spectr_v2.txt | 2028 ++++++++++++++ common/model_decl.mk | 3 +- common/model_rules.mk | 27 +- emul.mk | 31 +- main_emulation.c | 27 +- main_with_mfcc.c | 23 +- mfcc_model.mk | 60 +- model/{nntool_script_params => nntool_script} | 9 +- run_test_accuracy.sh | 24 +- utils/test_accuracy_emul.py | 6 +- 47 files changed, 42007 insertions(+), 210 deletions(-) rename MFCC_params_LARGE.h => MFCC_params.h (55%) delete mode 100644 MFCC_params_MEDIUM.h delete mode 100644 MFCC_params_SMALL.h create mode 100644 MfccConfig.json create mode 100644 MfccConfig_HighPrec.json rename {at_log => accuracy_log}/MEDIUM.csv (100%) rename {at_log => accuracy_log}/SMALL.csv (100%) create mode 100644 accuracy_log/log_test_large_hp_power.txt create mode 100644 accuracy_log/log_test_large_hp_power_v2.txt create mode 100644 accuracy_log/log_test_large_hp_spectr.txt create mode 100644 accuracy_log/log_test_large_hp_spectr_v2.txt create mode 100644 accuracy_log/log_test_large_power.txt create mode 100644 accuracy_log/log_test_large_power_v2.txt create mode 100644 accuracy_log/log_test_large_power_v2_norm9.txt create mode 100644 accuracy_log/log_test_large_spectr.txt create mode 100644 accuracy_log/log_test_large_spectr_v2.txt create mode 100644 accuracy_log/log_test_large_spectr_v2_norm9.txt create mode 100644 accuracy_log/log_test_medium_hp_power.txt create mode 100644 accuracy_log/log_test_medium_hp_power_v2.txt create mode 100644 accuracy_log/log_test_medium_hp_spectr.txt create mode 100644 accuracy_log/log_test_medium_hp_spectr_v2.txt create mode 100644 accuracy_log/log_test_medium_power.txt create mode 100644 accuracy_log/log_test_medium_power_v2.txt create mode 100644 accuracy_log/log_test_medium_power_v2_norm9.txt create mode 100644 accuracy_log/log_test_medium_spectr.txt create mode 100644 accuracy_log/log_test_medium_spectr_v2.txt create mode 100644 accuracy_log/log_test_medium_spectr_v2_norm9.txt create mode 100644 accuracy_log/log_test_small_hp_power.txt create mode 100644 accuracy_log/log_test_small_hp_power_v2.txt create mode 100644 accuracy_log/log_test_small_hp_spectr.txt create mode 100644 accuracy_log/log_test_small_hp_spectr_v2.txt create mode 100644 accuracy_log/log_test_small_power.txt create mode 100644 accuracy_log/log_test_small_power_v2.txt create mode 100644 accuracy_log/log_test_small_spectr.txt create mode 100644 accuracy_log/log_test_small_spectr_v2.txt rename model/{nntool_script_params => nntool_script} (56%) diff --git a/.gitignore b/.gitignore index 7a44433..8811567 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ BUILD* *.dat -nntool_script *_emul samples/speech_dataset *__pycache__* diff --git a/MFCC_params_LARGE.h b/MFCC_params.h similarity index 55% rename from MFCC_params_LARGE.h rename to MFCC_params.h index 8ea8977..cf8833d 100644 --- a/MFCC_params_LARGE.h +++ b/MFCC_params.h @@ -1,18 +1,10 @@ #define SAMPLERATE 16000 -#define PREEMP_FACTOR 0.0 #define FRAME_SIZE 640 #define FRAME_STEP 320 -#define N_FRAME 49 #define N_FFT 1024 -#define USE_RADIX_4 0 #define DATA_TYPE 0 -#define USE_POWER 1 -#define OUT_FFT 0 #define MFCC_BANK_CNT 40 #define FMIN 20.0 #define FMAX 4000.0 #define MFCC_COEFF_CNT 494 #define N_DCT 40 -#define OUTPUT_MELSPECTROGRAM 0 -#define LIFTER_COEFF 0 -#define USE_DB 0 diff --git a/MFCC_params_MEDIUM.h b/MFCC_params_MEDIUM.h deleted file mode 100644 index 8ea8977..0000000 --- a/MFCC_params_MEDIUM.h +++ /dev/null @@ -1,18 +0,0 @@ -#define SAMPLERATE 16000 -#define PREEMP_FACTOR 0.0 -#define FRAME_SIZE 640 -#define FRAME_STEP 320 -#define N_FRAME 49 -#define N_FFT 1024 -#define USE_RADIX_4 0 -#define DATA_TYPE 0 -#define USE_POWER 1 -#define OUT_FFT 0 -#define MFCC_BANK_CNT 40 -#define FMIN 20.0 -#define FMAX 4000.0 -#define MFCC_COEFF_CNT 494 -#define N_DCT 40 -#define OUTPUT_MELSPECTROGRAM 0 -#define LIFTER_COEFF 0 -#define USE_DB 0 diff --git a/MFCC_params_SMALL.h b/MFCC_params_SMALL.h deleted file mode 100644 index 8ea8977..0000000 --- a/MFCC_params_SMALL.h +++ /dev/null @@ -1,18 +0,0 @@ -#define SAMPLERATE 16000 -#define PREEMP_FACTOR 0.0 -#define FRAME_SIZE 640 -#define FRAME_STEP 320 -#define N_FRAME 49 -#define N_FFT 1024 -#define USE_RADIX_4 0 -#define DATA_TYPE 0 -#define USE_POWER 1 -#define OUT_FFT 0 -#define MFCC_BANK_CNT 40 -#define FMIN 20.0 -#define FMAX 4000.0 -#define MFCC_COEFF_CNT 494 -#define N_DCT 40 -#define OUTPUT_MELSPECTROGRAM 0 -#define LIFTER_COEFF 0 -#define USE_DB 0 diff --git a/MFCCmodel.c b/MFCCmodel.c index 487e254..28b8685 100644 --- a/MFCCmodel.c +++ b/MFCCmodel.c @@ -1,23 +1,14 @@ #include "AutoTilerLib.h" #include "AutoTilerLibTypes.h" -#include "MfccGenerator.h" -#ifdef SMALL - #include "MFCC_params_SMALL.h" -#endif -#ifdef MEDIUM - #include "MFCC_params_MEDIUM.h" -#endif -#ifdef LARGE - #include "MFCC_params_LARGE.h" -#endif - +#include "DSP_Generators.h" +#include "MFCC_params.h" void MFCCConfiguration(unsigned int L1Memory) { SetInlineMode(ALWAYS_INLINE); - SetSymbolDynamics(); + SetSymbolDynamics(); - SetUsedFilesNames(0, 1, "MfccBasicKernels.h"); + SetUsedFilesNames(0, 3, "MfccBasicKernels.h", "CmplxFunctions.h", "PreProcessing.h"); SetGeneratedFilesNames("MFCCKernels.c", "MFCCKernels.h"); SetL1MemorySize(L1Memory); @@ -25,13 +16,12 @@ void MFCCConfiguration(unsigned int L1Memory) int main(int argc, char **argv) { - if (TilerParseOptions(argc, argv)) GenTilingError("Failed to initialize or incorrect output arguments directory.\n"); + if (TilerParseOptions(argc, argv)) GenTilingError("Failed to initialize or incorrect output arguments directory.\n"); // Set Auto Tiler configuration, given shared L1 memory is 51200 - MFCCConfiguration(51200); + MFCCConfiguration(64*1024); // Load FIR basic kernels LoadMFCCLibrary(); - MFCC_Generator("MFCC", 0, N_FRAME, FRAME_SIZE, FRAME_STEP, N_FFT, MFCC_COEFF_CNT, N_DCT, PREEMP_FACTOR, LIFTER_COEFF, USE_RADIX_4, USE_POWER, DATA_TYPE, MFCC_BANK_CNT, 0, 0); - //MFCC_Generator("MFCC_single", 0, 1, FRAME_SIZE, FRAME_STEP, N_FFT, MFCC_COEFF_CNT, N_DCT, PREEMP_FACTOR, LIFTER_COEFF, USE_RADIX_4, USE_POWER, DATA_TYPE, MFCC_BANK_CNT, 0, 0); + MFCC_Generator("MFCC", 0, 49, FRAME_SIZE, FRAME_STEP, N_FFT, MFCC_BANK_CNT, MFCC_COEFF_CNT, N_DCT, 0, 0, 0, USE_POWER, DATA_TYPE, 1, 0); GenerateTilingCode(); } diff --git a/Makefile b/Makefile index 8415846..d8a8c6e 100644 --- a/Makefile +++ b/Makefile @@ -56,17 +56,14 @@ QUANT_BITS=8 BUILD_DIR=BUILD #MODEL_NE16 ?= 0 -NNTOOL_SCRIPT_PARAMETRIC=model/nntool_script_params -MODEL_SUFFIX = _$(QUANT_BITS)BIT +NNTOOL_SCRIPT=model/nntool_script +MODEL_SUFFIX = _SQ$(QUANT_BITS) ifeq ($(MODEL_NE16), 1) - NNTOOL_SCRIPT_PARAMETRIC=model/nntool_script_ne16_params + NNTOOL_SCRIPT=model/nntool_script_ne16 MODEL_SUFFIX = _NE16 endif -ifeq ($(DUMP_TENSORS), 1) - NNTOOL_SET_GRAPH_DUMP = set graph_dump_tensor 7 -endif NNTOOL_EXTRA_FLAGS = -q -MODEL_BUILD = BUILD_MODEL_$(QUANT_BITS)BIT +MODEL_BUILD = BUILD_MODEL$(MODEL_SUFFIX) CLUSTER_STACK_SIZE=4096 CLUSTER_SLAVE_STACK_SIZE=1024 @@ -83,26 +80,24 @@ endif FREQ_FC?=250 VOLTAGE?=1.2 -include common/model_decl.mk ifeq ($(USE_POWER), 1) # override the tflite model name to the one which expects power MFCC -> more efficient - TRAINED_TFLITE_MODEL=model/$(MODEL_PREFIX)_power.tflite + TRAINED_MODEL=model/$(MODEL_PREFIX)_power.tflite +else + TRAINED_MODEL=model/$(MODEL_PREFIX).tflite endif +include common/model_decl.mk +include mfcc_model.mk ifeq ($(WITH_MFCC), 1) APP_SRCS += main_with_mfcc.c $(MODEL_GEN_C) $(MODEL_COMMON_SRCS) $(CNN_LIB) - APP_SRCS += $(GAP_LIB_PATH)/wav_io/wavIO.c $(MFCC_KER_SRCS) $(MFCCBUILD_DIR)/MFCCKernels.c + APP_SRCS += $(GAP_LIB_PATH)/wav_io/wavIO.c $(MFCCBUILD_DIR)/MFCCKernels.c $(TILER_DSP_KERNEL_PATH)/LUT_Tables/TwiddlesDef.c $(TILER_DSP_KERNEL_PATH)/LUT_Tables/RFFTTwiddlesDef.c $(TILER_DSP_KERNEL_PATH)/LUT_Tables/SwapTablesDef.c + APP_SRCS += $(TILER_DSP_KERNEL_PATH)/MfccBasicKernels.c $(TILER_DSP_KERNEL_PATH)/FFT_Library.c $(TILER_DSP_KERNEL_PATH)/CmplxFunctions.c $(TILER_DSP_KERNEL_PATH)/PreProcessing.c APP_CFLAGS += -I$(MFCC_GENERATOR) -I$(MFCCBUILD_DIR) else APP_SRCS += main.c $(MODEL_GEN_C) $(MODEL_COMMON_SRCS) $(CNN_LIB) endif -ifeq ($(USE_POWER), 0) - APP_CFLAGS += -DUSE_ABS -endif -ifeq ($(USE_HIGH_PREC), 1) - APP_CFLAGS += -DHIGH_PREC_FFT -endif ifeq ($(PMSIS_OS), freertos) APP_CFLAGS += -DFREERTOS endif @@ -111,7 +106,7 @@ APP_CFLAGS += -O3 -s -mno-memcpy -fno-tree-loop-distribute-patterns APP_CFLAGS += -I. -I$(MODEL_COMMON_INC) -I$(TILER_EMU_INC) -I$(TILER_INC) -I$(MODEL_BUILD) $(CNN_LIB_INCLUDE) APP_CFLAGS += -DAT_MODEL_PREFIX=$(MODEL_PREFIX) $(MODEL_SIZE_CFLAGS) APP_CFLAGS += -DSTACK_SIZE=$(CLUSTER_STACK_SIZE) -DSLAVE_STACK_SIZE=$(CLUSTER_SLAVE_STACK_SIZE) -DFREQ_FC=$(FREQ_FC) -DFREQ_CL=$(FREQ_CL) -DVOLTAGE=$(VOLTAGE) -APP_CFLAGS += -DAT_IMAGE=$(IMAGE) -DAT_WAV=$(WAV_PATH) #-DWRITE_WAV #-DPRINT_AT_INPUT #-DPRINT_WAV +APP_CFLAGS += -DAT_IMAGE=$(IMAGE) -DAT_WAV=$(WAV_PATH) ifeq ($(USE_MIC_SENSORBOARD), 1) APP_CFLAGS += -DFROM_SENSOR -DSILENT @@ -127,26 +122,18 @@ generate_samples: python3 utils/generate_samples_images.py --dct_coefficient_count $(DCT_COUNT) --window_size_ms $(FRAME_SIZE_ms) --window_stride_ms $(FRAME_STEP_ms) test_accuracy: - python3 utils/test_accuracy_emul.py --tflite_model $(TRAINED_TFLITE_MODEL) --dct_coefficient_count $(DCT_COUNT) --window_size_ms $(FRAME_SIZE_ms) --window_stride_ms $(FRAME_STEP_ms) --test_with_wav $(WITH_MFCC) --use_power_spectrogram $(USE_POWER) --use_high_prec $(USE_HIGH_PREC) + python3 utils/test_accuracy_emul.py --tflite_model $(TRAINED_MODEL) --dct_coefficient_count $(DCT_COUNT) --window_size_ms $(FRAME_SIZE_ms) --window_stride_ms $(FRAME_STEP_ms) --test_with_wav $(WITH_MFCC) --use_power_spectrogram $(USE_POWER) test_accuracy_tflite: - python3 utils/test_accuracy_tflite.py --tflite_model $(TRAINED_TFLITE_MODEL) --dct_coefficient_count $(DCT_COUNT) --window_size_ms $(FRAME_SIZE_ms) --window_stride_ms $(FRAME_STEP_ms) --use_power_spectrogram $(USE_POWER) + python3 utils/test_accuracy_tflite.py --tflite_model $(TRAINED_MODEL) --dct_coefficient_count $(DCT_COUNT) --window_size_ms $(FRAME_SIZE_ms) --window_stride_ms $(FRAME_STEP_ms) --use_power_spectrogram $(USE_POWER) # all depends on the model -all:: model mfcc_model - -clean:: clean_model clean_mfcc_model +all:: $(MFCCBUILD_DIR)/MFCCKernels.c model -clean_at_model: - $(RM) $(MODEL_GEN_EXE) +clean:: clean_model clean_mfcc_code at_model_disp:: $(MODEL_BUILD) $(MODEL_GEN_EXE) $(MODEL_GEN_EXE) -o $(MODEL_BUILD) -c $(MODEL_BUILD) $(MODEL_GEN_EXTRA_FLAGS) --debug=Disp -at_model:: $(MODEL_BUILD) $(MODEL_GEN_EXE) - $(MODEL_GEN_EXE) -o $(MODEL_BUILD) -c $(MODEL_BUILD) $(MODEL_GEN_EXTRA_FLAGS) - include common/model_rules.mk -include mfcc_model.mk - include $(GAP_SDK_HOME)/tools/rules/pmsis_rules.mk diff --git a/MfccConfig.json b/MfccConfig.json new file mode 100644 index 0000000..f7eadef --- /dev/null +++ b/MfccConfig.json @@ -0,0 +1,12 @@ +{ + "n_fft": 1024, + "frame_size": 640, + "frame_step": 320, + "sample_rate": 16000, + "mfcc_bank_cnt": 40, + "n_dct": 40, + "fmin": 20.0, + "fmax": 4000.0, + "dtype": "fix16", + "use_tf_mfcc": true +} \ No newline at end of file diff --git a/MfccConfig_HighPrec.json b/MfccConfig_HighPrec.json new file mode 100644 index 0000000..6bbb575 --- /dev/null +++ b/MfccConfig_HighPrec.json @@ -0,0 +1,12 @@ +{ + "n_fft": 1024, + "frame_size": 640, + "frame_step": 320, + "sample_rate": 16000, + "mfcc_bank_cnt": 40, + "n_dct": 40, + "fmin": 20.0, + "fmax": 4000.0, + "dtype": "fix32_scal", + "use_tf_mfcc": true +} \ No newline at end of file diff --git a/at_log/MEDIUM.csv b/accuracy_log/MEDIUM.csv similarity index 100% rename from at_log/MEDIUM.csv rename to accuracy_log/MEDIUM.csv diff --git a/at_log/SMALL.csv b/accuracy_log/SMALL.csv similarity index 100% rename from at_log/SMALL.csv rename to accuracy_log/SMALL.csv diff --git a/accuracy_log/log_test_large_hp_power.txt b/accuracy_log/log_test_large_hp_power.txt new file mode 100644 index 0000000..073bfa4 --- /dev/null +++ b/accuracy_log/log_test_large_hp_power.txt @@ -0,0 +1,1721 @@ +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_l_quant_power.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 1 --use_high_prec 1 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=0 LARGE=1 WITH_MFCC=1 USE_POWER=1 USE_HIGH_PREC=1 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +rm -f BUILD_MODEL_8BIT_EMUL/GenTile +rm -f -rf BUILD_MODEL_8BIT_EMUL +rm -f BUILD_MODEL_8BIT_EMUL/*.dat +rm -rf /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_8BIT_EMUL +cp model/KWS_ds_cnn_l_quant_power.tflite BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_l_quant.tflite +echo "GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL" +GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL +sed -e "s|MODEL_SRC|KWS_ds_cnn_l_quantModel.c|g" -e "s|TENSORS_DIR|BUILD_MODEL_8BIT_EMUL/tensors|g" -e "s|MODEL_BUILD|BUILD_MODEL_8BIT_EMUL|g" -e "s|GRAPH_DUMP||g" -e "s|LARGE_OPT|nodeoption 1 PARALLELFEATURES 0|g" \ + model/nntool_script_params > model/nntool_script +nntool -s model/nntool_script BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_l_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +load_quantization - was: False +now: True +open - opening graph file BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_l_quant.tflite load_quantizaion = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +debug - was: False +now: True +adjust_order - adding transposes to correct tensor order for AT kernels +set_aliases - looking for aliased edges +eliminate_transposes - eliminating unnecessary transposes +eliminate_transposes - search for transposes +eliminate_transposes - ++ Starting up from CONV_2D_0_0[0] +eliminate_transposes - looking up at input_1[0] transpose [2, 0, 1] +eliminate_transposes - accepted input_1 - input without fixed order - transpose input [2, 0, 1] +eliminate_transposes - ++ Found results for CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_0[0] +eliminate_transposes - looking down at CONV_2D_0_0_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_1 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_2[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_2 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_2[0] +eliminate_transposes - looking down at CONV_2D_0_2_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_3 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_2[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_4[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_4 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_4[0] +eliminate_transposes - looking down at CONV_2D_0_4_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_5 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_4[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_6[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_6 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_6[0] +eliminate_transposes - looking down at CONV_2D_0_6_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_7 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_6[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_8[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_8 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_8[0] +eliminate_transposes - looking down at CONV_2D_0_8_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_9[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_9 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_8[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_9[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_9_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_10[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_10 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_9[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_10[0] +eliminate_transposes - looking down at CONV_2D_0_10_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at AVERAGE_POOL_2D_0_11[0] transpose [1, 2, 0] +eliminate_transposes - accepted AVERAGE_POOL_2D_0_11 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_10[0] +eliminate_transposes - ++ Starting down from AVERAGE_POOL_2D_0_11[0] +eliminate_transposes - looking down at FULLY_CONNECTED_0_12[0] transpose [1, 2, 0] +eliminate_transposes - accepted FULLY_CONNECTED_0_12 - linear layer reorder input +eliminate_transposes - ++ Found results for AVERAGE_POOL_2D_0_11[0] +eliminate_transposes - eliminate transposes +eliminate_transposes_actions - Start Action (up): CONV_2D_0_0 +eliminate_transposes_actions - CONV_2D_0_0 delete transpose in[0] +eliminate_transposes_actions - CONV_2D_0_0 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - input_1 input dims with (2, 0, 1) +eliminate_transposes_actions - End Action (up): input_1 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_0 +eliminate_transposes_actions - CONV_2D_0_0 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_0 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_2 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_2 +eliminate_transposes_actions - CONV_2D_0_2 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_2 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_4 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_4 +eliminate_transposes_actions - CONV_2D_0_4 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_4 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_6 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_6 +eliminate_transposes_actions - CONV_2D_0_6 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_6 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_8 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_8 +eliminate_transposes_actions - CONV_2D_0_8 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_8 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_9 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_9 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_9 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_9 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_9 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_9 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_10 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_10 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_10 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_10 +eliminate_transposes_actions - CONV_2D_0_10 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_10 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_11 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_11 delete transpose in[0] +eliminate_transposes_actions - End Action (down): AVERAGE_POOL_2D_0_11 +eliminate_transposes_actions - Start Action (down): AVERAGE_POOL_2D_0_11 +eliminate_transposes_actions - AVERAGE_POOL_2D_0_11 delete transpose out[0] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_11 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - reorder linear layer FULLY_CONNECTED_0_12 in with shape 1x1x276 transposed (2, 0, 1) +eliminate_transposes_actions - End Action (down): FULLY_CONNECTED_0_12 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - search for transposes +eliminate_transposes - no transposes to eliminate found +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - no further transpose sequences found +set_aliases - looking for aliased edges +nngraph - adjusted order +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start remove_relus +matcher - fusions - start remove_noops +matcher - fusions - start fuse_external_bias_sq8 +matcher - fusions - start fuse_pad +matcher - fusions - start match_duplicate_operations +matcher - fusions - start gather_to_split +matcher - fusions - start slice_to_split +matcher - fusions - start unused_concats +matcher - fusions - start find_missing_quantization +matcher - fusions - start rnn_reverse +matcher - fusions - start rnn_unpack +matcher - fusions - start match_far_hsigmoid +matcher - fusions - start match_close_hsigmoid +matcher - fusions - start expand_transposes +matcher - fusions - start move_pooling_scale8 +matcher - fusions - start move_activations_scale8 +matcher - fusions - start fuse_gap_convs +matcher - fusions - start match_conv_active_pool +matcher - fusions - start match_conv_pool_active +matcher - fusions - start match_conv_active +match_gap_conv - fusing nodes CONV_2D_0_0,CONV_2D_0_0_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_1,DEPTHWISE_CONV_2D_0_1_activation +match_gap_conv - fusing nodes CONV_2D_0_2,CONV_2D_0_2_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_3,DEPTHWISE_CONV_2D_0_3_activation +match_gap_conv - fusing nodes CONV_2D_0_4,CONV_2D_0_4_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_5,DEPTHWISE_CONV_2D_0_5_activation +match_gap_conv - fusing nodes CONV_2D_0_6,CONV_2D_0_6_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_7,DEPTHWISE_CONV_2D_0_7_activation +match_gap_conv - fusing nodes CONV_2D_0_8,CONV_2D_0_8_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_9,DEPTHWISE_CONV_2D_0_9_activation +match_gap_conv - fusing nodes CONV_2D_0_10,CONV_2D_0_10_activation +matcher - fusions - match_conv_active modified graph +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start match_conv_pool +matcher - fusions - start fuse_gap_linear +matcher - fusions - start fuse_op_activation_scale8 +matcher - fusions - start propagate_softmax_sym_qrec +equalize_sym_mult_concats - propagating scale up from node SOFTMAX_0_13 to node FULLY_CONNECTED_0_12 +matcher - fusions - start equalize_sm_concats +matcher - fusions - start filter_bigger_than_input +matcher - fusions - start insert_copies +matcher - fusions - start propagate_up_rnn_in_qs +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| Step | Step name | Operation | Input Dims | Output Dims | Inputs | Active | Params | Ops | Params | Hints | +| | | | (cxhxw) | (cxhxw) | | size | size | | | | ++======+=============================+=========================+============+=============+========+========+========+=========+============================+======================+ +| 0 | input_1 | input | 1x49x10 | 1x49x10 | | 490 | 0 | | I 1x49x10 FIXED_ORDER=0 | in: hxwxc out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 1 | CONV_2D_0_0_fusion | conv_fusion_conv_active | 1x49x10 | 276x25x10 | 0 | 69490 | 11316 | 2.76M | F 276x1x10x4 S 2x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 4x5x1x2 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 2 | DEPTHWISE_CONV_2D_0_1_fusio | conv_fusion_conv_active | 276x25x10 | 276x13x5 | 1 | 86940 | 285 | 161.46K | F 276x1x3x3 S 2x2 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 276 M 1 P 1x1x0x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 3 | CONV_2D_0_2_fusion | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 2 | 35880 | 76452 | 4.95M | F 276x276x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 4 | DEPTHWISE_CONV_2D_0_3_fusio | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 3 | 35880 | 285 | 161.46K | F 276x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 276 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 5 | CONV_2D_0_4_fusion | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 4 | 35880 | 76452 | 4.95M | F 276x276x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 6 | DEPTHWISE_CONV_2D_0_5_fusio | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 5 | 35880 | 285 | 161.46K | F 276x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 276 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 7 | CONV_2D_0_6_fusion | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 6 | 35880 | 76452 | 4.95M | F 276x276x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 8 | DEPTHWISE_CONV_2D_0_7_fusio | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 7 | 35880 | 285 | 161.46K | F 276x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 276 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 9 | CONV_2D_0_8_fusion | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 8 | 35880 | 76452 | 4.95M | F 276x276x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 10 | DEPTHWISE_CONV_2D_0_9_fusio | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 9 | 35880 | 285 | 161.46K | F 276x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 276 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 11 | CONV_2D_0_10_fusion | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 10 | 35880 | 76452 | 4.95M | F 276x276x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 12 | AVERAGE_POOL_2D_0_11 | average_pool | 276x13x5 | 276x1x1 | 11 | 18216 | 0 | 18.22K | T average F 13x5 S 2x2 P | in: cxhxw out: cxhxw | +| | | | | | | | | | 0x0x0x0 zero | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 13 | FULLY_CONNECTED_0_12 | linear | 276x1x1 | 12 | 12 | 288 | 3324 | 3.31K | F 12x276x1x1 | in: cx0x1 out: c | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 14 | SOFTMAX_0_13 | softmax | 12 | 12 | 13 | 24 | 0 | 24 | Beta 0.0 Axis None | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 15 | output_1 | output | 12 | 12 | 14 | 12 | 0 | | O 12 FIXED_ORDER=0 | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | 86940 | 398325 | 28.35M | | | +| | Max active/Total params | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | | 485265 | 28.35M | | | +| | Max mem usage | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| Step | Name | In | Out | Weights | Bias | Mulbias | Calc | Acc | ++======+==================+==================+=================+=================+==========+==========+=======+=======+ +| 0 | input_1 | -247.97>chan | Q32.0 | Q32.0 | +| | | 27124<246.03 | 62586<16.72 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 1 | CONV_2D_0_0_acti | -16.85>chan | Q32.0 | Q32.0 | +| | D_0_1 | 2586<16.72 | 24377<19.84 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 2 | DEPTHWISE_CONV_2 | -20.00>chan | Q32.0 | Q32.0 | +| | | 4377<19.84 | 00155<17.78 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 3 | CONV_2D_0_2_acti | -17.92>chan | Q32.0 | Q32.0 | +| | D_0_3 | 0155<17.78 | 64949<17.10 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 4 | DEPTHWISE_CONV_2 | -17.24>chan | Q32.0 | Q32.0 | +| | | 4949<17.10 | 43959<10.60 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 5 | CONV_2D_0_4_acti | -10.68>chan | Q32.0 | Q32.0 | +| | D_0_5 | 3959<10.60 | 66984<12.53 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 6 | DEPTHWISE_CONV_2 | -12.63>chan | Q32.0 | Q32.0 | +| | | 6984<12.53 | 0435<8.73 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 7 | CONV_2D_0_6_acti | -8.79>chan | Q32.0 | Q32.0 | +| | D_0_7 | 435<8.73 | 59043<11.76 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 8 | DEPTHWISE_CONV_2 | -11.85>chan | Q32.0 | Q32.0 | +| | | 9043<11.76 | 4307<8.12 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 9 | CONV_2D_0_8_acti | -8.18>chan | Q32.0 | Q32.0 | +| | D_0_9 | 307<8.12 | 84755<12.17 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 10 | DEPTHWISE_CONV_2 | -12.27>chan | Q32.0 | Q32.0 | +| | | 4755<12.17 | 1018<8.56 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 11 | CONV_2D_0_10_act | -8.63>chan | Q32.0 | Q32.0 | +| | 0_12 | 018<8.56 | 00000<33.61 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 14 | SOFTMAX_0_13 | -33.88 W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerSetBiasB32_SQ8 + ConvKerName: KerConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerReduct_CC_ReLU_SQ8 +Nb Oper : 2829000 + +==== Process Tiling For User Kernel: S1_Conv2d_276x1x10x4_Relu ======================= +S1_Conv2d_276x1x10x4_Relu Partition[0] Size = 1373 (Min: 200, Max: 3017), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S1_Conv2d_276x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 1 + In : Ratio: 2.000000, FixDim: 10, VarDim: 49 [ 49], Size: 984, Total: 984, Move: 135240 (Decl x 276.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 2088, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 2364, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 2640, Move: 276 (Decl x 1.000000) L2 + Filter : Ratio: 0.000000, Size: 80, Total: 2720, Move: 11040 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 25 [ 25], Size: 504, Total: 3224, Move: 69000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 25 [ 25], Size: 1000, Total: 4224, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 4236, Move: 9 (Decl x 1.000000) L2 +S1_Conv2d_276x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 4236, L2Move: 216945, L3Move: 0, Tiling Overhead: 2.639394 +S1_Conv2d_276x1x10x4_Relu Iteration for Tiled Space: 1 +S1_Conv2d_276x1x10x4_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 4236, Reusable Memory: 44500, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S2_Conv2d_276x1x3x3_Relu ======================= +S2_Conv2d_276x1x3x3_Relu Partition[0] Size = 102161 (Min: 60, Max: 265225), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S2_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 384 bytes will require 1104 bytes buffer +S2_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 96 bytes will require 276 bytes buffer +S2_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 bytes will require 276 bytes buffer +S2_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 864 bytes will require 2484 bytes buffer +S2_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 69000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 25104, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 25380, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 25656, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 28140, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 34380, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 46860, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 46872, Move: 9 (Decl x 1.000000) L2 +S2_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 46872, L2Move: 91089, L3Move: 0, Tiling Overhead: 1.000000 +S2_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 48 [48*5 + 36], Iteration for Tiled Space: 1 +S2_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 46872, Reusable Memory: 1864, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S3_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S3_Conv2d_276x276x1x1_Relu ======================= +S3_Conv2d_276x276x1x1_Relu Partition[0] Size = 3353 (Min: 2208, Max: 37089), Fraction: 0.32, Giving: 15567 bytes out of 48736 bytes +S3_Conv2d_276x276x1x1_Relu Partition[1] Size = 7144 (Min: 4416, Max: 190048), Fraction: 0.68, Giving: 33168 bytes out of 48736 bytes +S3_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 7 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 40 [ 276], Size: 22080, Total: 22080, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 23184, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 40 [ 276], Size: 5200, Total: 28384, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 28660, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 28936, Move: 276 (Decl x 1.000000) L2 +S3_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 28936, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S3_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 7 +S3_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 7 (Last one is truncated), Given L1 Memory: 33168, Used L1 Memory: 28936, Reusable Memory: 4232, Used L2 Memory: 0 +S3_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 24 [ 65], Size: 13248, Total: 14352, Move: 125580 (Decl x 7.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 14364, Move: 9 (Decl x 1.000000) L2 +S3_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 14364, L2Move: 125589, L3Move: 0, Tiling Overhead: 6.996992 +S3_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 3 +S3_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 15567, Used L1 Memory: 14364, Reusable Memory: 1200, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S4_Conv2d_276x1x3x3_Relu ======================= +S4_Conv2d_276x1x3x3_Relu Partition[0] Size = 91121 (Min: 30, Max: 157585), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S4_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 1104 bytes buffer +S4_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 276 bytes buffer +S4_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 276 bytes buffer +S4_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 2484 bytes buffer +S4_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S4_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +S4_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S5_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S5_Conv2d_276x276x1x1_Relu ======================= +S5_Conv2d_276x276x1x1_Relu Partition[0] Size = 3353 (Min: 2208, Max: 37089), Fraction: 0.32, Giving: 15567 bytes out of 48736 bytes +S5_Conv2d_276x276x1x1_Relu Partition[1] Size = 7144 (Min: 4416, Max: 190048), Fraction: 0.68, Giving: 33168 bytes out of 48736 bytes +S5_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 7 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 40 [ 276], Size: 22080, Total: 22080, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 23184, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 40 [ 276], Size: 5200, Total: 28384, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 28660, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 28936, Move: 276 (Decl x 1.000000) L2 +S5_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 28936, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S5_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 7 +S5_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 7 (Last one is truncated), Given L1 Memory: 33168, Used L1 Memory: 28936, Reusable Memory: 4232, Used L2 Memory: 0 +S5_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 24 [ 65], Size: 13248, Total: 14352, Move: 125580 (Decl x 7.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 14364, Move: 9 (Decl x 1.000000) L2 +S5_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 14364, L2Move: 125589, L3Move: 0, Tiling Overhead: 6.996992 +S5_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 3 +S5_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 15567, Used L1 Memory: 14364, Reusable Memory: 1200, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S6_Conv2d_276x1x3x3_Relu ======================= +S6_Conv2d_276x1x3x3_Relu Partition[0] Size = 91121 (Min: 30, Max: 157585), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S6_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 1104 bytes buffer +S6_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 276 bytes buffer +S6_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 276 bytes buffer +S6_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 2484 bytes buffer +S6_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S6_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S6_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +S6_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S7_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S7_Conv2d_276x276x1x1_Relu ======================= +S7_Conv2d_276x276x1x1_Relu Partition[0] Size = 3353 (Min: 2208, Max: 37089), Fraction: 0.32, Giving: 15567 bytes out of 48736 bytes +S7_Conv2d_276x276x1x1_Relu Partition[1] Size = 7144 (Min: 4416, Max: 190048), Fraction: 0.68, Giving: 33168 bytes out of 48736 bytes +S7_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 7 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 40 [ 276], Size: 22080, Total: 22080, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 23184, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 40 [ 276], Size: 5200, Total: 28384, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 28660, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 28936, Move: 276 (Decl x 1.000000) L2 +S7_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 28936, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 7 +S7_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 7 (Last one is truncated), Given L1 Memory: 33168, Used L1 Memory: 28936, Reusable Memory: 4232, Used L2 Memory: 0 +S7_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 24 [ 65], Size: 13248, Total: 14352, Move: 125580 (Decl x 7.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 14364, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 14364, L2Move: 125589, L3Move: 0, Tiling Overhead: 6.996992 +S7_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 3 +S7_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 15567, Used L1 Memory: 14364, Reusable Memory: 1200, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S8_Conv2d_276x1x3x3_Relu ======================= +S8_Conv2d_276x1x3x3_Relu Partition[0] Size = 91121 (Min: 30, Max: 157585), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S8_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 1104 bytes buffer +S8_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 276 bytes buffer +S8_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 276 bytes buffer +S8_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 2484 bytes buffer +S8_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S8_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S8_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +S8_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S9_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S9_Conv2d_276x276x1x1_Relu ======================= +S9_Conv2d_276x276x1x1_Relu Partition[0] Size = 3353 (Min: 2208, Max: 37089), Fraction: 0.32, Giving: 15567 bytes out of 48736 bytes +S9_Conv2d_276x276x1x1_Relu Partition[1] Size = 7144 (Min: 4416, Max: 190048), Fraction: 0.68, Giving: 33168 bytes out of 48736 bytes +S9_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 7 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 40 [ 276], Size: 22080, Total: 22080, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 23184, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 40 [ 276], Size: 5200, Total: 28384, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 28660, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 28936, Move: 276 (Decl x 1.000000) L2 +S9_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 28936, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S9_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 7 +S9_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 7 (Last one is truncated), Given L1 Memory: 33168, Used L1 Memory: 28936, Reusable Memory: 4232, Used L2 Memory: 0 +S9_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 24 [ 65], Size: 13248, Total: 14352, Move: 125580 (Decl x 7.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 14364, Move: 9 (Decl x 1.000000) L2 +S9_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 14364, L2Move: 125589, L3Move: 0, Tiling Overhead: 6.996992 +S9_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 3 +S9_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 15567, Used L1 Memory: 14364, Reusable Memory: 1200, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S10_Conv2d_276x1x3x3_Relu ======================= +S10_Conv2d_276x1x3x3_Relu Partition[0] Size = 91121 (Min: 30, Max: 157585), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S10_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 1104 bytes buffer +S10_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 276 bytes buffer +S10_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 276 bytes buffer +S10_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 2484 bytes buffer +S10_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S10_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S10_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +S10_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S11_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S11_Conv2d_276x276x1x1_Relu ======================= +S11_Conv2d_276x276x1x1_Relu Partition[0] Size = 3353 (Min: 2208, Max: 37089), Fraction: 0.32, Giving: 15567 bytes out of 48736 bytes +S11_Conv2d_276x276x1x1_Relu Partition[1] Size = 7144 (Min: 4416, Max: 190048), Fraction: 0.68, Giving: 33168 bytes out of 48736 bytes +S11_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 7 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 40 [ 276], Size: 22080, Total: 22080, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 23184, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 40 [ 276], Size: 5200, Total: 28384, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 28660, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 28936, Move: 276 (Decl x 1.000000) L2 +S11_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 28936, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S11_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 7 +S11_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 7 (Last one is truncated), Given L1 Memory: 33168, Used L1 Memory: 28936, Reusable Memory: 4232, Used L2 Memory: 0 +S11_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 24 [ 65], Size: 13248, Total: 14352, Move: 125580 (Decl x 7.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 14364, Move: 9 (Decl x 1.000000) L2 +S11_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 14364, L2Move: 125589, L3Move: 0, Tiling Overhead: 6.996992 +S11_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 3 +S11_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 15567, Used L1 Memory: 14364, Reusable Memory: 1200, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 17940 + +==== Process Tiling For User Kernel: S12_AveragePool_13x5 ======================= +S12_AveragePool_13x5 Partition[0] Size = 37005 (Min: 130, Max: 36477), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S12_AveragePool_13x5, Total Raw Memory: 18228 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S12_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 18228, Reusable Memory: 30508, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S13_Linear_12x276x1x1, Linear: InDim: 276, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S13_Linear_12x276x1x1 ======================= +S13_Linear_12x276x1x1 Partition[0] Size = 7105 (Min: 0, Max: 7273), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S13_Linear_12x276x1x1, Total Raw Memory: 3684 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S13_Linear_12x276x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 3684, Reusable Memory: 45052, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S14_SoftMax ======================= + S14_SoftMax Partition[0] Size = 57 (Min: 8, Max: 81), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S14_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. + S14_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S13_Output[ In] Adding Edge From S13_Linear_12x276x1x1 To S14_SoftMax New + Symbol: S12_Output[ In] Adding Edge From S12_AveragePool_13x5 To S13_Linear_12x276x1x1 New + Symbol: S11_Output[ In] Adding Edge From S11_Conv2d_276x276x1x1_Relu To S12_AveragePool_13x5 New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_276x1x3x3_Relu To S11_Conv2d_276x276x1x1_Relu New + Symbol: S9_Output[ In] Adding Edge From S9_Conv2d_276x276x1x1_Relu To S10_Conv2d_276x1x3x3_Relu New + Symbol: S8_Output[ In] Adding Edge From S8_Conv2d_276x1x3x3_Relu To S9_Conv2d_276x276x1x1_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_276x276x1x1_Relu To S8_Conv2d_276x1x3x3_Relu New + Symbol: S6_Output[ In] Adding Edge From S6_Conv2d_276x1x3x3_Relu To S7_Conv2d_276x276x1x1_Relu New + Symbol: S5_Output[ In] Adding Edge From S5_Conv2d_276x276x1x1_Relu To S6_Conv2d_276x1x3x3_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_276x1x3x3_Relu To S5_Conv2d_276x276x1x1_Relu New + Symbol: S3_Output[ In] Adding Edge From S3_Conv2d_276x276x1x1_Relu To S4_Conv2d_276x1x3x3_Relu New + Symbol: S2_Output[ In] Adding Edge From S2_Conv2d_276x1x3x3_Relu To S3_Conv2d_276x276x1x1_Relu New + Symbol: S1_Output[ In] Adding Edge From S1_Conv2d_276x1x10x4_Relu To S2_Conv2d_276x1x3x3_Relu New + Symbol: Output_1[Undef] Adding Edge From S14_SoftMax To __GraphExit__ New + Symbol: S14_Infos[ In] Adding Edge From __GraphEntry__ To S14_SoftMax New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Linear_12x276x1x1 New + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Linear_12x276x1x1 Exists + Symbol: S13_Biases[ In] Adding Edge From __GraphEntry__ To S13_Linear_12x276x1x1 Exists + Symbol: S13_Weights[ In] Adding Edge From __GraphEntry__ To S13_Linear_12x276x1x1 Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Linear_12x276x1x1 Exists + Symbol: S12_Infos[ In] Adding Edge From __GraphEntry__ To S12_AveragePool_13x5 New + Symbol: S11_Mul_shift[ In] Adding Edge From __GraphEntry__ To S11_Conv2d_276x276x1x1_Relu New + Symbol: S11_Mul_scale[ In] Adding Edge From __GraphEntry__ To S11_Conv2d_276x276x1x1_Relu Exists + Symbol: S11_Biases[ In] Adding Edge From __GraphEntry__ To S11_Conv2d_276x276x1x1_Relu Exists + Symbol: S11_Weights[ In] Adding Edge From __GraphEntry__ To S11_Conv2d_276x276x1x1_Relu Exists + Symbol: S11_Infos[ In] Adding Edge From __GraphEntry__ To S11_Conv2d_276x276x1x1_Relu Exists + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x1x3x3_Relu New + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x1x3x3_Relu Exists + Symbol: S10_Biases[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x1x3x3_Relu Exists + Symbol: S10_Weights[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x1x3x3_Relu Exists + Symbol: S9_Mul_shift[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_276x276x1x1_Relu New + Symbol: S9_Mul_scale[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_276x276x1x1_Relu Exists + Symbol: S9_Biases[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_276x276x1x1_Relu Exists + Symbol: S9_Weights[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_276x276x1x1_Relu Exists + Symbol: S9_Infos[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_276x276x1x1_Relu Exists + Symbol: S8_Mul_shift[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_276x1x3x3_Relu New + Symbol: S8_Mul_scale[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_276x1x3x3_Relu Exists + Symbol: S8_Biases[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_276x1x3x3_Relu Exists + Symbol: S8_Weights[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_276x1x3x3_Relu Exists + Symbol: S8_Infos[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_276x1x3x3_Relu Exists + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x276x1x1_Relu New + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x276x1x1_Relu Exists + Symbol: S7_Biases[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x276x1x1_Relu Exists + Symbol: S7_Weights[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x276x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x276x1x1_Relu Exists + Symbol: S6_Mul_shift[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_276x1x3x3_Relu New + Symbol: S6_Mul_scale[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_276x1x3x3_Relu Exists + Symbol: S6_Biases[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_276x1x3x3_Relu Exists + Symbol: S6_Weights[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_276x1x3x3_Relu Exists + Symbol: S6_Infos[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_276x1x3x3_Relu Exists + Symbol: S5_Mul_shift[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_276x276x1x1_Relu New + Symbol: S5_Mul_scale[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_276x276x1x1_Relu Exists + Symbol: S5_Biases[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_276x276x1x1_Relu Exists + Symbol: S5_Weights[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_276x276x1x1_Relu Exists + Symbol: S5_Infos[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_276x276x1x1_Relu Exists + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x3x3_Relu New + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x3x3_Relu Exists + Symbol: S4_Biases[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x3x3_Relu Exists + Symbol: S4_Weights[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x3x3_Relu Exists + Symbol: S3_Mul_shift[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_276x276x1x1_Relu New + Symbol: S3_Mul_scale[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_276x276x1x1_Relu Exists + Symbol: S3_Biases[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_276x276x1x1_Relu Exists + Symbol: S3_Weights[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_276x276x1x1_Relu Exists + Symbol: S3_Infos[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_276x276x1x1_Relu Exists + Symbol: S2_Mul_shift[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_276x1x3x3_Relu New + Symbol: S2_Mul_scale[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_276x1x3x3_Relu Exists + Symbol: S2_Biases[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_276x1x3x3_Relu Exists + Symbol: S2_Weights[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_276x1x3x3_Relu Exists + Symbol: S2_Infos[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_276x1x3x3_Relu Exists + Symbol: S1_Mul_shift[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_276x1x10x4_Relu New + Symbol: S1_Mul_scale[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_276x1x10x4_Relu Exists + Symbol: S1_Biases[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_276x1x10x4_Relu Exists + Symbol: S1_Weights[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_276x1x10x4_Relu Exists + Symbol: S1_Infos[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_276x1x10x4_Relu Exists + Symbol: S0_Input_1[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_276x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 86940 => Alloc: OK + +After Const Allocation, TopL3: 304704, TopL2: 284517 => Alloc: OK + +[FULL] Remapping [163116 .. 284516] to [0 .. 121400] Align compensation: 3 +[PART] Remapping [0 .. 163115] to [121404 .. 284519] Align compensation: 0 +[PART] Remapping [284517 .. 349999] to [284520 .. 350002] Align compensation: 1 +[FULL] Remapping [0 .. 304703] to [0 .. 304703] Align compensation: 0 +[PART] Remapping [304704 .. 6388607] to [304704 .. 6388607] Align compensation: 0 +Symbol allocation for graph KWS_ds_cnn_l_quantCNN is sucessfull, L2: 284517 out of 350000, L3: 304704 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => S0_Input_1 + (null) => S1_Infos + (null) => S1_Weights + (null) => S1_Biases + (null) => S1_Mul_scale + (null) => S1_Mul_shift + (null) => S2_Infos + (null) => S2_Weights + (null) => S2_Biases + (null) => S2_Mul_scale + (null) => S2_Mul_shift + (null) => S3_Infos + (null) => S3_Weights + (null) => S3_Biases + (null) => S3_Mul_scale + (null) => S3_Mul_shift + (null) => S4_Infos + (null) => S4_Weights + (null) => S4_Biases + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S5_Infos + (null) => S5_Weights + (null) => S5_Biases + (null) => S5_Mul_scale + (null) => S5_Mul_shift + (null) => S6_Infos + (null) => S6_Weights + (null) => S6_Biases + (null) => S6_Mul_scale + (null) => S6_Mul_shift + (null) => S7_Infos + (null) => S7_Weights + (null) => S7_Biases + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S8_Infos + (null) => S8_Weights + (null) => S8_Biases + (null) => S8_Mul_scale + (null) => S8_Mul_shift + (null) => S9_Infos + (null) => S9_Weights + (null) => S9_Biases + (null) => S9_Mul_scale + (null) => S9_Mul_shift + (null) => S10_Infos + (null) => S10_Weights + (null) => S10_Biases + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S11_Infos + (null) => S11_Weights + (null) => S11_Biases + (null) => S11_Mul_scale + (null) => S11_Mul_shift + (null) => S12_Infos + (null) => S13_Infos + (null) => S13_Weights + (null) => S13_Biases + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S14_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 13 14 + + Living Dynamic Symbols: [S0_Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S1_Conv2d_276x1x10x4_Relu, Operations: 2829000 + I In => S0_Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 135240, TileOverhead: 276.000000, L2Buff: 0, Addr: 0 +CI Filter => S1_Weights --L2-- Size: 11040, L3_Move: 0, L2_Move: 11040, TileOverhead: 1.000000, L2Buff: 0, Addr: 2640 +CI Buff Bias => S1_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 984 + O Out => S1_Output --L2-- Size: 69000, L3_Move: 0, L2_Move: 69000, TileOverhead: 1.000000, L2Buff: 0, Addr: 2720 +CI Buff Scale => S1_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 2088 +CI Buff ScaleN => S1_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 2364 +CI Buff Infos => S1_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 4224 + Kernel Memory : L3: 0, L2: 82195 + Kernel Total Memory: 82195, L3 moves: 0, L2 moves: 216945, Move overhead: 2.639394 + Kernel Operations : 2829000 [KernelOper/GraphOper: 9.924761%], Move/Operation ratio: [L3: 0.000000, L2: 0.076686] + Successors: 2 + + Living Dynamic Symbols: [S0_Input_1] [S1_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S2_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S1_Output --L2-- Size: 69000, L3_Move: 0, L2_Move: 69000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S2_Weights --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 25656 +CI Buff Bias => S2_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S2_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28140 +CI Buff Scale => S2_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 25104 +CI Buff ScaleN => S2_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 25380 +CI Buff Infos => S2_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 46860 + Kernel Memory : L3: 0, L2: 91089 + Kernel Total Memory: 91089, L3 moves: 0, L2 moves: 91089, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.507742] + Successors: 3 + + Living Dynamic Symbols: [S1_Output] [S2_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S3_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S2_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 125580, TileOverhead: 7.000000, L2Buff: 0, Addr: 30040 +CI In1 => S3_Weights --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S3_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 22080 + O Out => S3_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 23184 +CI Buff Scale => S3_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28384 +CI Buff ScaleN => S3_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28660 +CI Buff Infos => S3_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43288 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 221361, Move overhead: 1.946527 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.044706] + Successors: 4 + + Living Dynamic Symbols: [S2_Output] [S3_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S4_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S3_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S4_Weights --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => S4_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S4_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S4_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 5 + + Living Dynamic Symbols: [S3_Output] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S5_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S4_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 125580, TileOverhead: 7.000000, L2Buff: 0, Addr: 30040 +CI In1 => S5_Weights --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S5_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 22080 + O Out => S5_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 23184 +CI Buff Scale => S5_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28384 +CI Buff ScaleN => S5_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28660 +CI Buff Infos => S5_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43288 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 221361, Move overhead: 1.946527 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.044706] + Successors: 6 + + Living Dynamic Symbols: [S4_Output] [S5_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S6_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S5_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S6_Weights --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => S6_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S6_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S6_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S6_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S6_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 7 + + Living Dynamic Symbols: [S5_Output] [S6_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S7_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S6_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 125580, TileOverhead: 7.000000, L2Buff: 0, Addr: 30040 +CI In1 => S7_Weights --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S7_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 22080 + O Out => S7_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 23184 +CI Buff Scale => S7_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28384 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28660 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43288 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 221361, Move overhead: 1.946527 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.044706] + Successors: 8 + + Living Dynamic Symbols: [S6_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S8_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S7_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S8_Weights --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => S8_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S8_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S8_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S8_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S8_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 9 + + Living Dynamic Symbols: [S7_Output] [S8_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S9_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S8_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 125580, TileOverhead: 7.000000, L2Buff: 0, Addr: 30040 +CI In1 => S9_Weights --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S9_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 22080 + O Out => S9_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 23184 +CI Buff Scale => S9_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28384 +CI Buff ScaleN => S9_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28660 +CI Buff Infos => S9_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43288 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 221361, Move overhead: 1.946527 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.044706] + Successors: 10 + + Living Dynamic Symbols: [S8_Output] [S9_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S10_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S9_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S10_Weights --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => S10_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S10_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S10_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 11 + + Living Dynamic Symbols: [S9_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S11_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S10_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 125580, TileOverhead: 7.000000, L2Buff: 0, Addr: 30040 +CI In1 => S11_Weights --L2-- Size: 76176, L3_Move: 0, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S11_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 22080 + O Out => S11_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 23184 +CI Buff Scale => S11_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28384 +CI Buff ScaleN => S11_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28660 +CI Buff Infos => S11_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43288 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 0, L2 moves: 221361, Move overhead: 1.946527 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.000000, L2: 0.044706] + Successors: 12 + + Living Dynamic Symbols: [S10_Output] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S12_AveragePool_13x5, Operations: 17940 + I Buff In => S11_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S12_Output --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 17940 +CI Buff Infos => S12_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 18216 + Kernel Memory : L3: 0, L2: 18225 + Kernel Total Memory: 18225, L3 moves: 0, L2 moves: 18225, Move overhead: 1.000000 + Kernel Operations : 17940 [KernelOper/GraphOper: 0.062938%], Move/Operation ratio: [L3: 0.000000, L2: 1.015886] + Successors: 13 + + Living Dynamic Symbols: [S11_Output] [S12_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: UKer S13_Linear_12x276x1x1, Operations: 3312 + I Buff In => S12_Output --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S13_Weights --L2-- Size: 3312, L3_Move: 0, L2_Move: 3312, TileOverhead: 1.000000, L2Buff: 0, Addr: 276 +CI Buff Bias => S13_Biases --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 3588 + O Buff Out => S13_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3636 +CI Buff Scale => S13_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3648 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3660 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 3672 + Kernel Memory : L3: 0, L2: 3681 + Kernel Total Memory: 3681, L3 moves: 0, L2 moves: 3681, Move overhead: 1.000000 + Kernel Operations : 3312 [KernelOper/GraphOper: 0.011619%], Move/Operation ratio: [L3: 0.000000, L2: 1.111413] + Successors: 14 + + Living Dynamic Symbols: [S12_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 14, Channel 0 0: UKer S14_SoftMax, Operations: 12 + I Buff In => S13_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S14_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000042%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 15 + + Living Dynamic Symbols: [Output_1] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 15, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 113721 + Graph nodes min global memory: L3: 76176, L2: 113724 + Graph sum of kernel arguments size: 923956, L3 moves: 304704, L2 moves: 1596906, Move overhead: 1.728335 + Graph total operations: 28504464 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 923956, Total L3_Move: 304704, Total L2_Move: 1596906, Tiling Overhead Average: 1.728335 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic -228528 86940 + Const 304704 77844 + Total 76176 113724 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + S0_Input_1 Externally allocated + S1_Infos INSTALL: HyperFlash[ 0: 15]@ 425916 LOAD: L2[ 0: 15]@ 121212 EXEC: L2[ 0: 15]@ 121212 , Size: 9 + S1_Weights INSTALL: HyperFlash[ 0: 15]@ 380880 LOAD: L2[ 0: 15]@ 76176 EXEC: L2[ 0: 15]@ 76176 , Size: 11040 + S1_Biases INSTALL: HyperFlash[ 0: 15]@ 407652 LOAD: L2[ 0: 15]@ 102948 EXEC: L2[ 0: 15]@ 102948 , Size: 1104 + S1_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 419796 LOAD: L2[ 0: 15]@ 115092 EXEC: L2[ 0: 15]@ 115092 , Size: 276 + S1_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 420072 LOAD: L2[ 0: 15]@ 115368 EXEC: L2[ 0: 15]@ 115368 , Size: 276 + S2_Infos INSTALL: HyperFlash[ 0: 15]@ 425928 LOAD: L2[ 0: 15]@ 121224 EXEC: L2[ 0: 15]@ 121224 , Size: 9 + S2_Weights INSTALL: HyperFlash[ 0: 15]@ 395232 LOAD: L2[ 0: 15]@ 90528 EXEC: L2[ 0: 15]@ 90528 , Size: 2484 + S2_Biases INSTALL: HyperFlash[ 0: 15]@ 408756 LOAD: L2[ 0: 15]@ 104052 EXEC: L2[ 0: 15]@ 104052 , Size: 1104 + S2_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 420348 LOAD: L2[ 0: 15]@ 115644 EXEC: L2[ 0: 15]@ 115644 , Size: 276 + S2_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 420624 LOAD: L2[ 0: 15]@ 115920 EXEC: L2[ 0: 15]@ 115920 , Size: 276 + S3_Infos INSTALL: HyperFlash[ 0: 15]@ 425940 LOAD: L2[ 0: 15]@ 121236 EXEC: L2[ 0: 15]@ 121236 , Size: 9 + S3_Weights INSTALL: HyperFlash[ 0: 15]@ 0 LOAD: HyperRam[ 0: 15]@ 0 EXEC: L2[ 1: 3]@ 208344 , Size: 76176 + S3_Biases INSTALL: HyperFlash[ 0: 15]@ 409860 LOAD: L2[ 0: 15]@ 105156 EXEC: L2[ 0: 15]@ 105156 , Size: 1104 + S3_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 420900 LOAD: L2[ 0: 15]@ 116196 EXEC: L2[ 0: 15]@ 116196 , Size: 276 + S3_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 421176 LOAD: L2[ 0: 15]@ 116472 EXEC: L2[ 0: 15]@ 116472 , Size: 276 + S4_Infos INSTALL: HyperFlash[ 0: 15]@ 425952 LOAD: L2[ 0: 15]@ 121248 EXEC: L2[ 0: 15]@ 121248 , Size: 9 + S4_Weights INSTALL: HyperFlash[ 0: 15]@ 397716 LOAD: L2[ 0: 15]@ 93012 EXEC: L2[ 0: 15]@ 93012 , Size: 2484 + S4_Biases INSTALL: HyperFlash[ 0: 15]@ 410964 LOAD: L2[ 0: 15]@ 106260 EXEC: L2[ 0: 15]@ 106260 , Size: 1104 + S4_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 421452 LOAD: L2[ 0: 15]@ 116748 EXEC: L2[ 0: 15]@ 116748 , Size: 276 + S4_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 421728 LOAD: L2[ 0: 15]@ 117024 EXEC: L2[ 0: 15]@ 117024 , Size: 276 + S5_Infos INSTALL: HyperFlash[ 0: 15]@ 425964 LOAD: L2[ 0: 15]@ 121260 EXEC: L2[ 0: 15]@ 121260 , Size: 9 + S5_Weights INSTALL: HyperFlash[ 0: 15]@ 76176 LOAD: HyperRam[ 0: 15]@ 76176 EXEC: L2[ 4: 5]@ 157284 , Size: 76176 + S5_Biases INSTALL: HyperFlash[ 0: 15]@ 412068 LOAD: L2[ 0: 15]@ 107364 EXEC: L2[ 0: 15]@ 107364 , Size: 1104 + S5_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 422004 LOAD: L2[ 0: 15]@ 117300 EXEC: L2[ 0: 15]@ 117300 , Size: 276 + S5_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 422280 LOAD: L2[ 0: 15]@ 117576 EXEC: L2[ 0: 15]@ 117576 , Size: 276 + S6_Infos INSTALL: HyperFlash[ 0: 15]@ 425976 LOAD: L2[ 0: 15]@ 121272 EXEC: L2[ 0: 15]@ 121272 , Size: 9 + S6_Weights INSTALL: HyperFlash[ 0: 15]@ 400200 LOAD: L2[ 0: 15]@ 95496 EXEC: L2[ 0: 15]@ 95496 , Size: 2484 + S6_Biases INSTALL: HyperFlash[ 0: 15]@ 413172 LOAD: L2[ 0: 15]@ 108468 EXEC: L2[ 0: 15]@ 108468 , Size: 1104 + S6_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 422556 LOAD: L2[ 0: 15]@ 117852 EXEC: L2[ 0: 15]@ 117852 , Size: 276 + S6_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 422832 LOAD: L2[ 0: 15]@ 118128 EXEC: L2[ 0: 15]@ 118128 , Size: 276 + S7_Infos INSTALL: HyperFlash[ 0: 15]@ 425988 LOAD: L2[ 0: 15]@ 121284 EXEC: L2[ 0: 15]@ 121284 , Size: 9 + S7_Weights INSTALL: HyperFlash[ 0: 15]@ 152352 LOAD: HyperRam[ 0: 15]@ 152352 EXEC: L2[ 6: 7]@ 157284 , Size: 76176 + S7_Biases INSTALL: HyperFlash[ 0: 15]@ 414276 LOAD: L2[ 0: 15]@ 109572 EXEC: L2[ 0: 15]@ 109572 , Size: 1104 + S7_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 423108 LOAD: L2[ 0: 15]@ 118404 EXEC: L2[ 0: 15]@ 118404 , Size: 276 + S7_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 423384 LOAD: L2[ 0: 15]@ 118680 EXEC: L2[ 0: 15]@ 118680 , Size: 276 + S8_Infos INSTALL: HyperFlash[ 0: 15]@ 426000 LOAD: L2[ 0: 15]@ 121296 EXEC: L2[ 0: 15]@ 121296 , Size: 9 + S8_Weights INSTALL: HyperFlash[ 0: 15]@ 402684 LOAD: L2[ 0: 15]@ 97980 EXEC: L2[ 0: 15]@ 97980 , Size: 2484 + S8_Biases INSTALL: HyperFlash[ 0: 15]@ 415380 LOAD: L2[ 0: 15]@ 110676 EXEC: L2[ 0: 15]@ 110676 , Size: 1104 + S8_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 423660 LOAD: L2[ 0: 15]@ 118956 EXEC: L2[ 0: 15]@ 118956 , Size: 276 + S8_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 423936 LOAD: L2[ 0: 15]@ 119232 EXEC: L2[ 0: 15]@ 119232 , Size: 276 + S9_Infos INSTALL: HyperFlash[ 0: 15]@ 426012 LOAD: L2[ 0: 15]@ 121308 EXEC: L2[ 0: 15]@ 121308 , Size: 9 + S9_Weights INSTALL: HyperFlash[ 0: 15]@ 228528 LOAD: HyperRam[ 0: 15]@ 228528 EXEC: L2[ 8: 9]@ 175224 , Size: 76176 + S9_Biases INSTALL: HyperFlash[ 0: 15]@ 416484 LOAD: L2[ 0: 15]@ 111780 EXEC: L2[ 0: 15]@ 111780 , Size: 1104 + S9_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 424212 LOAD: L2[ 0: 15]@ 119508 EXEC: L2[ 0: 15]@ 119508 , Size: 276 + S9_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 424488 LOAD: L2[ 0: 15]@ 119784 EXEC: L2[ 0: 15]@ 119784 , Size: 276 + S10_Infos INSTALL: HyperFlash[ 0: 15]@ 426024 LOAD: L2[ 0: 15]@ 121320 EXEC: L2[ 0: 15]@ 121320 , Size: 9 + S10_Weights INSTALL: HyperFlash[ 0: 15]@ 405168 LOAD: L2[ 0: 15]@ 100464 EXEC: L2[ 0: 15]@ 100464 , Size: 2484 + S10_Biases INSTALL: HyperFlash[ 0: 15]@ 417588 LOAD: L2[ 0: 15]@ 112884 EXEC: L2[ 0: 15]@ 112884 , Size: 1104 + S10_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 424764 LOAD: L2[ 0: 15]@ 120060 EXEC: L2[ 0: 15]@ 120060 , Size: 276 + S10_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 425040 LOAD: L2[ 0: 15]@ 120336 EXEC: L2[ 0: 15]@ 120336 , Size: 276 + S11_Infos INSTALL: HyperFlash[ 0: 15]@ 426036 LOAD: L2[ 0: 15]@ 121332 EXEC: L2[ 0: 15]@ 121332 , Size: 9 + S11_Weights INSTALL: HyperFlash[ 0: 15]@ 304704 LOAD: L2[ 0: 15]@ 0 EXEC: L2[ 0: 15]@ 0 , Size: 76176 + S11_Biases INSTALL: HyperFlash[ 0: 15]@ 418692 LOAD: L2[ 0: 15]@ 113988 EXEC: L2[ 0: 15]@ 113988 , Size: 1104 + S11_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 425316 LOAD: L2[ 0: 15]@ 120612 EXEC: L2[ 0: 15]@ 120612 , Size: 276 + S11_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 425592 LOAD: L2[ 0: 15]@ 120888 EXEC: L2[ 0: 15]@ 120888 , Size: 276 + S12_Infos INSTALL: HyperFlash[ 0: 15]@ 426048 LOAD: L2[ 0: 15]@ 121344 EXEC: L2[ 0: 15]@ 121344 , Size: 9 + S13_Infos INSTALL: HyperFlash[ 0: 15]@ 426060 LOAD: L2[ 0: 15]@ 121356 EXEC: L2[ 0: 15]@ 121356 , Size: 9 + S13_Weights INSTALL: HyperFlash[ 0: 15]@ 391920 LOAD: L2[ 0: 15]@ 87216 EXEC: L2[ 0: 15]@ 87216 , Size: 3312 + S13_Biases INSTALL: HyperFlash[ 0: 15]@ 425868 LOAD: L2[ 0: 15]@ 121164 EXEC: L2[ 0: 15]@ 121164 , Size: 48 + S13_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 426072 LOAD: L2[ 0: 15]@ 121368 EXEC: L2[ 0: 15]@ 121368 , Size: 12 + S13_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 426084 LOAD: L2[ 0: 15]@ 121380 EXEC: L2[ 0: 15]@ 121380 , Size: 12 + S14_Infos INSTALL: HyperFlash[ 0: 15]@ 426096 LOAD: L2[ 0: 15]@ 121392 EXEC: L2[ 0: 15]@ 121392 , Size: 9 + Output_1 Externally allocated + S1_Output EXEC: L2[ 1: 2]@ 139344 , Size: 69000 + S2_Output EXEC: L2[ 2: 3]@ 121404 , Size: 17940 + S3_Output EXEC: L2[ 3: 4]@ 139344 , Size: 17940 + S4_Output EXEC: L2[ 4: 5]@ 121404 , Size: 17940 + S5_Output EXEC: L2[ 5: 6]@ 139344 , Size: 17940 + S6_Output EXEC: L2[ 6: 7]@ 121404 , Size: 17940 + S7_Output EXEC: L2[ 7: 8]@ 139344 , Size: 17940 + S8_Output EXEC: L2[ 8: 9]@ 157284 , Size: 17940 + S9_Output EXEC: L2[ 9: 10]@ 121404 , Size: 17940 + S10_Output EXEC: L2[ 10: 11]@ 139344 , Size: 17940 + S11_Output EXEC: L2[ 11: 12]@ 121404 , Size: 17940 + S12_Output EXEC: L2[ 12: 13]@ 139344 , Size: 276 + S13_Output EXEC: L2[ 13: 14]@ 121404 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S1_Conv2d_276x1x10x4_Relu +Generating Code For User Kernel: S2_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S3_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S4_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S5_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S6_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S7_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S8_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S9_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S10_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S11_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S12_AveragePool_13x5 +Generating Code For User Kernel: S13_Linear_12x276x1x1 +Generating Code For User Kernel: S14_SoftMax +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Weights.tensor: 11040 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Weights.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Weights.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Weights.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Weights.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Weights.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Weights.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Weights.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Weights.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Weights.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Weights.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S12_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S13_Weights.tensor: 3312 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S13_Biases.tensor: 48 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S13_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S13_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S14_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_l_quant_L3_Flash_Const.dat (size 426108) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 46872 +L2 Memory size (Bytes) : Given: 350000, Used: 284517 +L3 Memory size (Bytes) : Given: 6388608, Used: 304704 + +L3 Memory bandwidth for 1 graph run : 304704 Bytes +L2 Memory bandwidth for 1 graph run : 1596906 Bytes +Sum of all Kernels arguments size : 923956 Bytes +Tiling Bandwith overhead : 1.728335 Move/KerArgSize +Sum of baseline bandwidth : 50099340 Bytes +Percentage of baseline BW for L2 : 3.18748 % +Percentage of baseline BW for L3 : 0.6082 % +Sum of all Kernels operations : 28504464 Operations +Total amount of flash coefficients : 426108 Bytes + +Basic kernels library : nntool_extra_kernels.h + : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_l_quant.h +Output Directory : BUILD_MODEL_8BIT_EMUL + +The following files have been generated: + KWS_ds_cnn_l_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_l_quantKernels.h Header file for the generated C code + KWS_ds_cnn_l_quant_L3_Flash_Const.dat Flash content for Graph constants +mkdir /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +python3 /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/GenLUT.py --fft_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/LUT.def --mfcc_bf_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCC_FB.def \ + --sample_rate 16000 --frame_size 640 --frame_step 320 --n_frame 49 \ + --n_fft 1024 --n_dct 40 --mfcc_bank_cnt 40 --fmin 20 --fmax 4000 --mfcc_bank_cnt 40 --preempfactor 0.0 \ + --use_tf_mfcc --save_params_header MFCC_params_LARGE.h --use_power --use_high_prec +(1024,) +Filter 0: Start: 2 Stop: 5 Base: 0 Items: 4 +Filter 1: Start: 4 Stop: 8 Base: 4 Items: 5 +Filter 2: Start: 6 Stop: 10 Base: 9 Items: 5 +Filter 3: Start: 9 Stop: 13 Base: 14 Items: 5 +Filter 4: Start: 11 Stop: 15 Base: 19 Items: 5 +Filter 5: Start: 14 Stop: 18 Base: 24 Items: 5 +Filter 6: Start: 16 Stop: 21 Base: 29 Items: 6 +Filter 7: Start: 19 Stop: 24 Base: 35 Items: 6 +Filter 8: Start: 22 Stop: 28 Base: 41 Items: 7 +Filter 9: Start: 25 Stop: 31 Base: 48 Items: 7 +Filter 10: Start: 29 Stop: 34 Base: 55 Items: 6 +Filter 11: Start: 32 Stop: 38 Base: 61 Items: 7 +Filter 12: Start: 35 Stop: 42 Base: 68 Items: 8 +Filter 13: Start: 39 Stop: 46 Base: 76 Items: 8 +Filter 14: Start: 43 Stop: 51 Base: 84 Items: 9 +Filter 15: Start: 47 Stop: 55 Base: 93 Items: 9 +Filter 16: Start: 52 Stop: 60 Base: 102 Items: 9 +Filter 17: Start: 56 Stop: 65 Base: 111 Items: 10 +Filter 18: Start: 61 Stop: 70 Base: 121 Items: 10 +Filter 19: Start: 66 Stop: 75 Base: 131 Items: 10 +Filter 20: Start: 71 Stop: 81 Base: 141 Items: 11 +Filter 21: Start: 76 Stop: 87 Base: 152 Items: 12 +Filter 22: Start: 82 Stop: 93 Base: 164 Items: 12 +Filter 23: Start: 88 Stop: 99 Base: 176 Items: 12 +Filter 24: Start: 94 Stop: 106 Base: 188 Items: 13 +Filter 25: Start: 100 Stop: 113 Base: 201 Items: 14 +Filter 26: Start: 107 Stop: 121 Base: 215 Items: 15 +Filter 27: Start: 114 Stop: 128 Base: 230 Items: 15 +Filter 28: Start: 122 Stop: 137 Base: 245 Items: 16 +Filter 29: Start: 129 Stop: 145 Base: 261 Items: 17 +Filter 30: Start: 138 Stop: 154 Base: 278 Items: 17 +Filter 31: Start: 146 Stop: 163 Base: 295 Items: 18 +Filter 32: Start: 155 Stop: 173 Base: 313 Items: 19 +Filter 33: Start: 164 Stop: 183 Base: 332 Items: 20 +Filter 34: Start: 174 Stop: 194 Base: 352 Items: 21 +Filter 35: Start: 184 Stop: 205 Base: 373 Items: 22 +Filter 36: Start: 195 Stop: 217 Base: 395 Items: 23 +Filter 37: Start: 206 Stop: 229 Base: 418 Items: 24 +Filter 38: Start: 218 Stop: 242 Base: 442 Items: 25 +Filter 39: Start: 230 Stop: 255 Base: 467 Items: 26 +gcc -g -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -I. -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Emulation MFCCmodel.c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccGenerator.c /home/marco-gwt/GWT/AutotilerV2/install/lib/libtile.a -lm -DLARGE +/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -f BUILD_MODEL_8BIT_EMUL --L1 48736 --L2 350000 --L3 6388608 +MFCC_COEF_DYN = 10 +FFT_BITS = 10 + +==== Process Tiling For User Kernel: MFCC ======================= + MFCC Partition[0] Size = 33086 (Min: 0, Max: 33814), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes + MFCC, TiledSpace: Tile0 Iteration Count: 1 + In : Ratio: 0.000000, Size: 2560, Total: 2560, Move: 62720 (Decl x 1.960000) L2 +* Out : Ratio: 0.000000, Size: 3920, Total: 6480, Move: 3920 (Decl x 1.000000) L2 +* InOut1 : Ratio: 6.400000, Size: 8192, Total: 14672, Move: 0 (Decl x 0.000000) L2 +* Out_fft : Ratio: 6.400000, Size: 8192, Total: 22864, Move: 0 (Decl x 0.000000) L2 +* shift_fft : Ratio: 3.200000, Size: 1024, Total: 23888, Move: 0 (Decl x 0.000000) L2 +* InOut2 : Ratio: 0.125000, Size: 160, Total: 24048, Move: 0 (Decl x 0.000000) L2 +* Shift : Ratio: 0.003125, Size: 4, Total: 24052, Move: 0 (Decl x 0.000000) L2 +* WinTable : Ratio: 3.200000, Size: 2048, Total: 26100, Move: 2048 (Decl x 1.000000) L2 +* Twiddles_fft : Ratio: 6.400000, Size: 4096, Total: 30196, Move: 4096 (Decl x 1.000000) L2 +* SwapTable_fft : Ratio: 3.200000, Size: 2048, Total: 32244, Move: 2048 (Decl x 1.000000) L2 +*MFCC_FilterBank : Ratio: 0.125000, Size: 320, Total: 32564, Move: 320 (Decl x 1.000000) L2 +* MFCC_Coeffs : Ratio: 1.543750, Size: 988, Total: 33552, Move: 988 (Decl x 1.000000) L2 +* shift_bf : Ratio: 0.125000, Size: 40, Total: 33592, Move: 0 (Decl x 0.000000) L2 +* DCT_Coeff : Ratio: 5.000000, Size: 3200, Total: 36792, Move: 3200 (Decl x 1.000000) L2 + MFCC - IterSpace: Tile0 - L1 Memory: 36792, L2Move: 79340, L3Move: 0, Tiling Overhead: 1.631839 + MFCC Iteration for Tiled Space: 1 + MFCC For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 36792, Reusable Memory: 11944, Used L2 Memory: 0 +================================================================================================= + + +Generating Code For User Kernel: MFCC + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 36792 +L2 Memory size (Bytes) : Given: 350000, Used: 0 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for all user kernels : 0 Bytes +L2 Memory bandwidth for all user kerneks : 79340 Bytes +Sum of all Kernels arguments size : 48620 Bytes +Tiling Bandwith overhead : 1.631839 Move/KerArgSize +Percentage of baseline BW for L2 : inf % +Percentage of baseline BW for L3 : -nan % +Sum of all Kernels operations : 0 Operations + +Basic kernels library : MfccBasicKernels.h +Output Directory : /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL + +The following files have been generated: + MFCCKernels.c Generated C code for the user kernels and the user kernels groups + MFCCKernels.h Header file for the generated C code +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_l_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_l_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_l_quantKernels.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DLARGE -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_l_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': True} +Pred/Tot: 97/ 100 Accuracy: 97.00% +Pred/Tot: 195/ 200 Accuracy: 97.50% +Pred/Tot: 287/ 300 Accuracy: 95.67% +Pred/Tot: 382/ 400 Accuracy: 95.50% +Pred/Tot: 477/ 500 Accuracy: 95.40% +Pred/Tot: 571/ 600 Accuracy: 95.17% +Pred/Tot: 665/ 700 Accuracy: 95.00% +Pred/Tot: 756/ 800 Accuracy: 94.50% +Pred/Tot: 850/ 900 Accuracy: 94.44% +Pred/Tot: 945/1000 Accuracy: 94.50% +Pred/Tot: 1038/1100 Accuracy: 94.36% +Pred/Tot: 1133/1200 Accuracy: 94.42% +Pred/Tot: 1226/1300 Accuracy: 94.31% +Pred/Tot: 1321/1400 Accuracy: 94.36% +Pred/Tot: 1415/1500 Accuracy: 94.33% +Pred/Tot: 1512/1600 Accuracy: 94.50% +Pred/Tot: 1607/1700 Accuracy: 94.53% +Pred/Tot: 1704/1800 Accuracy: 94.67% +Pred/Tot: 1799/1900 Accuracy: 94.68% +Pred/Tot: 1896/2000 Accuracy: 94.80% +Pred/Tot: 1990/2100 Accuracy: 94.76% +Pred/Tot: 2088/2200 Accuracy: 94.91% +Pred/Tot: 2184/2300 Accuracy: 94.96% +Pred/Tot: 2276/2400 Accuracy: 94.83% +Pred/Tot: 2369/2500 Accuracy: 94.76% +Pred/Tot: 2466/2600 Accuracy: 94.85% +Pred/Tot: 2561/2700 Accuracy: 94.85% +Pred/Tot: 2656/2800 Accuracy: 94.86% +Pred/Tot: 2750/2900 Accuracy: 94.83% +Pred/Tot: 2845/3000 Accuracy: 94.83% +Pred/Tot: 2943/3100 Accuracy: 94.94% +Pred/Tot: 3035/3200 Accuracy: 94.84% +Pred/Tot: 3131/3300 Accuracy: 94.88% +Pred/Tot: 3222/3400 Accuracy: 94.76% +Pred/Tot: 3319/3500 Accuracy: 94.83% +Pred/Tot: 3416/3600 Accuracy: 94.89% +Pred/Tot: 3510/3700 Accuracy: 94.86% +Pred/Tot: 3605/3800 Accuracy: 94.87% +Pred/Tot: 3695/3900 Accuracy: 94.74% +Pred/Tot: 3791/4000 Accuracy: 94.77% +Pred/Tot: 3886/4100 Accuracy: 94.78% +Pred/Tot: 3980/4200 Accuracy: 94.76% +Pred/Tot: 4073/4300 Accuracy: 94.72% +Pred/Tot: 4169/4400 Accuracy: 94.75% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4211/4444 Accuracy: 94.76% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 0 335 1 1 3 7 4 5 7 1 5 2] + [ 0 2 387 1 0 1 4 0 0 0 1 1] + [ 0 13 0 383 1 0 3 0 0 1 1 4] + [ 0 4 1 0 325 1 2 0 2 11 3 1] + [ 0 3 0 7 0 359 2 1 0 0 4 1] + [ 0 1 3 1 0 0 345 1 0 0 0 1] + [ 0 4 0 1 0 0 0 356 0 2 0 0] + [ 1 9 0 2 3 0 0 2 338 8 0 0] + [ 0 4 1 1 19 0 0 0 9 336 2 1] + [ 1 2 0 0 7 0 0 1 1 1 337 0] + [ 0 8 2 11 0 7 0 0 1 3 1 339]] +Pred/Tot: 93/ 100 Accuracy: 93.00% +Pred/Tot: 189/ 200 Accuracy: 94.50% +Pred/Tot: 283/ 300 Accuracy: 94.33% +Pred/Tot: 379/ 400 Accuracy: 94.75% +Pred/Tot: 470/ 500 Accuracy: 94.00% +Pred/Tot: 567/ 600 Accuracy: 94.50% +Pred/Tot: 663/ 700 Accuracy: 94.71% +Pred/Tot: 760/ 800 Accuracy: 95.00% +Pred/Tot: 856/ 900 Accuracy: 95.11% +Pred/Tot: 952/1000 Accuracy: 95.20% +Pred/Tot: 1044/1100 Accuracy: 94.91% +Pred/Tot: 1141/1200 Accuracy: 95.08% +Pred/Tot: 1233/1300 Accuracy: 94.85% +Pred/Tot: 1326/1400 Accuracy: 94.71% +Pred/Tot: 1419/1500 Accuracy: 94.60% +Pred/Tot: 1516/1600 Accuracy: 94.75% +Pred/Tot: 1607/1700 Accuracy: 94.53% +Pred/Tot: 1703/1800 Accuracy: 94.61% +Pred/Tot: 1794/1900 Accuracy: 94.42% +Pred/Tot: 1888/2000 Accuracy: 94.40% +Pred/Tot: 1981/2100 Accuracy: 94.33% +Pred/Tot: 2076/2200 Accuracy: 94.36% +Pred/Tot: 2172/2300 Accuracy: 94.43% +Pred/Tot: 2267/2400 Accuracy: 94.46% +Pred/Tot: 2364/2500 Accuracy: 94.56% +Pred/Tot: 2456/2600 Accuracy: 94.46% +Pred/Tot: 2551/2700 Accuracy: 94.48% +Pred/Tot: 2648/2800 Accuracy: 94.57% +Pred/Tot: 2743/2900 Accuracy: 94.59% +Pred/Tot: 2839/3000 Accuracy: 94.63% +Pred/Tot: 2934/3100 Accuracy: 94.65% +Pred/Tot: 3032/3200 Accuracy: 94.75% +Pred/Tot: 3130/3300 Accuracy: 94.85% +Pred/Tot: 3221/3400 Accuracy: 94.74% +Pred/Tot: 3315/3500 Accuracy: 94.71% +Pred/Tot: 3412/3600 Accuracy: 94.78% +Pred/Tot: 3507/3700 Accuracy: 94.78% +Pred/Tot: 3602/3800 Accuracy: 94.79% +Pred/Tot: 3697/3900 Accuracy: 94.79% +Pred/Tot: 3792/4000 Accuracy: 94.80% +Pred/Tot: 3886/4100 Accuracy: 94.78% +Pred/Tot: 3983/4200 Accuracy: 94.83% +Pred/Tot: 4080/4300 Accuracy: 94.88% +Pred/Tot: 4172/4400 Accuracy: 94.82% +Pred/Tot: 4262/4500 Accuracy: 94.71% +Pred/Tot: 4355/4600 Accuracy: 94.67% +Pred/Tot: 4451/4700 Accuracy: 94.70% +Pred/Tot: 4546/4800 Accuracy: 94.71% + +FINAL TESTING ACCURACY: +Pred/Tot: 4631/4889 Accuracy: 94.72% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 377 0 3 3 5 2 5 6 1 3 3] + [ 0 8 408 2 0 0 1 0 0 0 0 0] + [ 0 5 1 389 0 5 2 0 0 1 0 2] + [ 0 8 0 0 391 2 0 0 3 13 5 3] + [ 0 7 1 10 0 381 2 1 0 0 0 4] + [ 0 9 5 2 0 0 393 3 0 0 0 0] + [ 0 6 0 0 0 0 5 382 2 0 1 0] + [ 0 10 0 0 2 4 1 0 365 11 0 3] + [ 0 7 0 3 11 0 1 0 5 368 2 5] + [ 0 2 0 0 3 3 0 2 0 0 400 1] + [ 0 5 1 18 1 2 2 1 0 2 1 369]] diff --git a/accuracy_log/log_test_large_hp_power_v2.txt b/accuracy_log/log_test_large_hp_power_v2.txt new file mode 100644 index 0000000..0ca63fd --- /dev/null +++ b/accuracy_log/log_test_large_hp_power_v2.txt @@ -0,0 +1,2376 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_l_quant_power.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 1 +WARNING:tensorflow:From utils/test_accuracy_emul.py:311: The name tf.app.run is deprecated. Please use tf.compat.v1.app.run instead. + +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from main_emulation.c:32: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_BasicKernels_SQ8.h:3, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.h:5, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.c:1: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:17: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WriteImageToFile’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:437:18: note: in expansion of macro ‘__OPEN_WRITE’ + 437 | void *File = __OPEN_WRITE(fs, ImageName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:454:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 454 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:460:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 460 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:473:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 473 | ret+=__WRITE(File,OutBuffer +(CHUNK_SIZE*i), CHUNK_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:476:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 476 | ret+=__WRITE(File,OutBuffer+(CHUNK_SIZE*steps) , ((W*H*PixelSize) % CHUNK_SIZE)*sizeof(unsigned char)); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:479:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 479 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:57: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result] + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:9: note: in expansion of macro ‘__WRITE’ + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:7: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c: In function ‘AT_TensorGetNextPage’: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:79:63: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 79 | AT_HYPERFLASH_FS_CL_COPY((AT_HYPERFLASH_FS_T *) L3_Device, (AT_HYPERFLASH_FS_EXT_ADDR_TYPE) (Addr+Offset), (AT_HYPERFLASH_FS_INT_ADDR_TYPE) L2_BufferAddr, Size, 0, L3_Event); + | ^ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:225:36: note: in definition of macro ‘AT_HYPERFLASH_FS_CL_COPY’ + 225 | __at_hyperflash_fs_copy(*(file), ext, loc, size, dir) + | ^~~ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c: In function ‘WriteWavToFileNew’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:236:18: note: in expansion of macro ‘__OPEN_WRITE’ + 236 | void *File = __OPEN_WRITE(fs, FileName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:328:20: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 328 | ret += __WRITE(File, header_buffer, WAV_HEADER_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:335:21: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 335 | ret += __WRITE(File, data, Size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:338:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 338 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=0 LARGE=1 WITH_MFCC=1 USE_POWER=1 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_SQ8_EMUL +cp model/KWS_ds_cnn_l_quant_power.tflite BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quant.tflite +echo "GENERATING NNTOOL STATE FILE" +GENERATING NNTOOL STATE FILE +nntool -s model/nntool_script BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +open - opening graph file BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quant.tflite load_quantization = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +forwards SOFTMAX_0_13 in: -33.88<(i8-0.00)*0.26467398<33.61 out: None stop [] fusion False +handler SoftmaxTanHMult selected for SoftMaxParameters(SOFTMAX_0_13) +forwards handler SOFTMAX_0_13 returned in: -64.00<(i8-0.00)*0.50000000<63.50 forced out: -1.00<(i16-0.00)*0.00003052<1.00 fusion False +forwards in edge 0 does not match was -33.88<(i8-0.00)*0.26467398<33.61 need -64.00<(i8-0.00)*0.50000000<63.50 forced +go backwackwards to F 12x1x1x276 B 1 +backwards FULLY_CONNECTED_0_12 in: -8.63<(i8-0.00)*0.06741016<8.56,chan<(i8-0.00)*chan W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 2829000 + +==== Process Tiling For User Kernel: S4_Conv2d_276x1x10x4_Relu ======================= +S4_Conv2d_276x1x10x4_Relu Partition[0] Size = 312657 (Min: 200, Max: 440689), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=25 + In Dim: 58, TileOverlap: 8, Ratio: 2.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S4_Conv2d_276x1x10x4_Relu Full buffering on Arg: Scale, was using 288 Bytes will require 276 Bytes buffer +S4_Conv2d_276x1x10x4_Relu Full buffering on Arg: ScaleN, was using 288 Bytes will require 276 Bytes buffer +S4_Conv2d_276x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 7 Parametric Space: [D1, M0=144] Parametric Space: [D0, M1=1] + In : Ratio: 2.000000, FixDim: 10, VarDim: 16 [ 52], Size: 320, Total: 320, Move: 1940 (Decl x 3.959184) L2 + Bias : Ratio: 0.000000, Size: 1152, Total: 1472, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 1748, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 2024, Move: 276 (Decl x 1.000000) L2 +@ Filter : Ratio: 0.000000, Size: 11520, Total: 13544, Move: 11040 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 4 [ 25], Size: 11520, Total: 25064, Move: 69000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 4 [ 25], Size: 23040, Total: 48104, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 48116, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_276x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 48116, L2Move: 83645, L3Move: 0, Tiling Overhead: 1.017641 +S4_Conv2d_276x1x10x4_Relu Partial buffering on Arg: Filter, From: D0 To: D1. Current is (Par) 1 x [W:1, H:1] x 40 => Partial buffer size is 11520 Bytes +S4_Conv2d_276x1x10x4_Relu Found Parametric value for space D1 (Initial: 276, Div: 8) = 144 [144*1 + 132] and space D0 (Initial: 1, Div: 4) = 1 [1*1 + 0], Iteration for Tiled Space: 7 +Kernel: S4_Conv2d_276x1x10x4_Relu, Arg: In, Last Tile: 2+8, Pad: 5 => Requires Padding of tile N-1 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: In, Size: 160, Base1: 0, Base2: 160 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Bias, Size: 576, Base1: 320, Base2: 896 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Scale, Size: 276, Base1: 1472, Base2: 0 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: ScaleN, Size: 276, Base1: 1748, Base2: 0 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Filter, Size: 5760, Base1: 2024, Base2: 7784 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Out, Size: 5760, Base1: 13544, Base2: 19304 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: ConvOut, Size: 23040, Base1: 25064, Base2: 0 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Infos, Size: 12, Base1: 48104, Base2: 0 +S4_Conv2d_276x1x10x4_Relu For Iter Space: 0 Iteration count: 7 (Last one is truncated), Given L1 Memory: 48736, Used L1 Memory: 48116, Reusable Memory: 620, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S7_Conv2d_276x1x3x3_Relu ======================= +S7_Conv2d_276x1x3x3_Relu Partition[0] Size = 102145 (Min: 60, Max: 265097), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=13 + In Dim: 27, TileOverlap: 1, Ratio: 2.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S7_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 384 Bytes will require 1104 Bytes buffer +S7_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 96 Bytes will require 276 Bytes buffer +S7_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 Bytes will require 276 Bytes buffer +S7_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 864 Bytes will require 2484 Bytes buffer +S7_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 69000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 25104, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 25380, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 25656, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 28140, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 34380, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 46860, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 46872, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 46872, L2Move: 91089, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 48 [48*5 + 36], Iteration for Tiled Space: 1 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: In, Size: 12000, Base1: 0, Base2: 12000 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 24000, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 25104, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 25380, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 25656, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 3120, Base1: 28140, Base2: 31260 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 12480, Base1: 34380, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 46860, Base2: 0 +S7_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 46872, Reusable Memory: 1864, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S10_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S10_Conv2d_276x276x1x1_Relu ======================= +S10_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S10_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S10_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S10_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S10_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S10_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S10_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S10_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S13_Conv2d_276x1x3x3_Relu ======================= +S13_Conv2d_276x1x3x3_Relu Partition[0] Size = 91105 (Min: 30, Max: 157457), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S13_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 1104 Bytes buffer +S13_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 276 Bytes buffer +S13_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 276 Bytes buffer +S13_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 2484 Bytes buffer +S13_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S13_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S13_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 10400, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 11504, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 11780, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 12056, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 14540, Base2: 19740 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 24940, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45740, Base2: 0 +S13_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S16_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S16_Conv2d_276x276x1x1_Relu ======================= +S16_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S16_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S16_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S16_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S16_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S16_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S16_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S16_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S19_Conv2d_276x1x3x3_Relu ======================= +S19_Conv2d_276x1x3x3_Relu Partition[0] Size = 91105 (Min: 30, Max: 157457), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S19_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 1104 Bytes buffer +S19_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 276 Bytes buffer +S19_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 276 Bytes buffer +S19_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 2484 Bytes buffer +S19_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S19_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S19_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 10400, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 11504, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 11780, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 12056, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 14540, Base2: 19740 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 24940, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45740, Base2: 0 +S19_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S22_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S22_Conv2d_276x276x1x1_Relu ======================= +S22_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S22_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S22_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S22_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S22_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S22_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S22_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S22_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S25_Conv2d_276x1x3x3_Relu ======================= +S25_Conv2d_276x1x3x3_Relu Partition[0] Size = 91105 (Min: 30, Max: 157457), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S25_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 1104 Bytes buffer +S25_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 276 Bytes buffer +S25_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 276 Bytes buffer +S25_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 2484 Bytes buffer +S25_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S25_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S25_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 10400, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 11504, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 11780, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 12056, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 14540, Base2: 19740 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 24940, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45740, Base2: 0 +S25_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S28_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S28_Conv2d_276x276x1x1_Relu ======================= +S28_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S28_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S28_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S28_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S28_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S28_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S28_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S28_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S31_Conv2d_276x1x3x3_Relu ======================= +S31_Conv2d_276x1x3x3_Relu Partition[0] Size = 91105 (Min: 30, Max: 157457), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S31_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 1104 Bytes buffer +S31_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 276 Bytes buffer +S31_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 276 Bytes buffer +S31_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 2484 Bytes buffer +S31_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S31_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S31_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 10400, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 11504, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 11780, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 12056, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 14540, Base2: 19740 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 24940, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45740, Base2: 0 +S31_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S34_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S34_Conv2d_276x276x1x1_Relu ======================= +S34_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S34_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S34_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S34_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S34_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S34_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S34_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S34_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S34_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S34_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 17940 + +==== Process Tiling For User Kernel: S35_AveragePool_13x5 ======================= +S35_AveragePool_13x5 Partition[0] Size = 36999 (Min: 130, Max: 36459), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=1 + In Dim: 13, TileOverlap: 11, Ratio: 2.000000 + Out Dim: 1, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S35_AveragePool_13x5, Total Raw Memory: 18228 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S35_AveragePool_13x5, Arg: In, Size: 17940, Base1: 0, Base2: 0 +Ker: S35_AveragePool_13x5, Arg: Out, Size: 276, Base1: 17940, Base2: 0 +Ker: S35_AveragePool_13x5, Arg: Infos, Size: 12, Base1: 18216, Base2: 0 +S35_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 18228, Reusable Memory: 30508, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S38_Linear_12x276x1x1, Linear: InDim: 276, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S38_Linear_12x276x1x1 ======================= +S38_Linear_12x276x1x1 Partition[0] Size = 7091 (Min: 0, Max: 7175), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=1 + +Kernel: S38_Linear_12x276x1x1, Total Raw Memory: 3684 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S38_Linear_12x276x1x1, Arg: In, Size: 276, Base1: 0, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Filter, Size: 3312, Base1: 276, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Bias, Size: 48, Base1: 3588, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Out, Size: 12, Base1: 3636, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Scale, Size: 12, Base1: 3648, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: ScaleN, Size: 12, Base1: 3660, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Infos, Size: 12, Base1: 3672, Base2: 0 +S38_Linear_12x276x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 3684, Reusable Memory: 45052, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S39_SoftMax ======================= + S39_SoftMax Partition[0] Size = 51 (Min: 8, Max: 63), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=12 + In Dim: 12, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 12, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S39_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S39_SoftMax, Arg: In, Size: 12, Base1: 0, Base2: 0 +Ker: S39_SoftMax, Arg: Out, Size: 24, Base1: 12, Base2: 0 +Ker: S39_SoftMax, Arg: Infos, Size: 12, Base1: 36, Base2: 0 + S39_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S38_Output[ In] Adding Edge From S38_Linear_12x276x1x1 To S39_SoftMax New + Symbol: S35_Output[ In] Adding Edge From S35_AveragePool_13x5 To S38_Linear_12x276x1x1 New + Symbol: S34_Output[ In] Adding Edge From S34_Conv2d_276x276x1x1_Relu To S35_AveragePool_13x5 New + Symbol: S31_Output[ In] Adding Edge From S31_Conv2d_276x1x3x3_Relu To S34_Conv2d_276x276x1x1_Relu New + Symbol: S28_Output[ In] Adding Edge From S28_Conv2d_276x276x1x1_Relu To S31_Conv2d_276x1x3x3_Relu New + Symbol: S25_Output[ In] Adding Edge From S25_Conv2d_276x1x3x3_Relu To S28_Conv2d_276x276x1x1_Relu New + Symbol: S22_Output[ In] Adding Edge From S22_Conv2d_276x276x1x1_Relu To S25_Conv2d_276x1x3x3_Relu New + Symbol: S19_Output[ In] Adding Edge From S19_Conv2d_276x1x3x3_Relu To S22_Conv2d_276x276x1x1_Relu New + Symbol: S16_Output[ In] Adding Edge From S16_Conv2d_276x276x1x1_Relu To S19_Conv2d_276x1x3x3_Relu New + Symbol: S13_Output[ In] Adding Edge From S13_Conv2d_276x1x3x3_Relu To S16_Conv2d_276x276x1x1_Relu New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_276x276x1x1_Relu To S13_Conv2d_276x1x3x3_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_276x1x3x3_Relu To S10_Conv2d_276x276x1x1_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_276x1x10x4_Relu To S7_Conv2d_276x1x3x3_Relu New + Symbol: Output_1[ Out] Adding Edge From S39_SoftMax To __GraphExit__ New + Symbol: S39_Infos[ In] Adding Edge From __GraphEntry__ To S39_SoftMax New + Symbol: S38_Infos[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 New + Symbol: S38_Mul_shift[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 Exists + Symbol: S38_Mul_scale[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 Exists + Symbol: Dscnnfc1matmul_bias[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 Exists + Symbol: Dscnnfc1weights_quantfakequant[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 Exists + Symbol: S35_Infos[ In] Adding Edge From __GraphEntry__ To S35_AveragePool_13x5 New + Symbol: S34_Infos[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu New + Symbol: S34_Mul_shift[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu Exists + Symbol: S34_Mul_scale[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_5pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_5pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu Exists + Symbol: S31_Infos[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu New + Symbol: S31_Mul_shift[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu Exists + Symbol: S31_Mul_scale[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_5dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_5dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu Exists + Symbol: S28_Infos[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu New + Symbol: S28_Mul_shift[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu Exists + Symbol: S28_Mul_scale[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu Exists + Symbol: S25_Infos[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu New + Symbol: S25_Mul_shift[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu Exists + Symbol: S25_Mul_scale[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu Exists + Symbol: S22_Infos[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu New + Symbol: S22_Mul_shift[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu Exists + Symbol: S22_Mul_scale[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu Exists + Symbol: S19_Infos[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu New + Symbol: S19_Mul_shift[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu Exists + Symbol: S19_Mul_scale[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu Exists + Symbol: S16_Infos[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu New + Symbol: S16_Mul_shift[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu Exists + Symbol: S16_Mul_scale[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu Exists + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu New + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu Exists + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu New + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu Exists + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu New + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists + Symbol: Dscnnconv_1conv2d_fold_bias[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists + Symbol: Dscnnconv_1weights_quantfakequ[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists + Symbol: Input_1[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 86940 => Alloc: OK + +After Const Allocation, TopL3: 304704, TopL2: 284517 => Alloc: OK + +[FULL] Remapping [163116 .. 284516] to [0 .. 121400] Align compensation: 3 +[PART] Remapping [0 .. 163115] to [121404 .. 284519] Align compensation: 0 +[PART] Remapping [284517 .. 349999] to [284520 .. 350002] Align compensation: 1 +[FULL] Remapping [0 .. 304703] to [0 .. 304703] Align compensation: 0 +[PART] Remapping [304704 .. 6388607] to [304704 .. 6388607] Align compensation: 0 +Symbol allocation for graph KWS_ds_cnn_l_quantCNN is sucessfull, L2: 284517 out of 350000, L3: 304704 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => Input_1 + (null) => Dscnnconv_1weights_quantfakequ + (null) => Dscnnconv_1conv2d_fold_bias + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S4_Infos + (null) => Dscnnconv_ds_1dw_convweights_q + (null) => Dscnnconv_ds_1dw_convdepthwise + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S7_Infos + (null) => Dscnnconv_ds_1pw_convweights_q + (null) => Dscnnconv_ds_1pw_convconv2d_fo + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S10_Infos + (null) => Dscnnconv_ds_2dw_convweights_q + (null) => Dscnnconv_ds_2dw_convdepthwise + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S13_Infos + (null) => Dscnnconv_ds_2pw_convweights_q + (null) => Dscnnconv_ds_2pw_convconv2d_fo + (null) => S16_Mul_scale + (null) => S16_Mul_shift + (null) => S16_Infos + (null) => Dscnnconv_ds_3dw_convweights_q + (null) => Dscnnconv_ds_3dw_convdepthwise + (null) => S19_Mul_scale + (null) => S19_Mul_shift + (null) => S19_Infos + (null) => Dscnnconv_ds_3pw_convweights_q + (null) => Dscnnconv_ds_3pw_convconv2d_fo + (null) => S22_Mul_scale + (null) => S22_Mul_shift + (null) => S22_Infos + (null) => Dscnnconv_ds_4dw_convweights_q + (null) => Dscnnconv_ds_4dw_convdepthwise + (null) => S25_Mul_scale + (null) => S25_Mul_shift + (null) => S25_Infos + (null) => Dscnnconv_ds_4pw_convweights_q + (null) => Dscnnconv_ds_4pw_convconv2d_fo + (null) => S28_Mul_scale + (null) => S28_Mul_shift + (null) => S28_Infos + (null) => Dscnnconv_ds_5dw_convweights_q + (null) => Dscnnconv_ds_5dw_convdepthwise + (null) => S31_Mul_scale + (null) => S31_Mul_shift + (null) => S31_Infos + (null) => Dscnnconv_ds_5pw_convweights_q + (null) => Dscnnconv_ds_5pw_convconv2d_fo + (null) => S34_Mul_scale + (null) => S34_Mul_shift + (null) => S34_Infos + (null) => S35_Infos + (null) => Dscnnfc1weights_quantfakequant + (null) => Dscnnfc1matmul_bias + (null) => S38_Mul_scale + (null) => S38_Mul_shift + (null) => S38_Infos + (null) => S39_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 13 14 + + Living Dynamic Symbols: [Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S4_Conv2d_276x1x10x4_Relu, Operations: 2829000 + I In => Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 1940, TileOverhead: 3.959184, L2Buff: 0, Addr: 0 +CI PartBuff Filter => Dscnnconv_1weights_quantfakequ --L2-- Size: 11040, L3_Move: 0, L2_Move: 11040, TileOverhead: 1.000000, L2Buff: 0, Addr: 2024 +CI Bias => Dscnnconv_1conv2d_fold_bias --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 320 + O Out => S4_Output --L2-- Size: 69000, L3_Move: 0, L2_Move: 69000, TileOverhead: 1.000000, L2Buff: 0, Addr: 13544 +CI Buff Scale => S4_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 1472 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 1748 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 48104 + Kernel Memory : L3: 0, L2: 82195 + Kernel Total Memory: 82195, L3 moves: 0, L2 moves: 83645, Move overhead: 1.017641 + Kernel Operations : 2829000 [KernelOper/GraphOper: 9.924761%], Move/Operation ratio: [L3: 0.000000, L2: 0.029567] + Successors: 2 + + Living Dynamic Symbols: [Input_1] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S7_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S4_Output --L2-- Size: 69000, L3_Move: 0, L2_Move: 69000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_1dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 25656 +CI Buff Bias => Dscnnconv_ds_1dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S7_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28140 +CI Buff Scale => S7_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 25104 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 25380 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 46860 + Kernel Memory : L3: 0, L2: 91089 + Kernel Total Memory: 91089, L3 moves: 0, L2 moves: 91089, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.507742] + Successors: 3 + + Living Dynamic Symbols: [S4_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S10_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S7_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_1pw_convweights_q --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_1pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S10_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S10_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.041083] + Successors: 4 + + Living Dynamic Symbols: [S7_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S13_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S10_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_2dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => Dscnnconv_ds_2dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S13_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S13_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 5 + + Living Dynamic Symbols: [S10_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S16_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S13_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_2pw_convweights_q --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_2pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S16_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S16_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S16_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S16_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.041083] + Successors: 6 + + Living Dynamic Symbols: [S13_Output] [S16_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S19_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S16_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_3dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => Dscnnconv_ds_3dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S19_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S19_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S19_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S19_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 7 + + Living Dynamic Symbols: [S16_Output] [S19_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S22_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S19_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_3pw_convweights_q --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_3pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S22_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S22_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S22_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S22_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.041083] + Successors: 8 + + Living Dynamic Symbols: [S19_Output] [S22_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S25_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S22_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_4dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => Dscnnconv_ds_4dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S25_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S25_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S25_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S25_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 9 + + Living Dynamic Symbols: [S22_Output] [S25_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S28_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S25_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_4pw_convweights_q --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_4pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S28_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S28_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S28_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S28_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.041083] + Successors: 10 + + Living Dynamic Symbols: [S25_Output] [S28_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S31_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S28_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_5dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => Dscnnconv_ds_5dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S31_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S31_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S31_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S31_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 11 + + Living Dynamic Symbols: [S28_Output] [S31_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S34_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S31_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_5pw_convweights_q --L2-- Size: 76176, L3_Move: 0, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_5pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S34_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S34_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S34_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S34_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 0, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.000000, L2: 0.041083] + Successors: 12 + + Living Dynamic Symbols: [S31_Output] [S34_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S35_AveragePool_13x5, Operations: 17940 + I Buff In => S34_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S35_Output --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 17940 +CI Buff Infos => S35_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 18216 + Kernel Memory : L3: 0, L2: 18225 + Kernel Total Memory: 18225, L3 moves: 0, L2 moves: 18225, Move overhead: 1.000000 + Kernel Operations : 17940 [KernelOper/GraphOper: 0.062938%], Move/Operation ratio: [L3: 0.000000, L2: 1.015886] + Successors: 13 + + Living Dynamic Symbols: [S34_Output] [S35_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: UKer S38_Linear_12x276x1x1, Operations: 3312 + I Buff In => S35_Output --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnfc1weights_quantfakequant --L2-- Size: 3312, L3_Move: 0, L2_Move: 3312, TileOverhead: 1.000000, L2Buff: 0, Addr: 276 +CI Buff Bias => Dscnnfc1matmul_bias --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 3588 + O Buff Out => S38_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3636 +CI Buff Scale => S38_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3648 +CI Buff ScaleN => S38_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3660 +CI Buff Infos => S38_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 3672 + Kernel Memory : L3: 0, L2: 3681 + Kernel Total Memory: 3681, L3 moves: 0, L2 moves: 3681, Move overhead: 1.000000 + Kernel Operations : 3312 [KernelOper/GraphOper: 0.011619%], Move/Operation ratio: [L3: 0.000000, L2: 1.111413] + Successors: 14 + + Living Dynamic Symbols: [S35_Output] [S38_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 14, Channel 0 0: UKer S39_SoftMax, Operations: 12 + I Buff In => S38_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S39_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000042%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 15 + + Living Dynamic Symbols: [Output_1] [S38_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 15, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 113721 + Graph nodes min global memory: L3: 76176, L2: 113724 + Graph sum of kernel arguments size: 923956, L3 moves: 304704, L2 moves: 1373906, Move overhead: 1.486982 + Graph total operations: 28504464 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 923956, Total L3_Move: 304704, Total L2_Move: 1373906, Tiling Overhead Average: 1.486982 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic -228528 86940 + Const 304704 77844 + Total 76176 113724 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + Input_1 Externally allocated + Dscnnconv_1weights_quantfakequ INSTALL: HyperFlash[ 0: 15]@ 380880 LOAD: L2[ 0: 15]@ 76176 EXEC: L2[ 0: 15]@ 76176 , Size: 11040 + Dscnnconv_1conv2d_fold_bias INSTALL: HyperFlash[ 0: 15]@ 407652 LOAD: L2[ 0: 15]@ 102948 EXEC: L2[ 0: 15]@ 102948 , Size: 1104 + S4_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 419796 LOAD: L2[ 0: 15]@ 115092 EXEC: L2[ 0: 15]@ 115092 , Size: 276 + S4_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 420072 LOAD: L2[ 0: 15]@ 115368 EXEC: L2[ 0: 15]@ 115368 , Size: 276 + S4_Infos INSTALL: HyperFlash[ 0: 15]@ 425916 LOAD: L2[ 0: 15]@ 121212 EXEC: L2[ 0: 15]@ 121212 , Size: 9 + Dscnnconv_ds_1dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 395232 LOAD: L2[ 0: 15]@ 90528 EXEC: L2[ 0: 15]@ 90528 , Size: 2484 + Dscnnconv_ds_1dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 408756 LOAD: L2[ 0: 15]@ 104052 EXEC: L2[ 0: 15]@ 104052 , Size: 1104 + S7_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 420348 LOAD: L2[ 0: 15]@ 115644 EXEC: L2[ 0: 15]@ 115644 , Size: 276 + S7_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 420624 LOAD: L2[ 0: 15]@ 115920 EXEC: L2[ 0: 15]@ 115920 , Size: 276 + S7_Infos INSTALL: HyperFlash[ 0: 15]@ 425928 LOAD: L2[ 0: 15]@ 121224 EXEC: L2[ 0: 15]@ 121224 , Size: 9 + Dscnnconv_ds_1pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 0 LOAD: HyperRam[ 0: 15]@ 0 EXEC: L2[ 1: 3]@ 208344 , Size: 76176 + Dscnnconv_ds_1pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 409860 LOAD: L2[ 0: 15]@ 105156 EXEC: L2[ 0: 15]@ 105156 , Size: 1104 + S10_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 420900 LOAD: L2[ 0: 15]@ 116196 EXEC: L2[ 0: 15]@ 116196 , Size: 276 + S10_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 421176 LOAD: L2[ 0: 15]@ 116472 EXEC: L2[ 0: 15]@ 116472 , Size: 276 + S10_Infos INSTALL: HyperFlash[ 0: 15]@ 425940 LOAD: L2[ 0: 15]@ 121236 EXEC: L2[ 0: 15]@ 121236 , Size: 9 + Dscnnconv_ds_2dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 397716 LOAD: L2[ 0: 15]@ 93012 EXEC: L2[ 0: 15]@ 93012 , Size: 2484 + Dscnnconv_ds_2dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 410964 LOAD: L2[ 0: 15]@ 106260 EXEC: L2[ 0: 15]@ 106260 , Size: 1104 + S13_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 421452 LOAD: L2[ 0: 15]@ 116748 EXEC: L2[ 0: 15]@ 116748 , Size: 276 + S13_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 421728 LOAD: L2[ 0: 15]@ 117024 EXEC: L2[ 0: 15]@ 117024 , Size: 276 + S13_Infos INSTALL: HyperFlash[ 0: 15]@ 425952 LOAD: L2[ 0: 15]@ 121248 EXEC: L2[ 0: 15]@ 121248 , Size: 9 + Dscnnconv_ds_2pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 76176 LOAD: HyperRam[ 0: 15]@ 76176 EXEC: L2[ 4: 5]@ 157284 , Size: 76176 + Dscnnconv_ds_2pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 412068 LOAD: L2[ 0: 15]@ 107364 EXEC: L2[ 0: 15]@ 107364 , Size: 1104 + S16_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 422004 LOAD: L2[ 0: 15]@ 117300 EXEC: L2[ 0: 15]@ 117300 , Size: 276 + S16_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 422280 LOAD: L2[ 0: 15]@ 117576 EXEC: L2[ 0: 15]@ 117576 , Size: 276 + S16_Infos INSTALL: HyperFlash[ 0: 15]@ 425964 LOAD: L2[ 0: 15]@ 121260 EXEC: L2[ 0: 15]@ 121260 , Size: 9 + Dscnnconv_ds_3dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 400200 LOAD: L2[ 0: 15]@ 95496 EXEC: L2[ 0: 15]@ 95496 , Size: 2484 + Dscnnconv_ds_3dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 413172 LOAD: L2[ 0: 15]@ 108468 EXEC: L2[ 0: 15]@ 108468 , Size: 1104 + S19_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 422556 LOAD: L2[ 0: 15]@ 117852 EXEC: L2[ 0: 15]@ 117852 , Size: 276 + S19_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 422832 LOAD: L2[ 0: 15]@ 118128 EXEC: L2[ 0: 15]@ 118128 , Size: 276 + S19_Infos INSTALL: HyperFlash[ 0: 15]@ 425976 LOAD: L2[ 0: 15]@ 121272 EXEC: L2[ 0: 15]@ 121272 , Size: 9 + Dscnnconv_ds_3pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 152352 LOAD: HyperRam[ 0: 15]@ 152352 EXEC: L2[ 6: 7]@ 157284 , Size: 76176 + Dscnnconv_ds_3pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 414276 LOAD: L2[ 0: 15]@ 109572 EXEC: L2[ 0: 15]@ 109572 , Size: 1104 + S22_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 423108 LOAD: L2[ 0: 15]@ 118404 EXEC: L2[ 0: 15]@ 118404 , Size: 276 + S22_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 423384 LOAD: L2[ 0: 15]@ 118680 EXEC: L2[ 0: 15]@ 118680 , Size: 276 + S22_Infos INSTALL: HyperFlash[ 0: 15]@ 425988 LOAD: L2[ 0: 15]@ 121284 EXEC: L2[ 0: 15]@ 121284 , Size: 9 + Dscnnconv_ds_4dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 402684 LOAD: L2[ 0: 15]@ 97980 EXEC: L2[ 0: 15]@ 97980 , Size: 2484 + Dscnnconv_ds_4dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 415380 LOAD: L2[ 0: 15]@ 110676 EXEC: L2[ 0: 15]@ 110676 , Size: 1104 + S25_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 423660 LOAD: L2[ 0: 15]@ 118956 EXEC: L2[ 0: 15]@ 118956 , Size: 276 + S25_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 423936 LOAD: L2[ 0: 15]@ 119232 EXEC: L2[ 0: 15]@ 119232 , Size: 276 + S25_Infos INSTALL: HyperFlash[ 0: 15]@ 426000 LOAD: L2[ 0: 15]@ 121296 EXEC: L2[ 0: 15]@ 121296 , Size: 9 + Dscnnconv_ds_4pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 228528 LOAD: HyperRam[ 0: 15]@ 228528 EXEC: L2[ 8: 9]@ 175224 , Size: 76176 + Dscnnconv_ds_4pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 416484 LOAD: L2[ 0: 15]@ 111780 EXEC: L2[ 0: 15]@ 111780 , Size: 1104 + S28_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 424212 LOAD: L2[ 0: 15]@ 119508 EXEC: L2[ 0: 15]@ 119508 , Size: 276 + S28_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 424488 LOAD: L2[ 0: 15]@ 119784 EXEC: L2[ 0: 15]@ 119784 , Size: 276 + S28_Infos INSTALL: HyperFlash[ 0: 15]@ 426012 LOAD: L2[ 0: 15]@ 121308 EXEC: L2[ 0: 15]@ 121308 , Size: 9 + Dscnnconv_ds_5dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 405168 LOAD: L2[ 0: 15]@ 100464 EXEC: L2[ 0: 15]@ 100464 , Size: 2484 + Dscnnconv_ds_5dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 417588 LOAD: L2[ 0: 15]@ 112884 EXEC: L2[ 0: 15]@ 112884 , Size: 1104 + S31_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 424764 LOAD: L2[ 0: 15]@ 120060 EXEC: L2[ 0: 15]@ 120060 , Size: 276 + S31_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 425040 LOAD: L2[ 0: 15]@ 120336 EXEC: L2[ 0: 15]@ 120336 , Size: 276 + S31_Infos INSTALL: HyperFlash[ 0: 15]@ 426024 LOAD: L2[ 0: 15]@ 121320 EXEC: L2[ 0: 15]@ 121320 , Size: 9 + Dscnnconv_ds_5pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 304704 LOAD: L2[ 0: 15]@ 0 EXEC: L2[ 0: 15]@ 0 , Size: 76176 + Dscnnconv_ds_5pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 418692 LOAD: L2[ 0: 15]@ 113988 EXEC: L2[ 0: 15]@ 113988 , Size: 1104 + S34_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 425316 LOAD: L2[ 0: 15]@ 120612 EXEC: L2[ 0: 15]@ 120612 , Size: 276 + S34_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 425592 LOAD: L2[ 0: 15]@ 120888 EXEC: L2[ 0: 15]@ 120888 , Size: 276 + S34_Infos INSTALL: HyperFlash[ 0: 15]@ 426036 LOAD: L2[ 0: 15]@ 121332 EXEC: L2[ 0: 15]@ 121332 , Size: 9 + S35_Infos INSTALL: HyperFlash[ 0: 15]@ 426048 LOAD: L2[ 0: 15]@ 121344 EXEC: L2[ 0: 15]@ 121344 , Size: 9 + Dscnnfc1weights_quantfakequant INSTALL: HyperFlash[ 0: 15]@ 391920 LOAD: L2[ 0: 15]@ 87216 EXEC: L2[ 0: 15]@ 87216 , Size: 3312 + Dscnnfc1matmul_bias INSTALL: HyperFlash[ 0: 15]@ 425868 LOAD: L2[ 0: 15]@ 121164 EXEC: L2[ 0: 15]@ 121164 , Size: 48 + S38_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 426060 LOAD: L2[ 0: 15]@ 121356 EXEC: L2[ 0: 15]@ 121356 , Size: 12 + S38_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 426072 LOAD: L2[ 0: 15]@ 121368 EXEC: L2[ 0: 15]@ 121368 , Size: 12 + S38_Infos INSTALL: HyperFlash[ 0: 15]@ 426084 LOAD: L2[ 0: 15]@ 121380 EXEC: L2[ 0: 15]@ 121380 , Size: 9 + S39_Infos INSTALL: HyperFlash[ 0: 15]@ 426096 LOAD: L2[ 0: 15]@ 121392 EXEC: L2[ 0: 15]@ 121392 , Size: 9 + Output_1 Externally allocated + S4_Output EXEC: L2[ 1: 2]@ 139344 , Size: 69000 + S7_Output EXEC: L2[ 2: 3]@ 121404 , Size: 17940 + S10_Output EXEC: L2[ 3: 4]@ 139344 , Size: 17940 + S13_Output EXEC: L2[ 4: 5]@ 121404 , Size: 17940 + S16_Output EXEC: L2[ 5: 6]@ 139344 , Size: 17940 + S19_Output EXEC: L2[ 6: 7]@ 121404 , Size: 17940 + S22_Output EXEC: L2[ 7: 8]@ 139344 , Size: 17940 + S25_Output EXEC: L2[ 8: 9]@ 157284 , Size: 17940 + S28_Output EXEC: L2[ 9: 10]@ 121404 , Size: 17940 + S31_Output EXEC: L2[ 10: 11]@ 139344 , Size: 17940 + S34_Output EXEC: L2[ 11: 12]@ 121404 , Size: 17940 + S35_Output EXEC: L2[ 12: 13]@ 139344 , Size: 276 + S38_Output EXEC: L2[ 13: 14]@ 121404 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S4_Conv2d_276x1x10x4_Relu +Generating Code For User Kernel: S7_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S10_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S13_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S16_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S19_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S22_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S25_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S28_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S31_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S34_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S35_AveragePool_13x5 +Generating Code For User Kernel: S38_Linear_12x276x1x1 +Generating Code For User Kernel: S39_SoftMax +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1weights_quantfakequ.tensor: 11040 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1conv2d_fold_bias.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_5dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_5dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S31_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S31_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S31_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_5pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_5pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S34_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S34_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S34_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S35_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1weights_quantfakequant.tensor: 3312 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1matmul_bias.tensor: 12 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S38_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S38_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S38_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S39_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_l_quant_L3_Flash_Const.dat (size 426108) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 48116 +L2 Memory size (Bytes) : Given: 350000, Used: 284517 +L3 Memory size (Bytes) : Given: 6388608, Used: 304704 + +L3 Memory bandwidth for 1 graph run : 304704 Bytes +L2 Memory bandwidth for 1 graph run : 1373906 Bytes +Sum of all Kernels arguments size : 923956 Bytes +Tiling Bandwith overhead : 1.486982 Move/KerArgSize +Sum of baseline bandwidth : 50099340 Bytes +Percentage of baseline BW for L2 : 2.74236 % +Percentage of baseline BW for L3 : 0.6082 % +Sum of all Kernels operations : 28504464 Operations +Total amount of flash coefficients : 426108 Bytes + +Basic kernels library : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_l_quant.h +Output Directory : BUILD_MODEL_SQ8_EMUL + +The following files have been generated: + KWS_ds_cnn_l_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_l_quantKernels.h Header file for the generated C code + KWS_ds_cnn_l_quant_L3_Flash_Const.dat Flash content for Graph constants +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o -lm +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +W0608 14:20:28.837193 139654693844800 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +W0608 14:20:28.837715 139654693844800 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +W0608 14:20:28.837951 139654693844800 module_wrapper.py:139] From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +2021-06-08 14:20:28.839356: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA +2021-06-08 14:20:28.849666: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2096060000 Hz +2021-06-08 14:20:28.849973: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5591551093d0 initialized for platform Host (this does not guarantee that XLA will be used). Devices: +2021-06-08 14:20:28.850012: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2021-06-08 14:20:28.852359: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/marco-gwt/GWT/gap_sdk/install/workstation/lib +2021-06-08 14:20:28.852406: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: UNKNOWN ERROR (303) +2021-06-08 14:20:28.852433: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ubuntu): /proc/driver/nvidia/version does not exist +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +W0608 14:22:24.616994 139654693844800 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +W0608 14:22:24.619407 139654693844800 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +W0608 14:22:25.017635 139654693844800 module_wrapper.py:139] From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +INFO:tensorflow:Validation set size:4445 +I0608 14:22:25.017889 139654693844800 test_accuracy_emul.py:111] Validation set size:4445 +INFO:tensorflow:Test set size:4890 +I0608 14:25:35.202639 139654693844800 test_accuracy_emul.py:157] Test set size:4890 +rm: cannot remove 'test.pgm': No such file or directory +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': True} +Pred/Tot: 97/ 100 Accuracy: 97.00% +Pred/Tot: 194/ 200 Accuracy: 97.00% +Pred/Tot: 286/ 300 Accuracy: 95.33% +Pred/Tot: 380/ 400 Accuracy: 95.00% +Pred/Tot: 477/ 500 Accuracy: 95.40% +Pred/Tot: 571/ 600 Accuracy: 95.17% +Pred/Tot: 669/ 700 Accuracy: 95.57% +Pred/Tot: 760/ 800 Accuracy: 95.00% +Pred/Tot: 853/ 900 Accuracy: 94.78% +Pred/Tot: 947/1000 Accuracy: 94.70% +Pred/Tot: 1039/1100 Accuracy: 94.45% +Pred/Tot: 1136/1200 Accuracy: 94.67% +Pred/Tot: 1228/1300 Accuracy: 94.46% +Pred/Tot: 1322/1400 Accuracy: 94.43% +Pred/Tot: 1416/1500 Accuracy: 94.40% +Pred/Tot: 1513/1600 Accuracy: 94.56% +Pred/Tot: 1609/1700 Accuracy: 94.65% +Pred/Tot: 1706/1800 Accuracy: 94.78% +Pred/Tot: 1801/1900 Accuracy: 94.79% +Pred/Tot: 1898/2000 Accuracy: 94.90% +Pred/Tot: 1995/2100 Accuracy: 95.00% +Pred/Tot: 2091/2200 Accuracy: 95.05% +Pred/Tot: 2186/2300 Accuracy: 95.04% +Pred/Tot: 2278/2400 Accuracy: 94.92% +Pred/Tot: 2368/2500 Accuracy: 94.72% +Pred/Tot: 2460/2600 Accuracy: 94.62% +Pred/Tot: 2557/2700 Accuracy: 94.70% +Pred/Tot: 2653/2800 Accuracy: 94.75% +Pred/Tot: 2748/2900 Accuracy: 94.76% +Pred/Tot: 2843/3000 Accuracy: 94.77% +Pred/Tot: 2940/3100 Accuracy: 94.84% +Pred/Tot: 3035/3200 Accuracy: 94.84% +Pred/Tot: 3131/3300 Accuracy: 94.88% +Pred/Tot: 3222/3400 Accuracy: 94.76% +Pred/Tot: 3318/3500 Accuracy: 94.80% +Pred/Tot: 3414/3600 Accuracy: 94.83% +Pred/Tot: 3507/3700 Accuracy: 94.78% +Pred/Tot: 3604/3800 Accuracy: 94.84% +Pred/Tot: 3694/3900 Accuracy: 94.72% +Pred/Tot: 3788/4000 Accuracy: 94.70% +Pred/Tot: 3884/4100 Accuracy: 94.73% +Pred/Tot: 3979/4200 Accuracy: 94.74% +Pred/Tot: 4072/4300 Accuracy: 94.70% +Pred/Tot: 4169/4400 Accuracy: 94.75% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4211/4444 Accuracy: 94.76% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 0 337 0 1 4 7 3 3 7 2 3 4] + [ 0 2 384 3 0 1 3 2 0 0 0 2] + [ 1 11 0 381 0 1 3 0 0 1 1 7] + [ 0 2 1 0 327 0 2 0 2 10 5 1] + [ 0 0 0 9 0 360 1 0 0 1 3 3] + [ 0 3 5 0 0 0 341 2 0 0 0 1] + [ 0 6 0 1 0 0 2 352 0 1 1 0] + [ 0 8 0 2 3 0 0 1 341 8 0 0] + [ 0 2 1 1 17 0 1 1 6 340 3 1] + [ 1 3 0 0 7 1 1 0 0 3 334 0] + [ 0 11 0 9 0 4 0 0 1 3 1 343]] +Pred/Tot: 91/ 100 Accuracy: 91.00% +Pred/Tot: 187/ 200 Accuracy: 93.50% +Pred/Tot: 282/ 300 Accuracy: 94.00% +Pred/Tot: 377/ 400 Accuracy: 94.25% +Pred/Tot: 469/ 500 Accuracy: 93.80% +Pred/Tot: 563/ 600 Accuracy: 93.83% +Pred/Tot: 659/ 700 Accuracy: 94.14% +Pred/Tot: 755/ 800 Accuracy: 94.38% +Pred/Tot: 852/ 900 Accuracy: 94.67% +Pred/Tot: 947/1000 Accuracy: 94.70% +Pred/Tot: 1038/1100 Accuracy: 94.36% +Pred/Tot: 1136/1200 Accuracy: 94.67% +Pred/Tot: 1229/1300 Accuracy: 94.54% +Pred/Tot: 1324/1400 Accuracy: 94.57% +Pred/Tot: 1417/1500 Accuracy: 94.47% +Pred/Tot: 1514/1600 Accuracy: 94.62% +Pred/Tot: 1607/1700 Accuracy: 94.53% +Pred/Tot: 1705/1800 Accuracy: 94.72% +Pred/Tot: 1795/1900 Accuracy: 94.47% +Pred/Tot: 1890/2000 Accuracy: 94.50% +Pred/Tot: 1985/2100 Accuracy: 94.52% +Pred/Tot: 2081/2200 Accuracy: 94.59% +Pred/Tot: 2175/2300 Accuracy: 94.57% +Pred/Tot: 2272/2400 Accuracy: 94.67% +Pred/Tot: 2367/2500 Accuracy: 94.68% +Pred/Tot: 2462/2600 Accuracy: 94.69% +Pred/Tot: 2557/2700 Accuracy: 94.70% +Pred/Tot: 2653/2800 Accuracy: 94.75% +Pred/Tot: 2749/2900 Accuracy: 94.79% +Pred/Tot: 2843/3000 Accuracy: 94.77% +Pred/Tot: 2937/3100 Accuracy: 94.74% +Pred/Tot: 3034/3200 Accuracy: 94.81% +Pred/Tot: 3133/3300 Accuracy: 94.94% +Pred/Tot: 3225/3400 Accuracy: 94.85% +Pred/Tot: 3319/3500 Accuracy: 94.83% +Pred/Tot: 3416/3600 Accuracy: 94.89% +Pred/Tot: 3508/3700 Accuracy: 94.81% +Pred/Tot: 3604/3800 Accuracy: 94.84% +Pred/Tot: 3701/3900 Accuracy: 94.90% +Pred/Tot: 3796/4000 Accuracy: 94.90% +Pred/Tot: 3888/4100 Accuracy: 94.83% +Pred/Tot: 3985/4200 Accuracy: 94.88% +Pred/Tot: 4080/4300 Accuracy: 94.88% +Pred/Tot: 4174/4400 Accuracy: 94.86% +Pred/Tot: 4264/4500 Accuracy: 94.76% +Pred/Tot: 4357/4600 Accuracy: 94.72% +Pred/Tot: 4453/4700 Accuracy: 94.74% +Pred/Tot: 4547/4800 Accuracy: 94.73% + +FINAL TESTING ACCURACY: +Pred/Tot: 4633/4889 Accuracy: 94.76% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 374 0 3 1 9 2 4 7 1 4 3] + [ 0 5 409 4 0 0 1 0 0 0 0 0] + [ 0 3 1 393 0 3 1 0 0 1 0 3] + [ 0 5 0 0 398 3 0 0 3 9 5 2] + [ 0 7 1 10 0 380 2 1 0 0 2 3] + [ 0 8 3 1 2 0 396 2 0 0 0 0] + [ 0 9 0 0 0 0 4 380 1 0 2 0] + [ 0 8 0 0 1 3 1 0 368 11 1 3] + [ 0 4 0 1 20 0 1 0 4 367 1 4] + [ 0 2 0 0 2 3 0 2 0 1 396 5] + [ 0 10 1 21 0 3 0 1 0 1 1 364]] diff --git a/accuracy_log/log_test_large_hp_spectr.txt b/accuracy_log/log_test_large_hp_spectr.txt new file mode 100644 index 0000000..fd1ff1f --- /dev/null +++ b/accuracy_log/log_test_large_hp_spectr.txt @@ -0,0 +1,3 @@ +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_l_quant.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 0 --use_high_prec 1 diff --git a/accuracy_log/log_test_large_hp_spectr_v2.txt b/accuracy_log/log_test_large_hp_spectr_v2.txt new file mode 100644 index 0000000..3ff79c5 --- /dev/null +++ b/accuracy_log/log_test_large_hp_spectr_v2.txt @@ -0,0 +1,27 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_l_quant.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 0 +WARNING:tensorflow:From utils/test_accuracy_emul.py:311: The name tf.app.run is deprecated. Please use tf.compat.v1.app.run instead. + +make[1]: *** No rule to make target 'model/KWS_ds_cnn_l_quant.tflite', needed by 'BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quant.tflite'. Stop. +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=0 LARGE=1 WITH_MFCC=1 USE_POWER=0 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +Traceback (most recent call last): + File "utils/test_accuracy_emul.py", line 311, in + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) + File "/home/marco-gwt/anaconda3/envs/tf1.15/lib/python3.7/site-packages/tensorflow_core/python/platform/app.py", line 40, in run + _run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef) + File "/home/marco-gwt/anaconda3/envs/tf1.15/lib/python3.7/site-packages/absl/app.py", line 300, in run + _run_main(main, args) + File "/home/marco-gwt/anaconda3/envs/tf1.15/lib/python3.7/site-packages/absl/app.py", line 251, in _run_main + sys.exit(main(argv)) + File "utils/test_accuracy_emul.py", line 90, in main + raise Exception("Executable not generated correctly") +Exception: Executable not generated correctly +make: *** [Makefile:125: test_accuracy] Error 1 diff --git a/accuracy_log/log_test_large_power.txt b/accuracy_log/log_test_large_power.txt new file mode 100644 index 0000000..d7a15fd --- /dev/null +++ b/accuracy_log/log_test_large_power.txt @@ -0,0 +1,1721 @@ +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_l_quant_power.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 1 --use_high_prec 0 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=0 LARGE=1 WITH_MFCC=1 USE_POWER=1 USE_HIGH_PREC=0 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +rm -f BUILD_MODEL_8BIT_EMUL/GenTile +rm -f -rf BUILD_MODEL_8BIT_EMUL +rm -f BUILD_MODEL_8BIT_EMUL/*.dat +rm -rf /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_8BIT_EMUL +cp model/KWS_ds_cnn_l_quant_power.tflite BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_l_quant.tflite +echo "GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL" +GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL +sed -e "s|MODEL_SRC|KWS_ds_cnn_l_quantModel.c|g" -e "s|TENSORS_DIR|BUILD_MODEL_8BIT_EMUL/tensors|g" -e "s|MODEL_BUILD|BUILD_MODEL_8BIT_EMUL|g" -e "s|GRAPH_DUMP||g" -e "s|LARGE_OPT|nodeoption 1 PARALLELFEATURES 0|g" \ + model/nntool_script_params > model/nntool_script +nntool -s model/nntool_script BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_l_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +load_quantization - was: False +now: True +open - opening graph file BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_l_quant.tflite load_quantizaion = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +debug - was: False +now: True +adjust_order - adding transposes to correct tensor order for AT kernels +set_aliases - looking for aliased edges +eliminate_transposes - eliminating unnecessary transposes +eliminate_transposes - search for transposes +eliminate_transposes - ++ Starting up from CONV_2D_0_0[0] +eliminate_transposes - looking up at input_1[0] transpose [2, 0, 1] +eliminate_transposes - accepted input_1 - input without fixed order - transpose input [2, 0, 1] +eliminate_transposes - ++ Found results for CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_0[0] +eliminate_transposes - looking down at CONV_2D_0_0_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_1 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_2[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_2 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_2[0] +eliminate_transposes - looking down at CONV_2D_0_2_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_3 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_2[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_4[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_4 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_4[0] +eliminate_transposes - looking down at CONV_2D_0_4_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_5 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_4[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_6[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_6 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_6[0] +eliminate_transposes - looking down at CONV_2D_0_6_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_7 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_6[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_8[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_8 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_8[0] +eliminate_transposes - looking down at CONV_2D_0_8_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_9[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_9 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_8[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_9[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_9_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_10[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_10 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_9[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_10[0] +eliminate_transposes - looking down at CONV_2D_0_10_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at AVERAGE_POOL_2D_0_11[0] transpose [1, 2, 0] +eliminate_transposes - accepted AVERAGE_POOL_2D_0_11 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_10[0] +eliminate_transposes - ++ Starting down from AVERAGE_POOL_2D_0_11[0] +eliminate_transposes - looking down at FULLY_CONNECTED_0_12[0] transpose [1, 2, 0] +eliminate_transposes - accepted FULLY_CONNECTED_0_12 - linear layer reorder input +eliminate_transposes - ++ Found results for AVERAGE_POOL_2D_0_11[0] +eliminate_transposes - eliminate transposes +eliminate_transposes_actions - Start Action (up): CONV_2D_0_0 +eliminate_transposes_actions - CONV_2D_0_0 delete transpose in[0] +eliminate_transposes_actions - CONV_2D_0_0 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - input_1 input dims with (2, 0, 1) +eliminate_transposes_actions - End Action (up): input_1 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_0 +eliminate_transposes_actions - CONV_2D_0_0 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_0 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_2 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_2 +eliminate_transposes_actions - CONV_2D_0_2 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_2 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_4 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_4 +eliminate_transposes_actions - CONV_2D_0_4 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_4 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_6 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_6 +eliminate_transposes_actions - CONV_2D_0_6 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_6 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_8 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_8 +eliminate_transposes_actions - CONV_2D_0_8 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_8 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_9 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_9 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_9 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_9 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_9 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_9 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_10 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_10 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_10 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_10 +eliminate_transposes_actions - CONV_2D_0_10 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_10 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_11 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_11 delete transpose in[0] +eliminate_transposes_actions - End Action (down): AVERAGE_POOL_2D_0_11 +eliminate_transposes_actions - Start Action (down): AVERAGE_POOL_2D_0_11 +eliminate_transposes_actions - AVERAGE_POOL_2D_0_11 delete transpose out[0] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_11 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - reorder linear layer FULLY_CONNECTED_0_12 in with shape 1x1x276 transposed (2, 0, 1) +eliminate_transposes_actions - End Action (down): FULLY_CONNECTED_0_12 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - search for transposes +eliminate_transposes - no transposes to eliminate found +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - no further transpose sequences found +set_aliases - looking for aliased edges +nngraph - adjusted order +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start remove_relus +matcher - fusions - start remove_noops +matcher - fusions - start fuse_external_bias_sq8 +matcher - fusions - start fuse_pad +matcher - fusions - start match_duplicate_operations +matcher - fusions - start gather_to_split +matcher - fusions - start slice_to_split +matcher - fusions - start unused_concats +matcher - fusions - start find_missing_quantization +matcher - fusions - start rnn_reverse +matcher - fusions - start rnn_unpack +matcher - fusions - start match_far_hsigmoid +matcher - fusions - start match_close_hsigmoid +matcher - fusions - start expand_transposes +matcher - fusions - start move_pooling_scale8 +matcher - fusions - start move_activations_scale8 +matcher - fusions - start fuse_gap_convs +matcher - fusions - start match_conv_active_pool +matcher - fusions - start match_conv_pool_active +matcher - fusions - start match_conv_active +match_gap_conv - fusing nodes CONV_2D_0_0,CONV_2D_0_0_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_1,DEPTHWISE_CONV_2D_0_1_activation +match_gap_conv - fusing nodes CONV_2D_0_2,CONV_2D_0_2_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_3,DEPTHWISE_CONV_2D_0_3_activation +match_gap_conv - fusing nodes CONV_2D_0_4,CONV_2D_0_4_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_5,DEPTHWISE_CONV_2D_0_5_activation +match_gap_conv - fusing nodes CONV_2D_0_6,CONV_2D_0_6_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_7,DEPTHWISE_CONV_2D_0_7_activation +match_gap_conv - fusing nodes CONV_2D_0_8,CONV_2D_0_8_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_9,DEPTHWISE_CONV_2D_0_9_activation +match_gap_conv - fusing nodes CONV_2D_0_10,CONV_2D_0_10_activation +matcher - fusions - match_conv_active modified graph +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start match_conv_pool +matcher - fusions - start fuse_gap_linear +matcher - fusions - start fuse_op_activation_scale8 +matcher - fusions - start propagate_softmax_sym_qrec +equalize_sym_mult_concats - propagating scale up from node SOFTMAX_0_13 to node FULLY_CONNECTED_0_12 +matcher - fusions - start equalize_sm_concats +matcher - fusions - start filter_bigger_than_input +matcher - fusions - start insert_copies +matcher - fusions - start propagate_up_rnn_in_qs +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| Step | Step name | Operation | Input Dims | Output Dims | Inputs | Active | Params | Ops | Params | Hints | +| | | | (cxhxw) | (cxhxw) | | size | size | | | | ++======+=============================+=========================+============+=============+========+========+========+=========+============================+======================+ +| 0 | input_1 | input | 1x49x10 | 1x49x10 | | 490 | 0 | | I 1x49x10 FIXED_ORDER=0 | in: hxwxc out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 1 | CONV_2D_0_0_fusion | conv_fusion_conv_active | 1x49x10 | 276x25x10 | 0 | 69490 | 11316 | 2.76M | F 276x1x10x4 S 2x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 4x5x1x2 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 2 | DEPTHWISE_CONV_2D_0_1_fusio | conv_fusion_conv_active | 276x25x10 | 276x13x5 | 1 | 86940 | 285 | 161.46K | F 276x1x3x3 S 2x2 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 276 M 1 P 1x1x0x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 3 | CONV_2D_0_2_fusion | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 2 | 35880 | 76452 | 4.95M | F 276x276x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 4 | DEPTHWISE_CONV_2D_0_3_fusio | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 3 | 35880 | 285 | 161.46K | F 276x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 276 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 5 | CONV_2D_0_4_fusion | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 4 | 35880 | 76452 | 4.95M | F 276x276x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 6 | DEPTHWISE_CONV_2D_0_5_fusio | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 5 | 35880 | 285 | 161.46K | F 276x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 276 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 7 | CONV_2D_0_6_fusion | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 6 | 35880 | 76452 | 4.95M | F 276x276x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 8 | DEPTHWISE_CONV_2D_0_7_fusio | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 7 | 35880 | 285 | 161.46K | F 276x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 276 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 9 | CONV_2D_0_8_fusion | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 8 | 35880 | 76452 | 4.95M | F 276x276x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 10 | DEPTHWISE_CONV_2D_0_9_fusio | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 9 | 35880 | 285 | 161.46K | F 276x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 276 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 11 | CONV_2D_0_10_fusion | conv_fusion_conv_active | 276x13x5 | 276x13x5 | 10 | 35880 | 76452 | 4.95M | F 276x276x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 12 | AVERAGE_POOL_2D_0_11 | average_pool | 276x13x5 | 276x1x1 | 11 | 18216 | 0 | 18.22K | T average F 13x5 S 2x2 P | in: cxhxw out: cxhxw | +| | | | | | | | | | 0x0x0x0 zero | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 13 | FULLY_CONNECTED_0_12 | linear | 276x1x1 | 12 | 12 | 288 | 3324 | 3.31K | F 12x276x1x1 | in: cx0x1 out: c | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 14 | SOFTMAX_0_13 | softmax | 12 | 12 | 13 | 24 | 0 | 24 | Beta 0.0 Axis None | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 15 | output_1 | output | 12 | 12 | 14 | 12 | 0 | | O 12 FIXED_ORDER=0 | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | 86940 | 398325 | 28.35M | | | +| | Max active/Total params | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | | 485265 | 28.35M | | | +| | Max mem usage | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| Step | Name | In | Out | Weights | Bias | Mulbias | Calc | Acc | ++======+==================+==================+=================+=================+==========+==========+=======+=======+ +| 0 | input_1 | -247.97>chan | Q32.0 | Q32.0 | +| | | 27124<246.03 | 62586<16.72 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 1 | CONV_2D_0_0_acti | -16.85>chan | Q32.0 | Q32.0 | +| | D_0_1 | 2586<16.72 | 24377<19.84 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 2 | DEPTHWISE_CONV_2 | -20.00>chan | Q32.0 | Q32.0 | +| | | 4377<19.84 | 00155<17.78 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 3 | CONV_2D_0_2_acti | -17.92>chan | Q32.0 | Q32.0 | +| | D_0_3 | 0155<17.78 | 64949<17.10 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 4 | DEPTHWISE_CONV_2 | -17.24>chan | Q32.0 | Q32.0 | +| | | 4949<17.10 | 43959<10.60 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 5 | CONV_2D_0_4_acti | -10.68>chan | Q32.0 | Q32.0 | +| | D_0_5 | 3959<10.60 | 66984<12.53 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 6 | DEPTHWISE_CONV_2 | -12.63>chan | Q32.0 | Q32.0 | +| | | 6984<12.53 | 0435<8.73 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 7 | CONV_2D_0_6_acti | -8.79>chan | Q32.0 | Q32.0 | +| | D_0_7 | 435<8.73 | 59043<11.76 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 8 | DEPTHWISE_CONV_2 | -11.85>chan | Q32.0 | Q32.0 | +| | | 9043<11.76 | 4307<8.12 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 9 | CONV_2D_0_8_acti | -8.18>chan | Q32.0 | Q32.0 | +| | D_0_9 | 307<8.12 | 84755<12.17 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 10 | DEPTHWISE_CONV_2 | -12.27>chan | Q32.0 | Q32.0 | +| | | 4755<12.17 | 1018<8.56 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 11 | CONV_2D_0_10_act | -8.63>chan | Q32.0 | Q32.0 | +| | 0_12 | 018<8.56 | 00000<33.61 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 14 | SOFTMAX_0_13 | -33.88 W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerSetBiasB32_SQ8 + ConvKerName: KerConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerReduct_CC_ReLU_SQ8 +Nb Oper : 2829000 + +==== Process Tiling For User Kernel: S1_Conv2d_276x1x10x4_Relu ======================= +S1_Conv2d_276x1x10x4_Relu Partition[0] Size = 1373 (Min: 200, Max: 3017), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S1_Conv2d_276x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 1 + In : Ratio: 2.000000, FixDim: 10, VarDim: 49 [ 49], Size: 984, Total: 984, Move: 135240 (Decl x 276.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 2088, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 2364, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 2640, Move: 276 (Decl x 1.000000) L2 + Filter : Ratio: 0.000000, Size: 80, Total: 2720, Move: 11040 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 25 [ 25], Size: 504, Total: 3224, Move: 69000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 25 [ 25], Size: 1000, Total: 4224, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 4236, Move: 9 (Decl x 1.000000) L2 +S1_Conv2d_276x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 4236, L2Move: 216945, L3Move: 0, Tiling Overhead: 2.639394 +S1_Conv2d_276x1x10x4_Relu Iteration for Tiled Space: 1 +S1_Conv2d_276x1x10x4_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 4236, Reusable Memory: 44500, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S2_Conv2d_276x1x3x3_Relu ======================= +S2_Conv2d_276x1x3x3_Relu Partition[0] Size = 102161 (Min: 60, Max: 265225), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S2_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 384 bytes will require 1104 bytes buffer +S2_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 96 bytes will require 276 bytes buffer +S2_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 bytes will require 276 bytes buffer +S2_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 864 bytes will require 2484 bytes buffer +S2_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 69000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 25104, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 25380, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 25656, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 28140, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 34380, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 46860, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 46872, Move: 9 (Decl x 1.000000) L2 +S2_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 46872, L2Move: 91089, L3Move: 0, Tiling Overhead: 1.000000 +S2_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 48 [48*5 + 36], Iteration for Tiled Space: 1 +S2_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 46872, Reusable Memory: 1864, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S3_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S3_Conv2d_276x276x1x1_Relu ======================= +S3_Conv2d_276x276x1x1_Relu Partition[0] Size = 3353 (Min: 2208, Max: 37089), Fraction: 0.32, Giving: 15567 bytes out of 48736 bytes +S3_Conv2d_276x276x1x1_Relu Partition[1] Size = 7144 (Min: 4416, Max: 190048), Fraction: 0.68, Giving: 33168 bytes out of 48736 bytes +S3_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 7 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 40 [ 276], Size: 22080, Total: 22080, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 23184, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 40 [ 276], Size: 5200, Total: 28384, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 28660, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 28936, Move: 276 (Decl x 1.000000) L2 +S3_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 28936, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S3_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 7 +S3_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 7 (Last one is truncated), Given L1 Memory: 33168, Used L1 Memory: 28936, Reusable Memory: 4232, Used L2 Memory: 0 +S3_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 24 [ 65], Size: 13248, Total: 14352, Move: 125580 (Decl x 7.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 14364, Move: 9 (Decl x 1.000000) L2 +S3_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 14364, L2Move: 125589, L3Move: 0, Tiling Overhead: 6.996992 +S3_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 3 +S3_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 15567, Used L1 Memory: 14364, Reusable Memory: 1200, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S4_Conv2d_276x1x3x3_Relu ======================= +S4_Conv2d_276x1x3x3_Relu Partition[0] Size = 91121 (Min: 30, Max: 157585), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S4_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 1104 bytes buffer +S4_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 276 bytes buffer +S4_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 276 bytes buffer +S4_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 2484 bytes buffer +S4_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S4_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +S4_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S5_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S5_Conv2d_276x276x1x1_Relu ======================= +S5_Conv2d_276x276x1x1_Relu Partition[0] Size = 3353 (Min: 2208, Max: 37089), Fraction: 0.32, Giving: 15567 bytes out of 48736 bytes +S5_Conv2d_276x276x1x1_Relu Partition[1] Size = 7144 (Min: 4416, Max: 190048), Fraction: 0.68, Giving: 33168 bytes out of 48736 bytes +S5_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 7 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 40 [ 276], Size: 22080, Total: 22080, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 23184, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 40 [ 276], Size: 5200, Total: 28384, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 28660, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 28936, Move: 276 (Decl x 1.000000) L2 +S5_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 28936, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S5_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 7 +S5_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 7 (Last one is truncated), Given L1 Memory: 33168, Used L1 Memory: 28936, Reusable Memory: 4232, Used L2 Memory: 0 +S5_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 24 [ 65], Size: 13248, Total: 14352, Move: 125580 (Decl x 7.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 14364, Move: 9 (Decl x 1.000000) L2 +S5_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 14364, L2Move: 125589, L3Move: 0, Tiling Overhead: 6.996992 +S5_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 3 +S5_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 15567, Used L1 Memory: 14364, Reusable Memory: 1200, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S6_Conv2d_276x1x3x3_Relu ======================= +S6_Conv2d_276x1x3x3_Relu Partition[0] Size = 91121 (Min: 30, Max: 157585), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S6_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 1104 bytes buffer +S6_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 276 bytes buffer +S6_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 276 bytes buffer +S6_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 2484 bytes buffer +S6_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S6_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S6_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +S6_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S7_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S7_Conv2d_276x276x1x1_Relu ======================= +S7_Conv2d_276x276x1x1_Relu Partition[0] Size = 3353 (Min: 2208, Max: 37089), Fraction: 0.32, Giving: 15567 bytes out of 48736 bytes +S7_Conv2d_276x276x1x1_Relu Partition[1] Size = 7144 (Min: 4416, Max: 190048), Fraction: 0.68, Giving: 33168 bytes out of 48736 bytes +S7_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 7 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 40 [ 276], Size: 22080, Total: 22080, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 23184, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 40 [ 276], Size: 5200, Total: 28384, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 28660, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 28936, Move: 276 (Decl x 1.000000) L2 +S7_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 28936, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 7 +S7_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 7 (Last one is truncated), Given L1 Memory: 33168, Used L1 Memory: 28936, Reusable Memory: 4232, Used L2 Memory: 0 +S7_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 24 [ 65], Size: 13248, Total: 14352, Move: 125580 (Decl x 7.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 14364, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 14364, L2Move: 125589, L3Move: 0, Tiling Overhead: 6.996992 +S7_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 3 +S7_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 15567, Used L1 Memory: 14364, Reusable Memory: 1200, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S8_Conv2d_276x1x3x3_Relu ======================= +S8_Conv2d_276x1x3x3_Relu Partition[0] Size = 91121 (Min: 30, Max: 157585), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S8_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 1104 bytes buffer +S8_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 276 bytes buffer +S8_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 276 bytes buffer +S8_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 2484 bytes buffer +S8_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S8_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S8_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +S8_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S9_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S9_Conv2d_276x276x1x1_Relu ======================= +S9_Conv2d_276x276x1x1_Relu Partition[0] Size = 3353 (Min: 2208, Max: 37089), Fraction: 0.32, Giving: 15567 bytes out of 48736 bytes +S9_Conv2d_276x276x1x1_Relu Partition[1] Size = 7144 (Min: 4416, Max: 190048), Fraction: 0.68, Giving: 33168 bytes out of 48736 bytes +S9_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 7 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 40 [ 276], Size: 22080, Total: 22080, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 23184, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 40 [ 276], Size: 5200, Total: 28384, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 28660, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 28936, Move: 276 (Decl x 1.000000) L2 +S9_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 28936, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S9_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 7 +S9_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 7 (Last one is truncated), Given L1 Memory: 33168, Used L1 Memory: 28936, Reusable Memory: 4232, Used L2 Memory: 0 +S9_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 24 [ 65], Size: 13248, Total: 14352, Move: 125580 (Decl x 7.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 14364, Move: 9 (Decl x 1.000000) L2 +S9_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 14364, L2Move: 125589, L3Move: 0, Tiling Overhead: 6.996992 +S9_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 3 +S9_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 15567, Used L1 Memory: 14364, Reusable Memory: 1200, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S10_Conv2d_276x1x3x3_Relu ======================= +S10_Conv2d_276x1x3x3_Relu Partition[0] Size = 91121 (Min: 30, Max: 157585), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S10_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 1104 bytes buffer +S10_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 276 bytes buffer +S10_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 276 bytes buffer +S10_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 2484 bytes buffer +S10_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S10_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S10_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +S10_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S11_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S11_Conv2d_276x276x1x1_Relu ======================= +S11_Conv2d_276x276x1x1_Relu Partition[0] Size = 3353 (Min: 2208, Max: 37089), Fraction: 0.32, Giving: 15567 bytes out of 48736 bytes +S11_Conv2d_276x276x1x1_Relu Partition[1] Size = 7144 (Min: 4416, Max: 190048), Fraction: 0.68, Giving: 33168 bytes out of 48736 bytes +S11_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 7 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 40 [ 276], Size: 22080, Total: 22080, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 23184, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 40 [ 276], Size: 5200, Total: 28384, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 28660, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 28936, Move: 276 (Decl x 1.000000) L2 +S11_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 28936, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S11_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 7 +S11_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 7 (Last one is truncated), Given L1 Memory: 33168, Used L1 Memory: 28936, Reusable Memory: 4232, Used L2 Memory: 0 +S11_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 24 [ 65], Size: 13248, Total: 14352, Move: 125580 (Decl x 7.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 14364, Move: 9 (Decl x 1.000000) L2 +S11_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 14364, L2Move: 125589, L3Move: 0, Tiling Overhead: 6.996992 +S11_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 3 +S11_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 15567, Used L1 Memory: 14364, Reusable Memory: 1200, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 17940 + +==== Process Tiling For User Kernel: S12_AveragePool_13x5 ======================= +S12_AveragePool_13x5 Partition[0] Size = 37005 (Min: 130, Max: 36477), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S12_AveragePool_13x5, Total Raw Memory: 18228 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S12_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 18228, Reusable Memory: 30508, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S13_Linear_12x276x1x1, Linear: InDim: 276, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S13_Linear_12x276x1x1 ======================= +S13_Linear_12x276x1x1 Partition[0] Size = 7105 (Min: 0, Max: 7273), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S13_Linear_12x276x1x1, Total Raw Memory: 3684 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S13_Linear_12x276x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 3684, Reusable Memory: 45052, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S14_SoftMax ======================= + S14_SoftMax Partition[0] Size = 57 (Min: 8, Max: 81), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S14_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. + S14_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S13_Output[ In] Adding Edge From S13_Linear_12x276x1x1 To S14_SoftMax New + Symbol: S12_Output[ In] Adding Edge From S12_AveragePool_13x5 To S13_Linear_12x276x1x1 New + Symbol: S11_Output[ In] Adding Edge From S11_Conv2d_276x276x1x1_Relu To S12_AveragePool_13x5 New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_276x1x3x3_Relu To S11_Conv2d_276x276x1x1_Relu New + Symbol: S9_Output[ In] Adding Edge From S9_Conv2d_276x276x1x1_Relu To S10_Conv2d_276x1x3x3_Relu New + Symbol: S8_Output[ In] Adding Edge From S8_Conv2d_276x1x3x3_Relu To S9_Conv2d_276x276x1x1_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_276x276x1x1_Relu To S8_Conv2d_276x1x3x3_Relu New + Symbol: S6_Output[ In] Adding Edge From S6_Conv2d_276x1x3x3_Relu To S7_Conv2d_276x276x1x1_Relu New + Symbol: S5_Output[ In] Adding Edge From S5_Conv2d_276x276x1x1_Relu To S6_Conv2d_276x1x3x3_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_276x1x3x3_Relu To S5_Conv2d_276x276x1x1_Relu New + Symbol: S3_Output[ In] Adding Edge From S3_Conv2d_276x276x1x1_Relu To S4_Conv2d_276x1x3x3_Relu New + Symbol: S2_Output[ In] Adding Edge From S2_Conv2d_276x1x3x3_Relu To S3_Conv2d_276x276x1x1_Relu New + Symbol: S1_Output[ In] Adding Edge From S1_Conv2d_276x1x10x4_Relu To S2_Conv2d_276x1x3x3_Relu New + Symbol: Output_1[Undef] Adding Edge From S14_SoftMax To __GraphExit__ New + Symbol: S14_Infos[ In] Adding Edge From __GraphEntry__ To S14_SoftMax New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Linear_12x276x1x1 New + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Linear_12x276x1x1 Exists + Symbol: S13_Biases[ In] Adding Edge From __GraphEntry__ To S13_Linear_12x276x1x1 Exists + Symbol: S13_Weights[ In] Adding Edge From __GraphEntry__ To S13_Linear_12x276x1x1 Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Linear_12x276x1x1 Exists + Symbol: S12_Infos[ In] Adding Edge From __GraphEntry__ To S12_AveragePool_13x5 New + Symbol: S11_Mul_shift[ In] Adding Edge From __GraphEntry__ To S11_Conv2d_276x276x1x1_Relu New + Symbol: S11_Mul_scale[ In] Adding Edge From __GraphEntry__ To S11_Conv2d_276x276x1x1_Relu Exists + Symbol: S11_Biases[ In] Adding Edge From __GraphEntry__ To S11_Conv2d_276x276x1x1_Relu Exists + Symbol: S11_Weights[ In] Adding Edge From __GraphEntry__ To S11_Conv2d_276x276x1x1_Relu Exists + Symbol: S11_Infos[ In] Adding Edge From __GraphEntry__ To S11_Conv2d_276x276x1x1_Relu Exists + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x1x3x3_Relu New + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x1x3x3_Relu Exists + Symbol: S10_Biases[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x1x3x3_Relu Exists + Symbol: S10_Weights[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x1x3x3_Relu Exists + Symbol: S9_Mul_shift[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_276x276x1x1_Relu New + Symbol: S9_Mul_scale[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_276x276x1x1_Relu Exists + Symbol: S9_Biases[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_276x276x1x1_Relu Exists + Symbol: S9_Weights[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_276x276x1x1_Relu Exists + Symbol: S9_Infos[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_276x276x1x1_Relu Exists + Symbol: S8_Mul_shift[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_276x1x3x3_Relu New + Symbol: S8_Mul_scale[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_276x1x3x3_Relu Exists + Symbol: S8_Biases[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_276x1x3x3_Relu Exists + Symbol: S8_Weights[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_276x1x3x3_Relu Exists + Symbol: S8_Infos[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_276x1x3x3_Relu Exists + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x276x1x1_Relu New + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x276x1x1_Relu Exists + Symbol: S7_Biases[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x276x1x1_Relu Exists + Symbol: S7_Weights[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x276x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x276x1x1_Relu Exists + Symbol: S6_Mul_shift[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_276x1x3x3_Relu New + Symbol: S6_Mul_scale[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_276x1x3x3_Relu Exists + Symbol: S6_Biases[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_276x1x3x3_Relu Exists + Symbol: S6_Weights[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_276x1x3x3_Relu Exists + Symbol: S6_Infos[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_276x1x3x3_Relu Exists + Symbol: S5_Mul_shift[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_276x276x1x1_Relu New + Symbol: S5_Mul_scale[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_276x276x1x1_Relu Exists + Symbol: S5_Biases[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_276x276x1x1_Relu Exists + Symbol: S5_Weights[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_276x276x1x1_Relu Exists + Symbol: S5_Infos[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_276x276x1x1_Relu Exists + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x3x3_Relu New + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x3x3_Relu Exists + Symbol: S4_Biases[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x3x3_Relu Exists + Symbol: S4_Weights[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x3x3_Relu Exists + Symbol: S3_Mul_shift[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_276x276x1x1_Relu New + Symbol: S3_Mul_scale[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_276x276x1x1_Relu Exists + Symbol: S3_Biases[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_276x276x1x1_Relu Exists + Symbol: S3_Weights[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_276x276x1x1_Relu Exists + Symbol: S3_Infos[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_276x276x1x1_Relu Exists + Symbol: S2_Mul_shift[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_276x1x3x3_Relu New + Symbol: S2_Mul_scale[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_276x1x3x3_Relu Exists + Symbol: S2_Biases[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_276x1x3x3_Relu Exists + Symbol: S2_Weights[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_276x1x3x3_Relu Exists + Symbol: S2_Infos[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_276x1x3x3_Relu Exists + Symbol: S1_Mul_shift[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_276x1x10x4_Relu New + Symbol: S1_Mul_scale[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_276x1x10x4_Relu Exists + Symbol: S1_Biases[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_276x1x10x4_Relu Exists + Symbol: S1_Weights[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_276x1x10x4_Relu Exists + Symbol: S1_Infos[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_276x1x10x4_Relu Exists + Symbol: S0_Input_1[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_276x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 86940 => Alloc: OK + +After Const Allocation, TopL3: 304704, TopL2: 284517 => Alloc: OK + +[FULL] Remapping [163116 .. 284516] to [0 .. 121400] Align compensation: 3 +[PART] Remapping [0 .. 163115] to [121404 .. 284519] Align compensation: 0 +[PART] Remapping [284517 .. 349999] to [284520 .. 350002] Align compensation: 1 +[FULL] Remapping [0 .. 304703] to [0 .. 304703] Align compensation: 0 +[PART] Remapping [304704 .. 6388607] to [304704 .. 6388607] Align compensation: 0 +Symbol allocation for graph KWS_ds_cnn_l_quantCNN is sucessfull, L2: 284517 out of 350000, L3: 304704 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => S0_Input_1 + (null) => S1_Infos + (null) => S1_Weights + (null) => S1_Biases + (null) => S1_Mul_scale + (null) => S1_Mul_shift + (null) => S2_Infos + (null) => S2_Weights + (null) => S2_Biases + (null) => S2_Mul_scale + (null) => S2_Mul_shift + (null) => S3_Infos + (null) => S3_Weights + (null) => S3_Biases + (null) => S3_Mul_scale + (null) => S3_Mul_shift + (null) => S4_Infos + (null) => S4_Weights + (null) => S4_Biases + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S5_Infos + (null) => S5_Weights + (null) => S5_Biases + (null) => S5_Mul_scale + (null) => S5_Mul_shift + (null) => S6_Infos + (null) => S6_Weights + (null) => S6_Biases + (null) => S6_Mul_scale + (null) => S6_Mul_shift + (null) => S7_Infos + (null) => S7_Weights + (null) => S7_Biases + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S8_Infos + (null) => S8_Weights + (null) => S8_Biases + (null) => S8_Mul_scale + (null) => S8_Mul_shift + (null) => S9_Infos + (null) => S9_Weights + (null) => S9_Biases + (null) => S9_Mul_scale + (null) => S9_Mul_shift + (null) => S10_Infos + (null) => S10_Weights + (null) => S10_Biases + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S11_Infos + (null) => S11_Weights + (null) => S11_Biases + (null) => S11_Mul_scale + (null) => S11_Mul_shift + (null) => S12_Infos + (null) => S13_Infos + (null) => S13_Weights + (null) => S13_Biases + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S14_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 13 14 + + Living Dynamic Symbols: [S0_Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S1_Conv2d_276x1x10x4_Relu, Operations: 2829000 + I In => S0_Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 135240, TileOverhead: 276.000000, L2Buff: 0, Addr: 0 +CI Filter => S1_Weights --L2-- Size: 11040, L3_Move: 0, L2_Move: 11040, TileOverhead: 1.000000, L2Buff: 0, Addr: 2640 +CI Buff Bias => S1_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 984 + O Out => S1_Output --L2-- Size: 69000, L3_Move: 0, L2_Move: 69000, TileOverhead: 1.000000, L2Buff: 0, Addr: 2720 +CI Buff Scale => S1_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 2088 +CI Buff ScaleN => S1_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 2364 +CI Buff Infos => S1_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 4224 + Kernel Memory : L3: 0, L2: 82195 + Kernel Total Memory: 82195, L3 moves: 0, L2 moves: 216945, Move overhead: 2.639394 + Kernel Operations : 2829000 [KernelOper/GraphOper: 9.924761%], Move/Operation ratio: [L3: 0.000000, L2: 0.076686] + Successors: 2 + + Living Dynamic Symbols: [S0_Input_1] [S1_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S2_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S1_Output --L2-- Size: 69000, L3_Move: 0, L2_Move: 69000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S2_Weights --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 25656 +CI Buff Bias => S2_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S2_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28140 +CI Buff Scale => S2_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 25104 +CI Buff ScaleN => S2_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 25380 +CI Buff Infos => S2_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 46860 + Kernel Memory : L3: 0, L2: 91089 + Kernel Total Memory: 91089, L3 moves: 0, L2 moves: 91089, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.507742] + Successors: 3 + + Living Dynamic Symbols: [S1_Output] [S2_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S3_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S2_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 125580, TileOverhead: 7.000000, L2Buff: 0, Addr: 30040 +CI In1 => S3_Weights --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S3_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 22080 + O Out => S3_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 23184 +CI Buff Scale => S3_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28384 +CI Buff ScaleN => S3_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28660 +CI Buff Infos => S3_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43288 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 221361, Move overhead: 1.946527 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.044706] + Successors: 4 + + Living Dynamic Symbols: [S2_Output] [S3_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S4_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S3_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S4_Weights --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => S4_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S4_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S4_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 5 + + Living Dynamic Symbols: [S3_Output] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S5_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S4_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 125580, TileOverhead: 7.000000, L2Buff: 0, Addr: 30040 +CI In1 => S5_Weights --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S5_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 22080 + O Out => S5_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 23184 +CI Buff Scale => S5_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28384 +CI Buff ScaleN => S5_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28660 +CI Buff Infos => S5_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43288 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 221361, Move overhead: 1.946527 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.044706] + Successors: 6 + + Living Dynamic Symbols: [S4_Output] [S5_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S6_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S5_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S6_Weights --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => S6_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S6_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S6_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S6_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S6_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 7 + + Living Dynamic Symbols: [S5_Output] [S6_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S7_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S6_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 125580, TileOverhead: 7.000000, L2Buff: 0, Addr: 30040 +CI In1 => S7_Weights --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S7_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 22080 + O Out => S7_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 23184 +CI Buff Scale => S7_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28384 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28660 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43288 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 221361, Move overhead: 1.946527 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.044706] + Successors: 8 + + Living Dynamic Symbols: [S6_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S8_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S7_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S8_Weights --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => S8_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S8_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S8_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S8_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S8_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 9 + + Living Dynamic Symbols: [S7_Output] [S8_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S9_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S8_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 125580, TileOverhead: 7.000000, L2Buff: 0, Addr: 30040 +CI In1 => S9_Weights --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S9_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 22080 + O Out => S9_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 23184 +CI Buff Scale => S9_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28384 +CI Buff ScaleN => S9_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28660 +CI Buff Infos => S9_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43288 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 221361, Move overhead: 1.946527 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.044706] + Successors: 10 + + Living Dynamic Symbols: [S8_Output] [S9_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S10_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S9_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S10_Weights --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => S10_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S10_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S10_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 11 + + Living Dynamic Symbols: [S9_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S11_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S10_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 125580, TileOverhead: 7.000000, L2Buff: 0, Addr: 30040 +CI In1 => S11_Weights --L2-- Size: 76176, L3_Move: 0, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S11_Biases --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 22080 + O Out => S11_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 23184 +CI Buff Scale => S11_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28384 +CI Buff ScaleN => S11_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 28660 +CI Buff Infos => S11_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43288 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 0, L2 moves: 221361, Move overhead: 1.946527 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.000000, L2: 0.044706] + Successors: 12 + + Living Dynamic Symbols: [S10_Output] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S12_AveragePool_13x5, Operations: 17940 + I Buff In => S11_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S12_Output --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 17940 +CI Buff Infos => S12_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 18216 + Kernel Memory : L3: 0, L2: 18225 + Kernel Total Memory: 18225, L3 moves: 0, L2 moves: 18225, Move overhead: 1.000000 + Kernel Operations : 17940 [KernelOper/GraphOper: 0.062938%], Move/Operation ratio: [L3: 0.000000, L2: 1.015886] + Successors: 13 + + Living Dynamic Symbols: [S11_Output] [S12_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: UKer S13_Linear_12x276x1x1, Operations: 3312 + I Buff In => S12_Output --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S13_Weights --L2-- Size: 3312, L3_Move: 0, L2_Move: 3312, TileOverhead: 1.000000, L2Buff: 0, Addr: 276 +CI Buff Bias => S13_Biases --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 3588 + O Buff Out => S13_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3636 +CI Buff Scale => S13_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3648 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3660 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 3672 + Kernel Memory : L3: 0, L2: 3681 + Kernel Total Memory: 3681, L3 moves: 0, L2 moves: 3681, Move overhead: 1.000000 + Kernel Operations : 3312 [KernelOper/GraphOper: 0.011619%], Move/Operation ratio: [L3: 0.000000, L2: 1.111413] + Successors: 14 + + Living Dynamic Symbols: [S12_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 14, Channel 0 0: UKer S14_SoftMax, Operations: 12 + I Buff In => S13_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S14_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000042%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 15 + + Living Dynamic Symbols: [Output_1] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 15, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 113721 + Graph nodes min global memory: L3: 76176, L2: 113724 + Graph sum of kernel arguments size: 923956, L3 moves: 304704, L2 moves: 1596906, Move overhead: 1.728335 + Graph total operations: 28504464 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 923956, Total L3_Move: 304704, Total L2_Move: 1596906, Tiling Overhead Average: 1.728335 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic -228528 86940 + Const 304704 77844 + Total 76176 113724 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + S0_Input_1 Externally allocated + S1_Infos INSTALL: HyperFlash[ 0: 15]@ 425916 LOAD: L2[ 0: 15]@ 121212 EXEC: L2[ 0: 15]@ 121212 , Size: 9 + S1_Weights INSTALL: HyperFlash[ 0: 15]@ 380880 LOAD: L2[ 0: 15]@ 76176 EXEC: L2[ 0: 15]@ 76176 , Size: 11040 + S1_Biases INSTALL: HyperFlash[ 0: 15]@ 407652 LOAD: L2[ 0: 15]@ 102948 EXEC: L2[ 0: 15]@ 102948 , Size: 1104 + S1_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 419796 LOAD: L2[ 0: 15]@ 115092 EXEC: L2[ 0: 15]@ 115092 , Size: 276 + S1_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 420072 LOAD: L2[ 0: 15]@ 115368 EXEC: L2[ 0: 15]@ 115368 , Size: 276 + S2_Infos INSTALL: HyperFlash[ 0: 15]@ 425928 LOAD: L2[ 0: 15]@ 121224 EXEC: L2[ 0: 15]@ 121224 , Size: 9 + S2_Weights INSTALL: HyperFlash[ 0: 15]@ 395232 LOAD: L2[ 0: 15]@ 90528 EXEC: L2[ 0: 15]@ 90528 , Size: 2484 + S2_Biases INSTALL: HyperFlash[ 0: 15]@ 408756 LOAD: L2[ 0: 15]@ 104052 EXEC: L2[ 0: 15]@ 104052 , Size: 1104 + S2_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 420348 LOAD: L2[ 0: 15]@ 115644 EXEC: L2[ 0: 15]@ 115644 , Size: 276 + S2_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 420624 LOAD: L2[ 0: 15]@ 115920 EXEC: L2[ 0: 15]@ 115920 , Size: 276 + S3_Infos INSTALL: HyperFlash[ 0: 15]@ 425940 LOAD: L2[ 0: 15]@ 121236 EXEC: L2[ 0: 15]@ 121236 , Size: 9 + S3_Weights INSTALL: HyperFlash[ 0: 15]@ 0 LOAD: HyperRam[ 0: 15]@ 0 EXEC: L2[ 1: 3]@ 208344 , Size: 76176 + S3_Biases INSTALL: HyperFlash[ 0: 15]@ 409860 LOAD: L2[ 0: 15]@ 105156 EXEC: L2[ 0: 15]@ 105156 , Size: 1104 + S3_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 420900 LOAD: L2[ 0: 15]@ 116196 EXEC: L2[ 0: 15]@ 116196 , Size: 276 + S3_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 421176 LOAD: L2[ 0: 15]@ 116472 EXEC: L2[ 0: 15]@ 116472 , Size: 276 + S4_Infos INSTALL: HyperFlash[ 0: 15]@ 425952 LOAD: L2[ 0: 15]@ 121248 EXEC: L2[ 0: 15]@ 121248 , Size: 9 + S4_Weights INSTALL: HyperFlash[ 0: 15]@ 397716 LOAD: L2[ 0: 15]@ 93012 EXEC: L2[ 0: 15]@ 93012 , Size: 2484 + S4_Biases INSTALL: HyperFlash[ 0: 15]@ 410964 LOAD: L2[ 0: 15]@ 106260 EXEC: L2[ 0: 15]@ 106260 , Size: 1104 + S4_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 421452 LOAD: L2[ 0: 15]@ 116748 EXEC: L2[ 0: 15]@ 116748 , Size: 276 + S4_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 421728 LOAD: L2[ 0: 15]@ 117024 EXEC: L2[ 0: 15]@ 117024 , Size: 276 + S5_Infos INSTALL: HyperFlash[ 0: 15]@ 425964 LOAD: L2[ 0: 15]@ 121260 EXEC: L2[ 0: 15]@ 121260 , Size: 9 + S5_Weights INSTALL: HyperFlash[ 0: 15]@ 76176 LOAD: HyperRam[ 0: 15]@ 76176 EXEC: L2[ 4: 5]@ 157284 , Size: 76176 + S5_Biases INSTALL: HyperFlash[ 0: 15]@ 412068 LOAD: L2[ 0: 15]@ 107364 EXEC: L2[ 0: 15]@ 107364 , Size: 1104 + S5_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 422004 LOAD: L2[ 0: 15]@ 117300 EXEC: L2[ 0: 15]@ 117300 , Size: 276 + S5_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 422280 LOAD: L2[ 0: 15]@ 117576 EXEC: L2[ 0: 15]@ 117576 , Size: 276 + S6_Infos INSTALL: HyperFlash[ 0: 15]@ 425976 LOAD: L2[ 0: 15]@ 121272 EXEC: L2[ 0: 15]@ 121272 , Size: 9 + S6_Weights INSTALL: HyperFlash[ 0: 15]@ 400200 LOAD: L2[ 0: 15]@ 95496 EXEC: L2[ 0: 15]@ 95496 , Size: 2484 + S6_Biases INSTALL: HyperFlash[ 0: 15]@ 413172 LOAD: L2[ 0: 15]@ 108468 EXEC: L2[ 0: 15]@ 108468 , Size: 1104 + S6_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 422556 LOAD: L2[ 0: 15]@ 117852 EXEC: L2[ 0: 15]@ 117852 , Size: 276 + S6_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 422832 LOAD: L2[ 0: 15]@ 118128 EXEC: L2[ 0: 15]@ 118128 , Size: 276 + S7_Infos INSTALL: HyperFlash[ 0: 15]@ 425988 LOAD: L2[ 0: 15]@ 121284 EXEC: L2[ 0: 15]@ 121284 , Size: 9 + S7_Weights INSTALL: HyperFlash[ 0: 15]@ 152352 LOAD: HyperRam[ 0: 15]@ 152352 EXEC: L2[ 6: 7]@ 157284 , Size: 76176 + S7_Biases INSTALL: HyperFlash[ 0: 15]@ 414276 LOAD: L2[ 0: 15]@ 109572 EXEC: L2[ 0: 15]@ 109572 , Size: 1104 + S7_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 423108 LOAD: L2[ 0: 15]@ 118404 EXEC: L2[ 0: 15]@ 118404 , Size: 276 + S7_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 423384 LOAD: L2[ 0: 15]@ 118680 EXEC: L2[ 0: 15]@ 118680 , Size: 276 + S8_Infos INSTALL: HyperFlash[ 0: 15]@ 426000 LOAD: L2[ 0: 15]@ 121296 EXEC: L2[ 0: 15]@ 121296 , Size: 9 + S8_Weights INSTALL: HyperFlash[ 0: 15]@ 402684 LOAD: L2[ 0: 15]@ 97980 EXEC: L2[ 0: 15]@ 97980 , Size: 2484 + S8_Biases INSTALL: HyperFlash[ 0: 15]@ 415380 LOAD: L2[ 0: 15]@ 110676 EXEC: L2[ 0: 15]@ 110676 , Size: 1104 + S8_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 423660 LOAD: L2[ 0: 15]@ 118956 EXEC: L2[ 0: 15]@ 118956 , Size: 276 + S8_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 423936 LOAD: L2[ 0: 15]@ 119232 EXEC: L2[ 0: 15]@ 119232 , Size: 276 + S9_Infos INSTALL: HyperFlash[ 0: 15]@ 426012 LOAD: L2[ 0: 15]@ 121308 EXEC: L2[ 0: 15]@ 121308 , Size: 9 + S9_Weights INSTALL: HyperFlash[ 0: 15]@ 228528 LOAD: HyperRam[ 0: 15]@ 228528 EXEC: L2[ 8: 9]@ 175224 , Size: 76176 + S9_Biases INSTALL: HyperFlash[ 0: 15]@ 416484 LOAD: L2[ 0: 15]@ 111780 EXEC: L2[ 0: 15]@ 111780 , Size: 1104 + S9_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 424212 LOAD: L2[ 0: 15]@ 119508 EXEC: L2[ 0: 15]@ 119508 , Size: 276 + S9_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 424488 LOAD: L2[ 0: 15]@ 119784 EXEC: L2[ 0: 15]@ 119784 , Size: 276 + S10_Infos INSTALL: HyperFlash[ 0: 15]@ 426024 LOAD: L2[ 0: 15]@ 121320 EXEC: L2[ 0: 15]@ 121320 , Size: 9 + S10_Weights INSTALL: HyperFlash[ 0: 15]@ 405168 LOAD: L2[ 0: 15]@ 100464 EXEC: L2[ 0: 15]@ 100464 , Size: 2484 + S10_Biases INSTALL: HyperFlash[ 0: 15]@ 417588 LOAD: L2[ 0: 15]@ 112884 EXEC: L2[ 0: 15]@ 112884 , Size: 1104 + S10_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 424764 LOAD: L2[ 0: 15]@ 120060 EXEC: L2[ 0: 15]@ 120060 , Size: 276 + S10_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 425040 LOAD: L2[ 0: 15]@ 120336 EXEC: L2[ 0: 15]@ 120336 , Size: 276 + S11_Infos INSTALL: HyperFlash[ 0: 15]@ 426036 LOAD: L2[ 0: 15]@ 121332 EXEC: L2[ 0: 15]@ 121332 , Size: 9 + S11_Weights INSTALL: HyperFlash[ 0: 15]@ 304704 LOAD: L2[ 0: 15]@ 0 EXEC: L2[ 0: 15]@ 0 , Size: 76176 + S11_Biases INSTALL: HyperFlash[ 0: 15]@ 418692 LOAD: L2[ 0: 15]@ 113988 EXEC: L2[ 0: 15]@ 113988 , Size: 1104 + S11_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 425316 LOAD: L2[ 0: 15]@ 120612 EXEC: L2[ 0: 15]@ 120612 , Size: 276 + S11_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 425592 LOAD: L2[ 0: 15]@ 120888 EXEC: L2[ 0: 15]@ 120888 , Size: 276 + S12_Infos INSTALL: HyperFlash[ 0: 15]@ 426048 LOAD: L2[ 0: 15]@ 121344 EXEC: L2[ 0: 15]@ 121344 , Size: 9 + S13_Infos INSTALL: HyperFlash[ 0: 15]@ 426060 LOAD: L2[ 0: 15]@ 121356 EXEC: L2[ 0: 15]@ 121356 , Size: 9 + S13_Weights INSTALL: HyperFlash[ 0: 15]@ 391920 LOAD: L2[ 0: 15]@ 87216 EXEC: L2[ 0: 15]@ 87216 , Size: 3312 + S13_Biases INSTALL: HyperFlash[ 0: 15]@ 425868 LOAD: L2[ 0: 15]@ 121164 EXEC: L2[ 0: 15]@ 121164 , Size: 48 + S13_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 426072 LOAD: L2[ 0: 15]@ 121368 EXEC: L2[ 0: 15]@ 121368 , Size: 12 + S13_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 426084 LOAD: L2[ 0: 15]@ 121380 EXEC: L2[ 0: 15]@ 121380 , Size: 12 + S14_Infos INSTALL: HyperFlash[ 0: 15]@ 426096 LOAD: L2[ 0: 15]@ 121392 EXEC: L2[ 0: 15]@ 121392 , Size: 9 + Output_1 Externally allocated + S1_Output EXEC: L2[ 1: 2]@ 139344 , Size: 69000 + S2_Output EXEC: L2[ 2: 3]@ 121404 , Size: 17940 + S3_Output EXEC: L2[ 3: 4]@ 139344 , Size: 17940 + S4_Output EXEC: L2[ 4: 5]@ 121404 , Size: 17940 + S5_Output EXEC: L2[ 5: 6]@ 139344 , Size: 17940 + S6_Output EXEC: L2[ 6: 7]@ 121404 , Size: 17940 + S7_Output EXEC: L2[ 7: 8]@ 139344 , Size: 17940 + S8_Output EXEC: L2[ 8: 9]@ 157284 , Size: 17940 + S9_Output EXEC: L2[ 9: 10]@ 121404 , Size: 17940 + S10_Output EXEC: L2[ 10: 11]@ 139344 , Size: 17940 + S11_Output EXEC: L2[ 11: 12]@ 121404 , Size: 17940 + S12_Output EXEC: L2[ 12: 13]@ 139344 , Size: 276 + S13_Output EXEC: L2[ 13: 14]@ 121404 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S1_Conv2d_276x1x10x4_Relu +Generating Code For User Kernel: S2_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S3_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S4_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S5_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S6_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S7_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S8_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S9_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S10_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S11_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S12_AveragePool_13x5 +Generating Code For User Kernel: S13_Linear_12x276x1x1 +Generating Code For User Kernel: S14_SoftMax +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Weights.tensor: 11040 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Weights.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Weights.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Weights.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Weights.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Weights.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Weights.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Weights.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Weights.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Weights.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Weights.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Biases.tensor: 1104 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S12_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S13_Weights.tensor: 3312 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S13_Biases.tensor: 48 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S13_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S13_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S14_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_l_quant_L3_Flash_Const.dat (size 426108) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 46872 +L2 Memory size (Bytes) : Given: 350000, Used: 284517 +L3 Memory size (Bytes) : Given: 6388608, Used: 304704 + +L3 Memory bandwidth for 1 graph run : 304704 Bytes +L2 Memory bandwidth for 1 graph run : 1596906 Bytes +Sum of all Kernels arguments size : 923956 Bytes +Tiling Bandwith overhead : 1.728335 Move/KerArgSize +Sum of baseline bandwidth : 50099340 Bytes +Percentage of baseline BW for L2 : 3.18748 % +Percentage of baseline BW for L3 : 0.6082 % +Sum of all Kernels operations : 28504464 Operations +Total amount of flash coefficients : 426108 Bytes + +Basic kernels library : nntool_extra_kernels.h + : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_l_quant.h +Output Directory : BUILD_MODEL_8BIT_EMUL + +The following files have been generated: + KWS_ds_cnn_l_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_l_quantKernels.h Header file for the generated C code + KWS_ds_cnn_l_quant_L3_Flash_Const.dat Flash content for Graph constants +mkdir /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +python3 /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/GenLUT.py --fft_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/LUT.def --mfcc_bf_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCC_FB.def \ + --sample_rate 16000 --frame_size 640 --frame_step 320 --n_frame 49 \ + --n_fft 1024 --n_dct 40 --mfcc_bank_cnt 40 --fmin 20 --fmax 4000 --mfcc_bank_cnt 40 --preempfactor 0.0 \ + --use_tf_mfcc --save_params_header MFCC_params_LARGE.h --use_power +(1024,) +Filter 0: Start: 2 Stop: 5 Base: 0 Items: 4 +Filter 1: Start: 4 Stop: 8 Base: 4 Items: 5 +Filter 2: Start: 6 Stop: 10 Base: 9 Items: 5 +Filter 3: Start: 9 Stop: 13 Base: 14 Items: 5 +Filter 4: Start: 11 Stop: 15 Base: 19 Items: 5 +Filter 5: Start: 14 Stop: 18 Base: 24 Items: 5 +Filter 6: Start: 16 Stop: 21 Base: 29 Items: 6 +Filter 7: Start: 19 Stop: 24 Base: 35 Items: 6 +Filter 8: Start: 22 Stop: 28 Base: 41 Items: 7 +Filter 9: Start: 25 Stop: 31 Base: 48 Items: 7 +Filter 10: Start: 29 Stop: 34 Base: 55 Items: 6 +Filter 11: Start: 32 Stop: 38 Base: 61 Items: 7 +Filter 12: Start: 35 Stop: 42 Base: 68 Items: 8 +Filter 13: Start: 39 Stop: 46 Base: 76 Items: 8 +Filter 14: Start: 43 Stop: 51 Base: 84 Items: 9 +Filter 15: Start: 47 Stop: 55 Base: 93 Items: 9 +Filter 16: Start: 52 Stop: 60 Base: 102 Items: 9 +Filter 17: Start: 56 Stop: 65 Base: 111 Items: 10 +Filter 18: Start: 61 Stop: 70 Base: 121 Items: 10 +Filter 19: Start: 66 Stop: 75 Base: 131 Items: 10 +Filter 20: Start: 71 Stop: 81 Base: 141 Items: 11 +Filter 21: Start: 76 Stop: 87 Base: 152 Items: 12 +Filter 22: Start: 82 Stop: 93 Base: 164 Items: 12 +Filter 23: Start: 88 Stop: 99 Base: 176 Items: 12 +Filter 24: Start: 94 Stop: 106 Base: 188 Items: 13 +Filter 25: Start: 100 Stop: 113 Base: 201 Items: 14 +Filter 26: Start: 107 Stop: 121 Base: 215 Items: 15 +Filter 27: Start: 114 Stop: 128 Base: 230 Items: 15 +Filter 28: Start: 122 Stop: 137 Base: 245 Items: 16 +Filter 29: Start: 129 Stop: 145 Base: 261 Items: 17 +Filter 30: Start: 138 Stop: 154 Base: 278 Items: 17 +Filter 31: Start: 146 Stop: 163 Base: 295 Items: 18 +Filter 32: Start: 155 Stop: 173 Base: 313 Items: 19 +Filter 33: Start: 164 Stop: 183 Base: 332 Items: 20 +Filter 34: Start: 174 Stop: 194 Base: 352 Items: 21 +Filter 35: Start: 184 Stop: 205 Base: 373 Items: 22 +Filter 36: Start: 195 Stop: 217 Base: 395 Items: 23 +Filter 37: Start: 206 Stop: 229 Base: 418 Items: 24 +Filter 38: Start: 218 Stop: 242 Base: 442 Items: 25 +Filter 39: Start: 230 Stop: 255 Base: 467 Items: 26 +gcc -g -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -I. -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Emulation MFCCmodel.c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccGenerator.c /home/marco-gwt/GWT/AutotilerV2/install/lib/libtile.a -lm -DLARGE +/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -f BUILD_MODEL_8BIT_EMUL --L1 48736 --L2 350000 --L3 6388608 +MFCC_COEF_DYN = 10 +FFT_BITS = 10 + +==== Process Tiling For User Kernel: MFCC ======================= + MFCC Partition[0] Size = 24894 (Min: 0, Max: 25622), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes + MFCC, TiledSpace: Tile0 Iteration Count: 1 + In : Ratio: 0.000000, Size: 2560, Total: 2560, Move: 62720 (Decl x 1.960000) L2 +* Out : Ratio: 0.000000, Size: 3920, Total: 6480, Move: 3920 (Decl x 1.000000) L2 +* InOut1 : Ratio: 3.200000, Size: 4096, Total: 10576, Move: 0 (Decl x 0.000000) L2 +* Out_fft : Ratio: 3.200000, Size: 4096, Total: 14672, Move: 0 (Decl x 0.000000) L2 +* shift_fft : Ratio: 3.200000, Size: 1024, Total: 15696, Move: 0 (Decl x 0.000000) L2 +* InOut2 : Ratio: 0.125000, Size: 160, Total: 15856, Move: 0 (Decl x 0.000000) L2 +* Shift : Ratio: 0.003125, Size: 4, Total: 15860, Move: 0 (Decl x 0.000000) L2 +* WinTable : Ratio: 3.200000, Size: 2048, Total: 17908, Move: 2048 (Decl x 1.000000) L2 +* Twiddles_fft : Ratio: 6.400000, Size: 4096, Total: 22004, Move: 4096 (Decl x 1.000000) L2 +* SwapTable_fft : Ratio: 3.200000, Size: 2048, Total: 24052, Move: 2048 (Decl x 1.000000) L2 +*MFCC_FilterBank : Ratio: 0.125000, Size: 320, Total: 24372, Move: 320 (Decl x 1.000000) L2 +* MFCC_Coeffs : Ratio: 1.543750, Size: 988, Total: 25360, Move: 988 (Decl x 1.000000) L2 +* shift_bf : Ratio: 0.125000, Size: 40, Total: 25400, Move: 0 (Decl x 0.000000) L2 +* DCT_Coeff : Ratio: 5.000000, Size: 3200, Total: 28600, Move: 3200 (Decl x 1.000000) L2 + MFCC - IterSpace: Tile0 - L1 Memory: 28600, L2Move: 79340, L3Move: 0, Tiling Overhead: 1.631839 + MFCC Iteration for Tiled Space: 1 + MFCC For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 28600, Reusable Memory: 20136, Used L2 Memory: 0 +================================================================================================= + + +Generating Code For User Kernel: MFCC + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 28600 +L2 Memory size (Bytes) : Given: 350000, Used: 0 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for all user kernels : 0 Bytes +L2 Memory bandwidth for all user kerneks : 79340 Bytes +Sum of all Kernels arguments size : 48620 Bytes +Tiling Bandwith overhead : 1.631839 Move/KerArgSize +Percentage of baseline BW for L2 : inf % +Percentage of baseline BW for L3 : -nan % +Sum of all Kernels operations : 0 Operations + +Basic kernels library : MfccBasicKernels.h +Output Directory : /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL + +The following files have been generated: + MFCCKernels.c Generated C code for the user kernels and the user kernels groups + MFCCKernels.h Header file for the generated C code +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_l_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_l_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_l_quantKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_l_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': True} +Pred/Tot: 98/ 100 Accuracy: 98.00% +Pred/Tot: 196/ 200 Accuracy: 98.00% +Pred/Tot: 288/ 300 Accuracy: 96.00% +Pred/Tot: 382/ 400 Accuracy: 95.50% +Pred/Tot: 475/ 500 Accuracy: 95.00% +Pred/Tot: 570/ 600 Accuracy: 95.00% +Pred/Tot: 665/ 700 Accuracy: 95.00% +Pred/Tot: 758/ 800 Accuracy: 94.75% +Pred/Tot: 853/ 900 Accuracy: 94.78% +Pred/Tot: 947/1000 Accuracy: 94.70% +Pred/Tot: 1037/1100 Accuracy: 94.27% +Pred/Tot: 1133/1200 Accuracy: 94.42% +Pred/Tot: 1226/1300 Accuracy: 94.31% +Pred/Tot: 1321/1400 Accuracy: 94.36% +Pred/Tot: 1415/1500 Accuracy: 94.33% +Pred/Tot: 1513/1600 Accuracy: 94.56% +Pred/Tot: 1607/1700 Accuracy: 94.53% +Pred/Tot: 1704/1800 Accuracy: 94.67% +Pred/Tot: 1797/1900 Accuracy: 94.58% +Pred/Tot: 1892/2000 Accuracy: 94.60% +Pred/Tot: 1986/2100 Accuracy: 94.57% +Pred/Tot: 2084/2200 Accuracy: 94.73% +Pred/Tot: 2179/2300 Accuracy: 94.74% +Pred/Tot: 2272/2400 Accuracy: 94.67% +Pred/Tot: 2363/2500 Accuracy: 94.52% +Pred/Tot: 2457/2600 Accuracy: 94.50% +Pred/Tot: 2553/2700 Accuracy: 94.56% +Pred/Tot: 2651/2800 Accuracy: 94.68% +Pred/Tot: 2746/2900 Accuracy: 94.69% +Pred/Tot: 2840/3000 Accuracy: 94.67% +Pred/Tot: 2936/3100 Accuracy: 94.71% +Pred/Tot: 3030/3200 Accuracy: 94.69% +Pred/Tot: 3126/3300 Accuracy: 94.73% +Pred/Tot: 3218/3400 Accuracy: 94.65% +Pred/Tot: 3313/3500 Accuracy: 94.66% +Pred/Tot: 3410/3600 Accuracy: 94.72% +Pred/Tot: 3501/3700 Accuracy: 94.62% +Pred/Tot: 3596/3800 Accuracy: 94.63% +Pred/Tot: 3688/3900 Accuracy: 94.56% +Pred/Tot: 3784/4000 Accuracy: 94.60% +Pred/Tot: 3878/4100 Accuracy: 94.59% +Pred/Tot: 3973/4200 Accuracy: 94.60% +Pred/Tot: 4068/4300 Accuracy: 94.60% +Pred/Tot: 4163/4400 Accuracy: 94.61% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4203/4444 Accuracy: 94.58% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 0 334 1 3 3 6 5 5 7 1 3 3] + [ 0 2 386 1 0 1 6 1 0 0 0 0] + [ 0 11 1 379 1 4 3 1 0 1 0 5] + [ 0 4 1 0 326 1 1 0 2 7 7 1] + [ 0 3 1 8 0 359 1 0 0 0 3 2] + [ 0 3 3 0 1 0 340 4 0 0 0 1] + [ 0 10 0 1 0 1 1 347 0 1 1 1] + [ 0 8 0 2 3 0 0 2 345 3 0 0] + [ 0 2 1 1 18 0 1 0 8 337 4 1] + [ 1 1 0 0 5 0 1 1 0 1 340 0] + [ 0 7 2 12 0 8 0 1 1 2 0 339]] +Pred/Tot: 92/ 100 Accuracy: 92.00% +Pred/Tot: 188/ 200 Accuracy: 94.00% +Pred/Tot: 283/ 300 Accuracy: 94.33% +Pred/Tot: 378/ 400 Accuracy: 94.50% +Pred/Tot: 470/ 500 Accuracy: 94.00% +Pred/Tot: 565/ 600 Accuracy: 94.17% +Pred/Tot: 659/ 700 Accuracy: 94.14% +Pred/Tot: 754/ 800 Accuracy: 94.25% +Pred/Tot: 850/ 900 Accuracy: 94.44% +Pred/Tot: 946/1000 Accuracy: 94.60% +Pred/Tot: 1038/1100 Accuracy: 94.36% +Pred/Tot: 1134/1200 Accuracy: 94.50% +Pred/Tot: 1227/1300 Accuracy: 94.38% +Pred/Tot: 1320/1400 Accuracy: 94.29% +Pred/Tot: 1411/1500 Accuracy: 94.07% +Pred/Tot: 1509/1600 Accuracy: 94.31% +Pred/Tot: 1599/1700 Accuracy: 94.06% +Pred/Tot: 1694/1800 Accuracy: 94.11% +Pred/Tot: 1786/1900 Accuracy: 94.00% +Pred/Tot: 1880/2000 Accuracy: 94.00% +Pred/Tot: 1973/2100 Accuracy: 93.95% +Pred/Tot: 2067/2200 Accuracy: 93.95% +Pred/Tot: 2161/2300 Accuracy: 93.96% +Pred/Tot: 2259/2400 Accuracy: 94.12% +Pred/Tot: 2354/2500 Accuracy: 94.16% +Pred/Tot: 2447/2600 Accuracy: 94.12% +Pred/Tot: 2539/2700 Accuracy: 94.04% +Pred/Tot: 2636/2800 Accuracy: 94.14% +Pred/Tot: 2730/2900 Accuracy: 94.14% +Pred/Tot: 2823/3000 Accuracy: 94.10% +Pred/Tot: 2920/3100 Accuracy: 94.19% +Pred/Tot: 3016/3200 Accuracy: 94.25% +Pred/Tot: 3111/3300 Accuracy: 94.27% +Pred/Tot: 3202/3400 Accuracy: 94.18% +Pred/Tot: 3298/3500 Accuracy: 94.23% +Pred/Tot: 3393/3600 Accuracy: 94.25% +Pred/Tot: 3487/3700 Accuracy: 94.24% +Pred/Tot: 3583/3800 Accuracy: 94.29% +Pred/Tot: 3678/3900 Accuracy: 94.31% +Pred/Tot: 3770/4000 Accuracy: 94.25% +Pred/Tot: 3863/4100 Accuracy: 94.22% +Pred/Tot: 3956/4200 Accuracy: 94.19% +Pred/Tot: 4052/4300 Accuracy: 94.23% +Pred/Tot: 4142/4400 Accuracy: 94.14% +Pred/Tot: 4233/4500 Accuracy: 94.07% +Pred/Tot: 4326/4600 Accuracy: 94.04% +Pred/Tot: 4422/4700 Accuracy: 94.09% +Pred/Tot: 4515/4800 Accuracy: 94.06% + +FINAL TESTING ACCURACY: +Pred/Tot: 4600/4889 Accuracy: 94.09% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 378 0 5 0 5 3 3 6 2 5 1] + [ 0 6 404 5 0 0 3 0 0 0 1 0] + [ 0 2 0 386 0 8 3 0 0 0 0 6] + [ 0 5 0 0 388 3 1 0 4 14 7 3] + [ 0 6 1 12 0 382 2 1 0 0 0 2] + [ 0 11 6 2 1 0 390 2 0 0 0 0] + [ 0 12 0 0 0 0 5 375 3 0 1 0] + [ 0 11 0 0 0 5 0 0 366 12 1 1] + [ 0 5 0 3 13 0 0 0 5 369 4 3] + [ 0 2 0 1 2 3 0 1 0 1 399 2] + [ 0 9 0 26 0 6 1 3 0 1 1 355]] diff --git a/accuracy_log/log_test_large_power_v2.txt b/accuracy_log/log_test_large_power_v2.txt new file mode 100644 index 0000000..9176146 --- /dev/null +++ b/accuracy_log/log_test_large_power_v2.txt @@ -0,0 +1,2376 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_l_quant_power.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 1 +WARNING:tensorflow:From utils/test_accuracy_emul.py:311: The name tf.app.run is deprecated. Please use tf.compat.v1.app.run instead. + +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from main_emulation.c:32: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_BasicKernels_SQ8.h:3, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.h:5, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.c:1: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:17: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WriteImageToFile’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:437:18: note: in expansion of macro ‘__OPEN_WRITE’ + 437 | void *File = __OPEN_WRITE(fs, ImageName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:454:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 454 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:460:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 460 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:473:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 473 | ret+=__WRITE(File,OutBuffer +(CHUNK_SIZE*i), CHUNK_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:476:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 476 | ret+=__WRITE(File,OutBuffer+(CHUNK_SIZE*steps) , ((W*H*PixelSize) % CHUNK_SIZE)*sizeof(unsigned char)); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:479:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 479 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:57: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result] + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:9: note: in expansion of macro ‘__WRITE’ + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:7: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c: In function ‘AT_TensorGetNextPage’: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:79:63: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 79 | AT_HYPERFLASH_FS_CL_COPY((AT_HYPERFLASH_FS_T *) L3_Device, (AT_HYPERFLASH_FS_EXT_ADDR_TYPE) (Addr+Offset), (AT_HYPERFLASH_FS_INT_ADDR_TYPE) L2_BufferAddr, Size, 0, L3_Event); + | ^ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:225:36: note: in definition of macro ‘AT_HYPERFLASH_FS_CL_COPY’ + 225 | __at_hyperflash_fs_copy(*(file), ext, loc, size, dir) + | ^~~ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c: In function ‘WriteWavToFileNew’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:236:18: note: in expansion of macro ‘__OPEN_WRITE’ + 236 | void *File = __OPEN_WRITE(fs, FileName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:328:20: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 328 | ret += __WRITE(File, header_buffer, WAV_HEADER_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:335:21: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 335 | ret += __WRITE(File, data, Size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:338:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 338 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=0 LARGE=1 WITH_MFCC=1 USE_POWER=1 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_SQ8_EMUL +cp model/KWS_ds_cnn_l_quant_power.tflite BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quant.tflite +echo "GENERATING NNTOOL STATE FILE" +GENERATING NNTOOL STATE FILE +nntool -s model/nntool_script BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +open - opening graph file BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quant.tflite load_quantization = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +forwards SOFTMAX_0_13 in: -33.88<(i8-0.00)*0.26467398<33.61 out: None stop [] fusion False +handler SoftmaxTanHMult selected for SoftMaxParameters(SOFTMAX_0_13) +forwards handler SOFTMAX_0_13 returned in: -64.00<(i8-0.00)*0.50000000<63.50 forced out: -1.00<(i16-0.00)*0.00003052<1.00 fusion False +forwards in edge 0 does not match was -33.88<(i8-0.00)*0.26467398<33.61 need -64.00<(i8-0.00)*0.50000000<63.50 forced +go backwackwards to F 12x1x1x276 B 1 +backwards FULLY_CONNECTED_0_12 in: -8.63<(i8-0.00)*0.06741016<8.56,chan<(i8-0.00)*chan W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 2829000 + +==== Process Tiling For User Kernel: S4_Conv2d_276x1x10x4_Relu ======================= +S4_Conv2d_276x1x10x4_Relu Partition[0] Size = 312657 (Min: 200, Max: 440689), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=25 + In Dim: 58, TileOverlap: 8, Ratio: 2.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S4_Conv2d_276x1x10x4_Relu Full buffering on Arg: Scale, was using 288 Bytes will require 276 Bytes buffer +S4_Conv2d_276x1x10x4_Relu Full buffering on Arg: ScaleN, was using 288 Bytes will require 276 Bytes buffer +S4_Conv2d_276x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 7 Parametric Space: [D1, M0=144] Parametric Space: [D0, M1=1] + In : Ratio: 2.000000, FixDim: 10, VarDim: 16 [ 52], Size: 320, Total: 320, Move: 1940 (Decl x 3.959184) L2 + Bias : Ratio: 0.000000, Size: 1152, Total: 1472, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 1748, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 2024, Move: 276 (Decl x 1.000000) L2 +@ Filter : Ratio: 0.000000, Size: 11520, Total: 13544, Move: 11040 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 4 [ 25], Size: 11520, Total: 25064, Move: 69000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 4 [ 25], Size: 23040, Total: 48104, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 48116, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_276x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 48116, L2Move: 83645, L3Move: 0, Tiling Overhead: 1.017641 +S4_Conv2d_276x1x10x4_Relu Partial buffering on Arg: Filter, From: D0 To: D1. Current is (Par) 1 x [W:1, H:1] x 40 => Partial buffer size is 11520 Bytes +S4_Conv2d_276x1x10x4_Relu Found Parametric value for space D1 (Initial: 276, Div: 8) = 144 [144*1 + 132] and space D0 (Initial: 1, Div: 4) = 1 [1*1 + 0], Iteration for Tiled Space: 7 +Kernel: S4_Conv2d_276x1x10x4_Relu, Arg: In, Last Tile: 2+8, Pad: 5 => Requires Padding of tile N-1 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: In, Size: 160, Base1: 0, Base2: 160 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Bias, Size: 576, Base1: 320, Base2: 896 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Scale, Size: 276, Base1: 1472, Base2: 0 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: ScaleN, Size: 276, Base1: 1748, Base2: 0 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Filter, Size: 5760, Base1: 2024, Base2: 7784 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Out, Size: 5760, Base1: 13544, Base2: 19304 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: ConvOut, Size: 23040, Base1: 25064, Base2: 0 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Infos, Size: 12, Base1: 48104, Base2: 0 +S4_Conv2d_276x1x10x4_Relu For Iter Space: 0 Iteration count: 7 (Last one is truncated), Given L1 Memory: 48736, Used L1 Memory: 48116, Reusable Memory: 620, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S7_Conv2d_276x1x3x3_Relu ======================= +S7_Conv2d_276x1x3x3_Relu Partition[0] Size = 102145 (Min: 60, Max: 265097), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=13 + In Dim: 27, TileOverlap: 1, Ratio: 2.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S7_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 384 Bytes will require 1104 Bytes buffer +S7_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 96 Bytes will require 276 Bytes buffer +S7_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 Bytes will require 276 Bytes buffer +S7_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 864 Bytes will require 2484 Bytes buffer +S7_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 69000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 25104, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 25380, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 25656, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 28140, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 34380, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 46860, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 46872, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 46872, L2Move: 91089, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 48 [48*5 + 36], Iteration for Tiled Space: 1 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: In, Size: 12000, Base1: 0, Base2: 12000 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 24000, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 25104, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 25380, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 25656, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 3120, Base1: 28140, Base2: 31260 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 12480, Base1: 34380, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 46860, Base2: 0 +S7_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 46872, Reusable Memory: 1864, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S10_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S10_Conv2d_276x276x1x1_Relu ======================= +S10_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S10_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S10_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S10_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S10_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S10_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S10_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S10_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S13_Conv2d_276x1x3x3_Relu ======================= +S13_Conv2d_276x1x3x3_Relu Partition[0] Size = 91105 (Min: 30, Max: 157457), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S13_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 1104 Bytes buffer +S13_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 276 Bytes buffer +S13_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 276 Bytes buffer +S13_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 2484 Bytes buffer +S13_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S13_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S13_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 10400, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 11504, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 11780, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 12056, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 14540, Base2: 19740 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 24940, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45740, Base2: 0 +S13_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S16_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S16_Conv2d_276x276x1x1_Relu ======================= +S16_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S16_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S16_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S16_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S16_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S16_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S16_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S16_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S19_Conv2d_276x1x3x3_Relu ======================= +S19_Conv2d_276x1x3x3_Relu Partition[0] Size = 91105 (Min: 30, Max: 157457), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S19_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 1104 Bytes buffer +S19_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 276 Bytes buffer +S19_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 276 Bytes buffer +S19_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 2484 Bytes buffer +S19_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S19_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S19_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 10400, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 11504, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 11780, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 12056, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 14540, Base2: 19740 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 24940, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45740, Base2: 0 +S19_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S22_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S22_Conv2d_276x276x1x1_Relu ======================= +S22_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S22_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S22_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S22_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S22_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S22_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S22_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S22_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S25_Conv2d_276x1x3x3_Relu ======================= +S25_Conv2d_276x1x3x3_Relu Partition[0] Size = 91105 (Min: 30, Max: 157457), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S25_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 1104 Bytes buffer +S25_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 276 Bytes buffer +S25_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 276 Bytes buffer +S25_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 2484 Bytes buffer +S25_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S25_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S25_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 10400, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 11504, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 11780, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 12056, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 14540, Base2: 19740 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 24940, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45740, Base2: 0 +S25_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S28_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S28_Conv2d_276x276x1x1_Relu ======================= +S28_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S28_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S28_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S28_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S28_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S28_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S28_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S28_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S31_Conv2d_276x1x3x3_Relu ======================= +S31_Conv2d_276x1x3x3_Relu Partition[0] Size = 91105 (Min: 30, Max: 157457), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S31_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 1104 Bytes buffer +S31_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 276 Bytes buffer +S31_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 276 Bytes buffer +S31_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 2484 Bytes buffer +S31_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S31_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S31_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 10400, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 11504, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 11780, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 12056, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 14540, Base2: 19740 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 24940, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45740, Base2: 0 +S31_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S34_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S34_Conv2d_276x276x1x1_Relu ======================= +S34_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S34_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S34_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S34_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S34_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S34_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S34_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S34_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S34_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S34_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 17940 + +==== Process Tiling For User Kernel: S35_AveragePool_13x5 ======================= +S35_AveragePool_13x5 Partition[0] Size = 36999 (Min: 130, Max: 36459), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=1 + In Dim: 13, TileOverlap: 11, Ratio: 2.000000 + Out Dim: 1, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S35_AveragePool_13x5, Total Raw Memory: 18228 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S35_AveragePool_13x5, Arg: In, Size: 17940, Base1: 0, Base2: 0 +Ker: S35_AveragePool_13x5, Arg: Out, Size: 276, Base1: 17940, Base2: 0 +Ker: S35_AveragePool_13x5, Arg: Infos, Size: 12, Base1: 18216, Base2: 0 +S35_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 18228, Reusable Memory: 30508, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S38_Linear_12x276x1x1, Linear: InDim: 276, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S38_Linear_12x276x1x1 ======================= +S38_Linear_12x276x1x1 Partition[0] Size = 7091 (Min: 0, Max: 7175), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=1 + +Kernel: S38_Linear_12x276x1x1, Total Raw Memory: 3684 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S38_Linear_12x276x1x1, Arg: In, Size: 276, Base1: 0, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Filter, Size: 3312, Base1: 276, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Bias, Size: 48, Base1: 3588, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Out, Size: 12, Base1: 3636, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Scale, Size: 12, Base1: 3648, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: ScaleN, Size: 12, Base1: 3660, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Infos, Size: 12, Base1: 3672, Base2: 0 +S38_Linear_12x276x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 3684, Reusable Memory: 45052, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S39_SoftMax ======================= + S39_SoftMax Partition[0] Size = 51 (Min: 8, Max: 63), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=12 + In Dim: 12, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 12, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S39_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S39_SoftMax, Arg: In, Size: 12, Base1: 0, Base2: 0 +Ker: S39_SoftMax, Arg: Out, Size: 24, Base1: 12, Base2: 0 +Ker: S39_SoftMax, Arg: Infos, Size: 12, Base1: 36, Base2: 0 + S39_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S38_Output[ In] Adding Edge From S38_Linear_12x276x1x1 To S39_SoftMax New + Symbol: S35_Output[ In] Adding Edge From S35_AveragePool_13x5 To S38_Linear_12x276x1x1 New + Symbol: S34_Output[ In] Adding Edge From S34_Conv2d_276x276x1x1_Relu To S35_AveragePool_13x5 New + Symbol: S31_Output[ In] Adding Edge From S31_Conv2d_276x1x3x3_Relu To S34_Conv2d_276x276x1x1_Relu New + Symbol: S28_Output[ In] Adding Edge From S28_Conv2d_276x276x1x1_Relu To S31_Conv2d_276x1x3x3_Relu New + Symbol: S25_Output[ In] Adding Edge From S25_Conv2d_276x1x3x3_Relu To S28_Conv2d_276x276x1x1_Relu New + Symbol: S22_Output[ In] Adding Edge From S22_Conv2d_276x276x1x1_Relu To S25_Conv2d_276x1x3x3_Relu New + Symbol: S19_Output[ In] Adding Edge From S19_Conv2d_276x1x3x3_Relu To S22_Conv2d_276x276x1x1_Relu New + Symbol: S16_Output[ In] Adding Edge From S16_Conv2d_276x276x1x1_Relu To S19_Conv2d_276x1x3x3_Relu New + Symbol: S13_Output[ In] Adding Edge From S13_Conv2d_276x1x3x3_Relu To S16_Conv2d_276x276x1x1_Relu New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_276x276x1x1_Relu To S13_Conv2d_276x1x3x3_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_276x1x3x3_Relu To S10_Conv2d_276x276x1x1_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_276x1x10x4_Relu To S7_Conv2d_276x1x3x3_Relu New + Symbol: Output_1[ Out] Adding Edge From S39_SoftMax To __GraphExit__ New + Symbol: S39_Infos[ In] Adding Edge From __GraphEntry__ To S39_SoftMax New + Symbol: S38_Infos[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 New + Symbol: S38_Mul_shift[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 Exists + Symbol: S38_Mul_scale[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 Exists + Symbol: Dscnnfc1matmul_bias[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 Exists + Symbol: Dscnnfc1weights_quantfakequant[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 Exists + Symbol: S35_Infos[ In] Adding Edge From __GraphEntry__ To S35_AveragePool_13x5 New + Symbol: S34_Infos[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu New + Symbol: S34_Mul_shift[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu Exists + Symbol: S34_Mul_scale[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_5pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_5pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu Exists + Symbol: S31_Infos[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu New + Symbol: S31_Mul_shift[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu Exists + Symbol: S31_Mul_scale[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_5dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_5dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu Exists + Symbol: S28_Infos[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu New + Symbol: S28_Mul_shift[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu Exists + Symbol: S28_Mul_scale[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu Exists + Symbol: S25_Infos[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu New + Symbol: S25_Mul_shift[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu Exists + Symbol: S25_Mul_scale[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu Exists + Symbol: S22_Infos[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu New + Symbol: S22_Mul_shift[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu Exists + Symbol: S22_Mul_scale[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu Exists + Symbol: S19_Infos[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu New + Symbol: S19_Mul_shift[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu Exists + Symbol: S19_Mul_scale[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu Exists + Symbol: S16_Infos[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu New + Symbol: S16_Mul_shift[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu Exists + Symbol: S16_Mul_scale[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu Exists + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu New + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu Exists + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu New + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu Exists + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu New + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists + Symbol: Dscnnconv_1conv2d_fold_bias[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists + Symbol: Dscnnconv_1weights_quantfakequ[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists + Symbol: Input_1[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 86940 => Alloc: OK + +After Const Allocation, TopL3: 304704, TopL2: 284517 => Alloc: OK + +[FULL] Remapping [163116 .. 284516] to [0 .. 121400] Align compensation: 3 +[PART] Remapping [0 .. 163115] to [121404 .. 284519] Align compensation: 0 +[PART] Remapping [284517 .. 349999] to [284520 .. 350002] Align compensation: 1 +[FULL] Remapping [0 .. 304703] to [0 .. 304703] Align compensation: 0 +[PART] Remapping [304704 .. 6388607] to [304704 .. 6388607] Align compensation: 0 +Symbol allocation for graph KWS_ds_cnn_l_quantCNN is sucessfull, L2: 284517 out of 350000, L3: 304704 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => Input_1 + (null) => Dscnnconv_1weights_quantfakequ + (null) => Dscnnconv_1conv2d_fold_bias + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S4_Infos + (null) => Dscnnconv_ds_1dw_convweights_q + (null) => Dscnnconv_ds_1dw_convdepthwise + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S7_Infos + (null) => Dscnnconv_ds_1pw_convweights_q + (null) => Dscnnconv_ds_1pw_convconv2d_fo + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S10_Infos + (null) => Dscnnconv_ds_2dw_convweights_q + (null) => Dscnnconv_ds_2dw_convdepthwise + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S13_Infos + (null) => Dscnnconv_ds_2pw_convweights_q + (null) => Dscnnconv_ds_2pw_convconv2d_fo + (null) => S16_Mul_scale + (null) => S16_Mul_shift + (null) => S16_Infos + (null) => Dscnnconv_ds_3dw_convweights_q + (null) => Dscnnconv_ds_3dw_convdepthwise + (null) => S19_Mul_scale + (null) => S19_Mul_shift + (null) => S19_Infos + (null) => Dscnnconv_ds_3pw_convweights_q + (null) => Dscnnconv_ds_3pw_convconv2d_fo + (null) => S22_Mul_scale + (null) => S22_Mul_shift + (null) => S22_Infos + (null) => Dscnnconv_ds_4dw_convweights_q + (null) => Dscnnconv_ds_4dw_convdepthwise + (null) => S25_Mul_scale + (null) => S25_Mul_shift + (null) => S25_Infos + (null) => Dscnnconv_ds_4pw_convweights_q + (null) => Dscnnconv_ds_4pw_convconv2d_fo + (null) => S28_Mul_scale + (null) => S28_Mul_shift + (null) => S28_Infos + (null) => Dscnnconv_ds_5dw_convweights_q + (null) => Dscnnconv_ds_5dw_convdepthwise + (null) => S31_Mul_scale + (null) => S31_Mul_shift + (null) => S31_Infos + (null) => Dscnnconv_ds_5pw_convweights_q + (null) => Dscnnconv_ds_5pw_convconv2d_fo + (null) => S34_Mul_scale + (null) => S34_Mul_shift + (null) => S34_Infos + (null) => S35_Infos + (null) => Dscnnfc1weights_quantfakequant + (null) => Dscnnfc1matmul_bias + (null) => S38_Mul_scale + (null) => S38_Mul_shift + (null) => S38_Infos + (null) => S39_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 13 14 + + Living Dynamic Symbols: [Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S4_Conv2d_276x1x10x4_Relu, Operations: 2829000 + I In => Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 1940, TileOverhead: 3.959184, L2Buff: 0, Addr: 0 +CI PartBuff Filter => Dscnnconv_1weights_quantfakequ --L2-- Size: 11040, L3_Move: 0, L2_Move: 11040, TileOverhead: 1.000000, L2Buff: 0, Addr: 2024 +CI Bias => Dscnnconv_1conv2d_fold_bias --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 320 + O Out => S4_Output --L2-- Size: 69000, L3_Move: 0, L2_Move: 69000, TileOverhead: 1.000000, L2Buff: 0, Addr: 13544 +CI Buff Scale => S4_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 1472 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 1748 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 48104 + Kernel Memory : L3: 0, L2: 82195 + Kernel Total Memory: 82195, L3 moves: 0, L2 moves: 83645, Move overhead: 1.017641 + Kernel Operations : 2829000 [KernelOper/GraphOper: 9.924761%], Move/Operation ratio: [L3: 0.000000, L2: 0.029567] + Successors: 2 + + Living Dynamic Symbols: [Input_1] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S7_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S4_Output --L2-- Size: 69000, L3_Move: 0, L2_Move: 69000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_1dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 25656 +CI Buff Bias => Dscnnconv_ds_1dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S7_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28140 +CI Buff Scale => S7_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 25104 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 25380 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 46860 + Kernel Memory : L3: 0, L2: 91089 + Kernel Total Memory: 91089, L3 moves: 0, L2 moves: 91089, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.507742] + Successors: 3 + + Living Dynamic Symbols: [S4_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S10_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S7_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_1pw_convweights_q --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_1pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S10_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S10_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.041083] + Successors: 4 + + Living Dynamic Symbols: [S7_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S13_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S10_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_2dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => Dscnnconv_ds_2dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S13_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S13_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 5 + + Living Dynamic Symbols: [S10_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S16_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S13_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_2pw_convweights_q --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_2pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S16_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S16_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S16_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S16_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.041083] + Successors: 6 + + Living Dynamic Symbols: [S13_Output] [S16_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S19_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S16_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_3dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => Dscnnconv_ds_3dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S19_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S19_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S19_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S19_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 7 + + Living Dynamic Symbols: [S16_Output] [S19_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S22_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S19_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_3pw_convweights_q --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_3pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S22_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S22_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S22_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S22_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.041083] + Successors: 8 + + Living Dynamic Symbols: [S19_Output] [S22_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S25_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S22_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_4dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => Dscnnconv_ds_4dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S25_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S25_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S25_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S25_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 9 + + Living Dynamic Symbols: [S22_Output] [S25_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S28_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S25_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_4pw_convweights_q --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_4pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S28_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S28_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S28_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S28_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.041083] + Successors: 10 + + Living Dynamic Symbols: [S25_Output] [S28_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S31_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S28_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_5dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => Dscnnconv_ds_5dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S31_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S31_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S31_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S31_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 11 + + Living Dynamic Symbols: [S28_Output] [S31_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S34_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S31_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_5pw_convweights_q --L2-- Size: 76176, L3_Move: 0, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_5pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S34_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S34_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S34_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S34_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 0, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.000000, L2: 0.041083] + Successors: 12 + + Living Dynamic Symbols: [S31_Output] [S34_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S35_AveragePool_13x5, Operations: 17940 + I Buff In => S34_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S35_Output --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 17940 +CI Buff Infos => S35_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 18216 + Kernel Memory : L3: 0, L2: 18225 + Kernel Total Memory: 18225, L3 moves: 0, L2 moves: 18225, Move overhead: 1.000000 + Kernel Operations : 17940 [KernelOper/GraphOper: 0.062938%], Move/Operation ratio: [L3: 0.000000, L2: 1.015886] + Successors: 13 + + Living Dynamic Symbols: [S34_Output] [S35_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: UKer S38_Linear_12x276x1x1, Operations: 3312 + I Buff In => S35_Output --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnfc1weights_quantfakequant --L2-- Size: 3312, L3_Move: 0, L2_Move: 3312, TileOverhead: 1.000000, L2Buff: 0, Addr: 276 +CI Buff Bias => Dscnnfc1matmul_bias --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 3588 + O Buff Out => S38_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3636 +CI Buff Scale => S38_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3648 +CI Buff ScaleN => S38_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3660 +CI Buff Infos => S38_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 3672 + Kernel Memory : L3: 0, L2: 3681 + Kernel Total Memory: 3681, L3 moves: 0, L2 moves: 3681, Move overhead: 1.000000 + Kernel Operations : 3312 [KernelOper/GraphOper: 0.011619%], Move/Operation ratio: [L3: 0.000000, L2: 1.111413] + Successors: 14 + + Living Dynamic Symbols: [S35_Output] [S38_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 14, Channel 0 0: UKer S39_SoftMax, Operations: 12 + I Buff In => S38_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S39_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000042%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 15 + + Living Dynamic Symbols: [Output_1] [S38_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 15, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 113721 + Graph nodes min global memory: L3: 76176, L2: 113724 + Graph sum of kernel arguments size: 923956, L3 moves: 304704, L2 moves: 1373906, Move overhead: 1.486982 + Graph total operations: 28504464 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 923956, Total L3_Move: 304704, Total L2_Move: 1373906, Tiling Overhead Average: 1.486982 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic -228528 86940 + Const 304704 77844 + Total 76176 113724 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + Input_1 Externally allocated + Dscnnconv_1weights_quantfakequ INSTALL: HyperFlash[ 0: 15]@ 380880 LOAD: L2[ 0: 15]@ 76176 EXEC: L2[ 0: 15]@ 76176 , Size: 11040 + Dscnnconv_1conv2d_fold_bias INSTALL: HyperFlash[ 0: 15]@ 407652 LOAD: L2[ 0: 15]@ 102948 EXEC: L2[ 0: 15]@ 102948 , Size: 1104 + S4_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 419796 LOAD: L2[ 0: 15]@ 115092 EXEC: L2[ 0: 15]@ 115092 , Size: 276 + S4_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 420072 LOAD: L2[ 0: 15]@ 115368 EXEC: L2[ 0: 15]@ 115368 , Size: 276 + S4_Infos INSTALL: HyperFlash[ 0: 15]@ 425916 LOAD: L2[ 0: 15]@ 121212 EXEC: L2[ 0: 15]@ 121212 , Size: 9 + Dscnnconv_ds_1dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 395232 LOAD: L2[ 0: 15]@ 90528 EXEC: L2[ 0: 15]@ 90528 , Size: 2484 + Dscnnconv_ds_1dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 408756 LOAD: L2[ 0: 15]@ 104052 EXEC: L2[ 0: 15]@ 104052 , Size: 1104 + S7_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 420348 LOAD: L2[ 0: 15]@ 115644 EXEC: L2[ 0: 15]@ 115644 , Size: 276 + S7_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 420624 LOAD: L2[ 0: 15]@ 115920 EXEC: L2[ 0: 15]@ 115920 , Size: 276 + S7_Infos INSTALL: HyperFlash[ 0: 15]@ 425928 LOAD: L2[ 0: 15]@ 121224 EXEC: L2[ 0: 15]@ 121224 , Size: 9 + Dscnnconv_ds_1pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 0 LOAD: HyperRam[ 0: 15]@ 0 EXEC: L2[ 1: 3]@ 208344 , Size: 76176 + Dscnnconv_ds_1pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 409860 LOAD: L2[ 0: 15]@ 105156 EXEC: L2[ 0: 15]@ 105156 , Size: 1104 + S10_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 420900 LOAD: L2[ 0: 15]@ 116196 EXEC: L2[ 0: 15]@ 116196 , Size: 276 + S10_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 421176 LOAD: L2[ 0: 15]@ 116472 EXEC: L2[ 0: 15]@ 116472 , Size: 276 + S10_Infos INSTALL: HyperFlash[ 0: 15]@ 425940 LOAD: L2[ 0: 15]@ 121236 EXEC: L2[ 0: 15]@ 121236 , Size: 9 + Dscnnconv_ds_2dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 397716 LOAD: L2[ 0: 15]@ 93012 EXEC: L2[ 0: 15]@ 93012 , Size: 2484 + Dscnnconv_ds_2dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 410964 LOAD: L2[ 0: 15]@ 106260 EXEC: L2[ 0: 15]@ 106260 , Size: 1104 + S13_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 421452 LOAD: L2[ 0: 15]@ 116748 EXEC: L2[ 0: 15]@ 116748 , Size: 276 + S13_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 421728 LOAD: L2[ 0: 15]@ 117024 EXEC: L2[ 0: 15]@ 117024 , Size: 276 + S13_Infos INSTALL: HyperFlash[ 0: 15]@ 425952 LOAD: L2[ 0: 15]@ 121248 EXEC: L2[ 0: 15]@ 121248 , Size: 9 + Dscnnconv_ds_2pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 76176 LOAD: HyperRam[ 0: 15]@ 76176 EXEC: L2[ 4: 5]@ 157284 , Size: 76176 + Dscnnconv_ds_2pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 412068 LOAD: L2[ 0: 15]@ 107364 EXEC: L2[ 0: 15]@ 107364 , Size: 1104 + S16_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 422004 LOAD: L2[ 0: 15]@ 117300 EXEC: L2[ 0: 15]@ 117300 , Size: 276 + S16_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 422280 LOAD: L2[ 0: 15]@ 117576 EXEC: L2[ 0: 15]@ 117576 , Size: 276 + S16_Infos INSTALL: HyperFlash[ 0: 15]@ 425964 LOAD: L2[ 0: 15]@ 121260 EXEC: L2[ 0: 15]@ 121260 , Size: 9 + Dscnnconv_ds_3dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 400200 LOAD: L2[ 0: 15]@ 95496 EXEC: L2[ 0: 15]@ 95496 , Size: 2484 + Dscnnconv_ds_3dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 413172 LOAD: L2[ 0: 15]@ 108468 EXEC: L2[ 0: 15]@ 108468 , Size: 1104 + S19_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 422556 LOAD: L2[ 0: 15]@ 117852 EXEC: L2[ 0: 15]@ 117852 , Size: 276 + S19_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 422832 LOAD: L2[ 0: 15]@ 118128 EXEC: L2[ 0: 15]@ 118128 , Size: 276 + S19_Infos INSTALL: HyperFlash[ 0: 15]@ 425976 LOAD: L2[ 0: 15]@ 121272 EXEC: L2[ 0: 15]@ 121272 , Size: 9 + Dscnnconv_ds_3pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 152352 LOAD: HyperRam[ 0: 15]@ 152352 EXEC: L2[ 6: 7]@ 157284 , Size: 76176 + Dscnnconv_ds_3pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 414276 LOAD: L2[ 0: 15]@ 109572 EXEC: L2[ 0: 15]@ 109572 , Size: 1104 + S22_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 423108 LOAD: L2[ 0: 15]@ 118404 EXEC: L2[ 0: 15]@ 118404 , Size: 276 + S22_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 423384 LOAD: L2[ 0: 15]@ 118680 EXEC: L2[ 0: 15]@ 118680 , Size: 276 + S22_Infos INSTALL: HyperFlash[ 0: 15]@ 425988 LOAD: L2[ 0: 15]@ 121284 EXEC: L2[ 0: 15]@ 121284 , Size: 9 + Dscnnconv_ds_4dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 402684 LOAD: L2[ 0: 15]@ 97980 EXEC: L2[ 0: 15]@ 97980 , Size: 2484 + Dscnnconv_ds_4dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 415380 LOAD: L2[ 0: 15]@ 110676 EXEC: L2[ 0: 15]@ 110676 , Size: 1104 + S25_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 423660 LOAD: L2[ 0: 15]@ 118956 EXEC: L2[ 0: 15]@ 118956 , Size: 276 + S25_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 423936 LOAD: L2[ 0: 15]@ 119232 EXEC: L2[ 0: 15]@ 119232 , Size: 276 + S25_Infos INSTALL: HyperFlash[ 0: 15]@ 426000 LOAD: L2[ 0: 15]@ 121296 EXEC: L2[ 0: 15]@ 121296 , Size: 9 + Dscnnconv_ds_4pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 228528 LOAD: HyperRam[ 0: 15]@ 228528 EXEC: L2[ 8: 9]@ 175224 , Size: 76176 + Dscnnconv_ds_4pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 416484 LOAD: L2[ 0: 15]@ 111780 EXEC: L2[ 0: 15]@ 111780 , Size: 1104 + S28_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 424212 LOAD: L2[ 0: 15]@ 119508 EXEC: L2[ 0: 15]@ 119508 , Size: 276 + S28_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 424488 LOAD: L2[ 0: 15]@ 119784 EXEC: L2[ 0: 15]@ 119784 , Size: 276 + S28_Infos INSTALL: HyperFlash[ 0: 15]@ 426012 LOAD: L2[ 0: 15]@ 121308 EXEC: L2[ 0: 15]@ 121308 , Size: 9 + Dscnnconv_ds_5dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 405168 LOAD: L2[ 0: 15]@ 100464 EXEC: L2[ 0: 15]@ 100464 , Size: 2484 + Dscnnconv_ds_5dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 417588 LOAD: L2[ 0: 15]@ 112884 EXEC: L2[ 0: 15]@ 112884 , Size: 1104 + S31_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 424764 LOAD: L2[ 0: 15]@ 120060 EXEC: L2[ 0: 15]@ 120060 , Size: 276 + S31_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 425040 LOAD: L2[ 0: 15]@ 120336 EXEC: L2[ 0: 15]@ 120336 , Size: 276 + S31_Infos INSTALL: HyperFlash[ 0: 15]@ 426024 LOAD: L2[ 0: 15]@ 121320 EXEC: L2[ 0: 15]@ 121320 , Size: 9 + Dscnnconv_ds_5pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 304704 LOAD: L2[ 0: 15]@ 0 EXEC: L2[ 0: 15]@ 0 , Size: 76176 + Dscnnconv_ds_5pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 418692 LOAD: L2[ 0: 15]@ 113988 EXEC: L2[ 0: 15]@ 113988 , Size: 1104 + S34_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 425316 LOAD: L2[ 0: 15]@ 120612 EXEC: L2[ 0: 15]@ 120612 , Size: 276 + S34_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 425592 LOAD: L2[ 0: 15]@ 120888 EXEC: L2[ 0: 15]@ 120888 , Size: 276 + S34_Infos INSTALL: HyperFlash[ 0: 15]@ 426036 LOAD: L2[ 0: 15]@ 121332 EXEC: L2[ 0: 15]@ 121332 , Size: 9 + S35_Infos INSTALL: HyperFlash[ 0: 15]@ 426048 LOAD: L2[ 0: 15]@ 121344 EXEC: L2[ 0: 15]@ 121344 , Size: 9 + Dscnnfc1weights_quantfakequant INSTALL: HyperFlash[ 0: 15]@ 391920 LOAD: L2[ 0: 15]@ 87216 EXEC: L2[ 0: 15]@ 87216 , Size: 3312 + Dscnnfc1matmul_bias INSTALL: HyperFlash[ 0: 15]@ 425868 LOAD: L2[ 0: 15]@ 121164 EXEC: L2[ 0: 15]@ 121164 , Size: 48 + S38_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 426060 LOAD: L2[ 0: 15]@ 121356 EXEC: L2[ 0: 15]@ 121356 , Size: 12 + S38_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 426072 LOAD: L2[ 0: 15]@ 121368 EXEC: L2[ 0: 15]@ 121368 , Size: 12 + S38_Infos INSTALL: HyperFlash[ 0: 15]@ 426084 LOAD: L2[ 0: 15]@ 121380 EXEC: L2[ 0: 15]@ 121380 , Size: 9 + S39_Infos INSTALL: HyperFlash[ 0: 15]@ 426096 LOAD: L2[ 0: 15]@ 121392 EXEC: L2[ 0: 15]@ 121392 , Size: 9 + Output_1 Externally allocated + S4_Output EXEC: L2[ 1: 2]@ 139344 , Size: 69000 + S7_Output EXEC: L2[ 2: 3]@ 121404 , Size: 17940 + S10_Output EXEC: L2[ 3: 4]@ 139344 , Size: 17940 + S13_Output EXEC: L2[ 4: 5]@ 121404 , Size: 17940 + S16_Output EXEC: L2[ 5: 6]@ 139344 , Size: 17940 + S19_Output EXEC: L2[ 6: 7]@ 121404 , Size: 17940 + S22_Output EXEC: L2[ 7: 8]@ 139344 , Size: 17940 + S25_Output EXEC: L2[ 8: 9]@ 157284 , Size: 17940 + S28_Output EXEC: L2[ 9: 10]@ 121404 , Size: 17940 + S31_Output EXEC: L2[ 10: 11]@ 139344 , Size: 17940 + S34_Output EXEC: L2[ 11: 12]@ 121404 , Size: 17940 + S35_Output EXEC: L2[ 12: 13]@ 139344 , Size: 276 + S38_Output EXEC: L2[ 13: 14]@ 121404 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S4_Conv2d_276x1x10x4_Relu +Generating Code For User Kernel: S7_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S10_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S13_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S16_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S19_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S22_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S25_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S28_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S31_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S34_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S35_AveragePool_13x5 +Generating Code For User Kernel: S38_Linear_12x276x1x1 +Generating Code For User Kernel: S39_SoftMax +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1weights_quantfakequ.tensor: 11040 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1conv2d_fold_bias.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_5dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_5dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S31_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S31_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S31_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_5pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_5pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S34_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S34_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S34_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S35_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1weights_quantfakequant.tensor: 3312 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1matmul_bias.tensor: 12 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S38_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S38_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S38_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S39_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_l_quant_L3_Flash_Const.dat (size 426108) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 48116 +L2 Memory size (Bytes) : Given: 350000, Used: 284517 +L3 Memory size (Bytes) : Given: 6388608, Used: 304704 + +L3 Memory bandwidth for 1 graph run : 304704 Bytes +L2 Memory bandwidth for 1 graph run : 1373906 Bytes +Sum of all Kernels arguments size : 923956 Bytes +Tiling Bandwith overhead : 1.486982 Move/KerArgSize +Sum of baseline bandwidth : 50099340 Bytes +Percentage of baseline BW for L2 : 2.74236 % +Percentage of baseline BW for L3 : 0.6082 % +Sum of all Kernels operations : 28504464 Operations +Total amount of flash coefficients : 426108 Bytes + +Basic kernels library : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_l_quant.h +Output Directory : BUILD_MODEL_SQ8_EMUL + +The following files have been generated: + KWS_ds_cnn_l_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_l_quantKernels.h Header file for the generated C code + KWS_ds_cnn_l_quant_L3_Flash_Const.dat Flash content for Graph constants +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o -lm +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +W0608 14:05:09.750986 140676110542656 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +W0608 14:05:09.751533 140676110542656 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +W0608 14:05:09.751920 140676110542656 module_wrapper.py:139] From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +2021-06-08 14:05:09.753730: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA +2021-06-08 14:05:09.766392: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2096060000 Hz +2021-06-08 14:05:09.766994: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x563ae5bcc430 initialized for platform Host (this does not guarantee that XLA will be used). Devices: +2021-06-08 14:05:09.767063: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2021-06-08 14:05:09.770229: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/marco-gwt/GWT/gap_sdk/install/workstation/lib +2021-06-08 14:05:09.770311: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: UNKNOWN ERROR (303) +2021-06-08 14:05:09.770367: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ubuntu): /proc/driver/nvidia/version does not exist +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +W0608 14:07:50.166987 140676110542656 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +W0608 14:07:50.172054 140676110542656 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +W0608 14:07:50.812488 140676110542656 module_wrapper.py:139] From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +INFO:tensorflow:Validation set size:4445 +I0608 14:07:50.813130 140676110542656 test_accuracy_emul.py:111] Validation set size:4445 +INFO:tensorflow:Test set size:4890 +I0608 14:13:23.403684 140676110542656 test_accuracy_emul.py:157] Test set size:4890 +rm: cannot remove 'test.pgm': No such file or directory +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': True} +Pred/Tot: 97/ 100 Accuracy: 97.00% +Pred/Tot: 194/ 200 Accuracy: 97.00% +Pred/Tot: 286/ 300 Accuracy: 95.33% +Pred/Tot: 380/ 400 Accuracy: 95.00% +Pred/Tot: 477/ 500 Accuracy: 95.40% +Pred/Tot: 571/ 600 Accuracy: 95.17% +Pred/Tot: 669/ 700 Accuracy: 95.57% +Pred/Tot: 760/ 800 Accuracy: 95.00% +Pred/Tot: 853/ 900 Accuracy: 94.78% +Pred/Tot: 947/1000 Accuracy: 94.70% +Pred/Tot: 1039/1100 Accuracy: 94.45% +Pred/Tot: 1136/1200 Accuracy: 94.67% +Pred/Tot: 1228/1300 Accuracy: 94.46% +Pred/Tot: 1322/1400 Accuracy: 94.43% +Pred/Tot: 1416/1500 Accuracy: 94.40% +Pred/Tot: 1513/1600 Accuracy: 94.56% +Pred/Tot: 1609/1700 Accuracy: 94.65% +Pred/Tot: 1706/1800 Accuracy: 94.78% +Pred/Tot: 1801/1900 Accuracy: 94.79% +Pred/Tot: 1898/2000 Accuracy: 94.90% +Pred/Tot: 1995/2100 Accuracy: 95.00% +Pred/Tot: 2091/2200 Accuracy: 95.05% +Pred/Tot: 2186/2300 Accuracy: 95.04% +Pred/Tot: 2278/2400 Accuracy: 94.92% +Pred/Tot: 2368/2500 Accuracy: 94.72% +Pred/Tot: 2460/2600 Accuracy: 94.62% +Pred/Tot: 2557/2700 Accuracy: 94.70% +Pred/Tot: 2653/2800 Accuracy: 94.75% +Pred/Tot: 2748/2900 Accuracy: 94.76% +Pred/Tot: 2843/3000 Accuracy: 94.77% +Pred/Tot: 2940/3100 Accuracy: 94.84% +Pred/Tot: 3035/3200 Accuracy: 94.84% +Pred/Tot: 3131/3300 Accuracy: 94.88% +Pred/Tot: 3222/3400 Accuracy: 94.76% +Pred/Tot: 3318/3500 Accuracy: 94.80% +Pred/Tot: 3414/3600 Accuracy: 94.83% +Pred/Tot: 3507/3700 Accuracy: 94.78% +Pred/Tot: 3604/3800 Accuracy: 94.84% +Pred/Tot: 3694/3900 Accuracy: 94.72% +Pred/Tot: 3788/4000 Accuracy: 94.70% +Pred/Tot: 3884/4100 Accuracy: 94.73% +Pred/Tot: 3979/4200 Accuracy: 94.74% +Pred/Tot: 4072/4300 Accuracy: 94.70% +Pred/Tot: 4169/4400 Accuracy: 94.75% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4211/4444 Accuracy: 94.76% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 0 337 0 1 4 7 3 3 7 2 3 4] + [ 0 2 384 3 0 1 3 2 0 0 0 2] + [ 1 11 0 381 0 1 3 0 0 1 1 7] + [ 0 2 1 0 327 0 2 0 2 10 5 1] + [ 0 0 0 9 0 360 1 0 0 1 3 3] + [ 0 3 5 0 0 0 341 2 0 0 0 1] + [ 0 6 0 1 0 0 2 352 0 1 1 0] + [ 0 8 0 2 3 0 0 1 341 8 0 0] + [ 0 2 1 1 17 0 1 1 6 340 3 1] + [ 1 3 0 0 7 1 1 0 0 3 334 0] + [ 0 11 0 9 0 4 0 0 1 3 1 343]] +Pred/Tot: 91/ 100 Accuracy: 91.00% +Pred/Tot: 187/ 200 Accuracy: 93.50% +Pred/Tot: 282/ 300 Accuracy: 94.00% +Pred/Tot: 377/ 400 Accuracy: 94.25% +Pred/Tot: 469/ 500 Accuracy: 93.80% +Pred/Tot: 563/ 600 Accuracy: 93.83% +Pred/Tot: 659/ 700 Accuracy: 94.14% +Pred/Tot: 755/ 800 Accuracy: 94.38% +Pred/Tot: 852/ 900 Accuracy: 94.67% +Pred/Tot: 947/1000 Accuracy: 94.70% +Pred/Tot: 1038/1100 Accuracy: 94.36% +Pred/Tot: 1136/1200 Accuracy: 94.67% +Pred/Tot: 1229/1300 Accuracy: 94.54% +Pred/Tot: 1324/1400 Accuracy: 94.57% +Pred/Tot: 1417/1500 Accuracy: 94.47% +Pred/Tot: 1514/1600 Accuracy: 94.62% +Pred/Tot: 1607/1700 Accuracy: 94.53% +Pred/Tot: 1705/1800 Accuracy: 94.72% +Pred/Tot: 1795/1900 Accuracy: 94.47% +Pred/Tot: 1890/2000 Accuracy: 94.50% +Pred/Tot: 1985/2100 Accuracy: 94.52% +Pred/Tot: 2081/2200 Accuracy: 94.59% +Pred/Tot: 2175/2300 Accuracy: 94.57% +Pred/Tot: 2272/2400 Accuracy: 94.67% +Pred/Tot: 2367/2500 Accuracy: 94.68% +Pred/Tot: 2462/2600 Accuracy: 94.69% +Pred/Tot: 2557/2700 Accuracy: 94.70% +Pred/Tot: 2653/2800 Accuracy: 94.75% +Pred/Tot: 2749/2900 Accuracy: 94.79% +Pred/Tot: 2843/3000 Accuracy: 94.77% +Pred/Tot: 2937/3100 Accuracy: 94.74% +Pred/Tot: 3034/3200 Accuracy: 94.81% +Pred/Tot: 3133/3300 Accuracy: 94.94% +Pred/Tot: 3225/3400 Accuracy: 94.85% +Pred/Tot: 3319/3500 Accuracy: 94.83% +Pred/Tot: 3416/3600 Accuracy: 94.89% +Pred/Tot: 3508/3700 Accuracy: 94.81% +Pred/Tot: 3604/3800 Accuracy: 94.84% +Pred/Tot: 3701/3900 Accuracy: 94.90% +Pred/Tot: 3796/4000 Accuracy: 94.90% +Pred/Tot: 3888/4100 Accuracy: 94.83% +Pred/Tot: 3985/4200 Accuracy: 94.88% +Pred/Tot: 4080/4300 Accuracy: 94.88% +Pred/Tot: 4174/4400 Accuracy: 94.86% +Pred/Tot: 4264/4500 Accuracy: 94.76% +Pred/Tot: 4357/4600 Accuracy: 94.72% +Pred/Tot: 4453/4700 Accuracy: 94.74% +Pred/Tot: 4547/4800 Accuracy: 94.73% + +FINAL TESTING ACCURACY: +Pred/Tot: 4633/4889 Accuracy: 94.76% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 374 0 3 1 9 2 4 7 1 4 3] + [ 0 5 409 4 0 0 1 0 0 0 0 0] + [ 0 3 1 393 0 3 1 0 0 1 0 3] + [ 0 5 0 0 398 3 0 0 3 9 5 2] + [ 0 7 1 10 0 380 2 1 0 0 2 3] + [ 0 8 3 1 2 0 396 2 0 0 0 0] + [ 0 9 0 0 0 0 4 380 1 0 2 0] + [ 0 8 0 0 1 3 1 0 368 11 1 3] + [ 0 4 0 1 20 0 1 0 4 367 1 4] + [ 0 2 0 0 2 3 0 2 0 1 396 5] + [ 0 10 1 21 0 3 0 1 0 1 1 364]] diff --git a/accuracy_log/log_test_large_power_v2_norm9.txt b/accuracy_log/log_test_large_power_v2_norm9.txt new file mode 100644 index 0000000..8aa5152 --- /dev/null +++ b/accuracy_log/log_test_large_power_v2_norm9.txt @@ -0,0 +1,2349 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_l_quant_power.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 1 +WARNING:tensorflow:From utils/test_accuracy_emul.py:311: The name tf.app.run is deprecated. Please use tf.compat.v1.app.run instead. + +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from main_emulation.c:32: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_BasicKernels_SQ8.h:3, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.h:5, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.c:1: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:17: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WriteImageToFile’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:437:18: note: in expansion of macro ‘__OPEN_WRITE’ + 437 | void *File = __OPEN_WRITE(fs, ImageName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:454:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 454 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:460:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 460 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:473:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 473 | ret+=__WRITE(File,OutBuffer +(CHUNK_SIZE*i), CHUNK_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:476:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 476 | ret+=__WRITE(File,OutBuffer+(CHUNK_SIZE*steps) , ((W*H*PixelSize) % CHUNK_SIZE)*sizeof(unsigned char)); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:479:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 479 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:57: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result] + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:9: note: in expansion of macro ‘__WRITE’ + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:7: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c: In function ‘AT_TensorGetNextPage’: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:79:63: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 79 | AT_HYPERFLASH_FS_CL_COPY((AT_HYPERFLASH_FS_T *) L3_Device, (AT_HYPERFLASH_FS_EXT_ADDR_TYPE) (Addr+Offset), (AT_HYPERFLASH_FS_INT_ADDR_TYPE) L2_BufferAddr, Size, 0, L3_Event); + | ^ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:225:36: note: in definition of macro ‘AT_HYPERFLASH_FS_CL_COPY’ + 225 | __at_hyperflash_fs_copy(*(file), ext, loc, size, dir) + | ^~~ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c: In function ‘WriteWavToFileNew’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:236:18: note: in expansion of macro ‘__OPEN_WRITE’ + 236 | void *File = __OPEN_WRITE(fs, FileName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:328:20: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 328 | ret += __WRITE(File, header_buffer, WAV_HEADER_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:335:21: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 335 | ret += __WRITE(File, data, Size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:338:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 338 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=0 LARGE=1 WITH_MFCC=1 USE_POWER=1 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_SQ8_EMUL +cp model/KWS_ds_cnn_l_quant_power.tflite BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quant.tflite +echo "GENERATING NNTOOL STATE FILE" +GENERATING NNTOOL STATE FILE +nntool -s model/nntool_script BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +open - opening graph file BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quant.tflite load_quantization = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +forwards SOFTMAX_0_13 in: -33.88<(i8-0.00)*0.26467398<33.61 out: None stop [] fusion False +handler SoftmaxTanHMult selected for SoftMaxParameters(SOFTMAX_0_13) +forwards handler SOFTMAX_0_13 returned in: -64.00<(i8-0.00)*0.50000000<63.50 forced out: -1.00<(i16-0.00)*0.00003052<1.00 fusion False +forwards in edge 0 does not match was -33.88<(i8-0.00)*0.26467398<33.61 need -64.00<(i8-0.00)*0.50000000<63.50 forced +go backwackwards to F 12x1x1x276 B 1 +backwards FULLY_CONNECTED_0_12 in: -8.63<(i8-0.00)*0.06741016<8.56,chan<(i8-0.00)*chan W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 2829000 + +==== Process Tiling For User Kernel: S4_Conv2d_276x1x10x4_Relu ======================= +S4_Conv2d_276x1x10x4_Relu Partition[0] Size = 312657 (Min: 200, Max: 440689), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=25 + In Dim: 58, TileOverlap: 8, Ratio: 2.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S4_Conv2d_276x1x10x4_Relu Full buffering on Arg: Scale, was using 288 Bytes will require 276 Bytes buffer +S4_Conv2d_276x1x10x4_Relu Full buffering on Arg: ScaleN, was using 288 Bytes will require 276 Bytes buffer +S4_Conv2d_276x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 7 Parametric Space: [D1, M0=144] Parametric Space: [D0, M1=1] + In : Ratio: 2.000000, FixDim: 10, VarDim: 16 [ 52], Size: 320, Total: 320, Move: 1940 (Decl x 3.959184) L2 + Bias : Ratio: 0.000000, Size: 1152, Total: 1472, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 1748, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 2024, Move: 276 (Decl x 1.000000) L2 +@ Filter : Ratio: 0.000000, Size: 11520, Total: 13544, Move: 11040 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 4 [ 25], Size: 11520, Total: 25064, Move: 69000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 4 [ 25], Size: 23040, Total: 48104, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 48116, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_276x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 48116, L2Move: 83645, L3Move: 0, Tiling Overhead: 1.017641 +S4_Conv2d_276x1x10x4_Relu Partial buffering on Arg: Filter, From: D0 To: D1. Current is (Par) 1 x [W:1, H:1] x 40 => Partial buffer size is 11520 Bytes +S4_Conv2d_276x1x10x4_Relu Found Parametric value for space D1 (Initial: 276, Div: 8) = 144 [144*1 + 132] and space D0 (Initial: 1, Div: 4) = 1 [1*1 + 0], Iteration for Tiled Space: 7 +Kernel: S4_Conv2d_276x1x10x4_Relu, Arg: In, Last Tile: 2+8, Pad: 5 => Requires Padding of tile N-1 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: In, Size: 160, Base1: 0, Base2: 160 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Bias, Size: 576, Base1: 320, Base2: 896 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Scale, Size: 276, Base1: 1472, Base2: 0 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: ScaleN, Size: 276, Base1: 1748, Base2: 0 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Filter, Size: 5760, Base1: 2024, Base2: 7784 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Out, Size: 5760, Base1: 13544, Base2: 19304 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: ConvOut, Size: 23040, Base1: 25064, Base2: 0 +Ker: S4_Conv2d_276x1x10x4_Relu, Arg: Infos, Size: 12, Base1: 48104, Base2: 0 +S4_Conv2d_276x1x10x4_Relu For Iter Space: 0 Iteration count: 7 (Last one is truncated), Given L1 Memory: 48736, Used L1 Memory: 48116, Reusable Memory: 620, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S7_Conv2d_276x1x3x3_Relu ======================= +S7_Conv2d_276x1x3x3_Relu Partition[0] Size = 102145 (Min: 60, Max: 265097), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=13 + In Dim: 27, TileOverlap: 1, Ratio: 2.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S7_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 384 Bytes will require 1104 Bytes buffer +S7_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 96 Bytes will require 276 Bytes buffer +S7_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 Bytes will require 276 Bytes buffer +S7_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 864 Bytes will require 2484 Bytes buffer +S7_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 69000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 25104, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 25380, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 25656, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 28140, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 34380, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 46860, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 46872, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 46872, L2Move: 91089, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 48 [48*5 + 36], Iteration for Tiled Space: 1 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: In, Size: 12000, Base1: 0, Base2: 12000 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 24000, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 25104, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 25380, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 25656, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 3120, Base1: 28140, Base2: 31260 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 12480, Base1: 34380, Base2: 0 +Ker: S7_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 46860, Base2: 0 +S7_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 46872, Reusable Memory: 1864, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S10_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S10_Conv2d_276x276x1x1_Relu ======================= +S10_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S10_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S10_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S10_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S10_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S10_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S10_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S10_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S10_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S13_Conv2d_276x1x3x3_Relu ======================= +S13_Conv2d_276x1x3x3_Relu Partition[0] Size = 91105 (Min: 30, Max: 157457), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S13_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 1104 Bytes buffer +S13_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 276 Bytes buffer +S13_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 276 Bytes buffer +S13_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 2484 Bytes buffer +S13_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S13_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S13_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 10400, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 11504, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 11780, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 12056, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 14540, Base2: 19740 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 24940, Base2: 0 +Ker: S13_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45740, Base2: 0 +S13_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S16_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S16_Conv2d_276x276x1x1_Relu ======================= +S16_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S16_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S16_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S16_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S16_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S16_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S16_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S16_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S16_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S19_Conv2d_276x1x3x3_Relu ======================= +S19_Conv2d_276x1x3x3_Relu Partition[0] Size = 91105 (Min: 30, Max: 157457), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S19_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 1104 Bytes buffer +S19_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 276 Bytes buffer +S19_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 276 Bytes buffer +S19_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 2484 Bytes buffer +S19_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S19_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S19_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 10400, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 11504, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 11780, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 12056, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 14540, Base2: 19740 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 24940, Base2: 0 +Ker: S19_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45740, Base2: 0 +S19_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S22_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S22_Conv2d_276x276x1x1_Relu ======================= +S22_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S22_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S22_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S22_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S22_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S22_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S22_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S22_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S22_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S25_Conv2d_276x1x3x3_Relu ======================= +S25_Conv2d_276x1x3x3_Relu Partition[0] Size = 91105 (Min: 30, Max: 157457), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S25_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 1104 Bytes buffer +S25_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 276 Bytes buffer +S25_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 276 Bytes buffer +S25_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 2484 Bytes buffer +S25_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S25_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S25_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 10400, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 11504, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 11780, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 12056, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 14540, Base2: 19740 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 24940, Base2: 0 +Ker: S25_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45740, Base2: 0 +S25_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S28_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S28_Conv2d_276x276x1x1_Relu ======================= +S28_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S28_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S28_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S28_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S28_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S28_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S28_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S28_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S28_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 179400 + +==== Process Tiling For User Kernel: S31_Conv2d_276x1x3x3_Relu ======================= +S31_Conv2d_276x1x3x3_Relu Partition[0] Size = 91105 (Min: 30, Max: 157457), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S31_Conv2d_276x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 1104 Bytes buffer +S31_Conv2d_276x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 276 Bytes buffer +S31_Conv2d_276x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 276 Bytes buffer +S31_Conv2d_276x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 2484 Bytes buffer +S31_Conv2d_276x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 17940 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 1104, Total: 11504, Move: 1104 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 276, Total: 11780, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 276, Total: 12056, Move: 276 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 2484, Total: 14540, Move: 2484 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 24940, Move: 17940 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 45740, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45752, Move: 9 (Decl x 1.000000) L2 +S31_Conv2d_276x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45752, L2Move: 40029, L3Move: 0, Tiling Overhead: 1.000000 +S31_Conv2d_276x1x3x3_Relu Found Parametric value for space D0 (Initial: 276, Div: 8) = 80 [80*3 + 36], Iteration for Tiled Space: 1 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Bias, Size: 1104, Base1: 10400, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Scale, Size: 276, Base1: 11504, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: ScaleN, Size: 276, Base1: 11780, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Filter, Size: 2484, Base1: 12056, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 14540, Base2: 19740 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 24940, Base2: 0 +Ker: S31_Conv2d_276x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45740, Base2: 0 +S31_Conv2d_276x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45752, Reusable Memory: 2984, Used L2 Memory: 0 +================================================================================================= + +InFeat: 276, OutFeat: 276 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 4969380 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S34_Conv2d_276x276x1x1_Relu +In1 => W: 276, H: 276 +In2 => W: 65, H: 276, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 276 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S34_Conv2d_276x276x1x1_Relu ======================= +S34_Conv2d_276x276x1x1_Relu Partition[0] Size = 2233 (Min: 2208, Max: 35921), Fraction: 0.21, Giving: 10399 Bytes out of 48736 Bytes +S34_Conv2d_276x276x1x1_Relu Partition[1] Size = 8232 (Min: 4416, Max: 191088), Fraction: 0.79, Giving: 38336 Bytes out of 48736 Bytes + +Reference object: In1, Dim=276 + In1 Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 276, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 276, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 276, TileOverlap: 0, Ratio: 1.000000 + +S34_Conv2d_276x276x1x1_Relu, TiledSpace: Tile1 Iteration Count: 6 +* KerBuff : Ratio: 0.000000, Size: 1104, Total: 1104, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 276, VarDim: 48 [ 276], Size: 26496, Total: 27600, Move: 76176 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 1104, Total: 28704, Move: 1104 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 48 [ 276], Size: 6240, Total: 34944, Move: 17940 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 276, Total: 35220, Move: 276 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 276, Total: 35496, Move: 276 (Decl x 1.000000) L2 +S34_Conv2d_276x276x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35496, L2Move: 95772, L3Move: 0, Tiling Overhead: 1.000000 +S34_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 6 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: KerBuff, Size: 1104, Base1: 0, Base2: 0 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: In1, Size: 13248, Base1: 1104, Base2: 14352 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: Bias, Size: 1104, Base1: 27600, Base2: 0 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: Out, Size: 3120, Base1: 28704, Base2: 31824 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: Scale, Size: 276, Base1: 34944, Base2: 0 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: ScaleN, Size: 276, Base1: 35220, Base2: 0 +S34_Conv2d_276x276x1x1_Relu For Iter Space: 1 Iteration count: 6 (Last one is truncated), Given L1 Memory: 38336, Used L1 Memory: 35496, Reusable Memory: 2840, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S34_Conv2d_276x276x1x1_Relu, TiledSpace: Tile0 Iteration Count: 5 + In2 : Ratio: 1.000000, FixDim: 276, VarDim: 16 [ 65], Size: 8832, Total: 8832, Move: 107640 (Decl x 6.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 8844, Move: 9 (Decl x 1.000000) L2 +S34_Conv2d_276x276x1x1_Relu - IterSpace: Tile0 - L1 Memory: 8844, L2Move: 107649, L3Move: 0, Tiling Overhead: 5.997493 +S34_Conv2d_276x276x1x1_Relu Iteration for Tiled Space: 5 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: In2, Size: 4416, Base1: 35496, Base2: 39912 +Ker: S34_Conv2d_276x276x1x1_Relu, Arg: Infos, Size: 12, Base1: 44328, Base2: 0 +S34_Conv2d_276x276x1x1_Relu For Iter Space: 0 Iteration count: 5 (Last one is truncated), Given L1 Memory: 10399, Used L1 Memory: 8844, Reusable Memory: 1552, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 17940 + +==== Process Tiling For User Kernel: S35_AveragePool_13x5 ======================= +S35_AveragePool_13x5 Partition[0] Size = 36999 (Min: 130, Max: 36459), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=1 + In Dim: 13, TileOverlap: 11, Ratio: 2.000000 + Out Dim: 1, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S35_AveragePool_13x5, Total Raw Memory: 18228 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S35_AveragePool_13x5, Arg: In, Size: 17940, Base1: 0, Base2: 0 +Ker: S35_AveragePool_13x5, Arg: Out, Size: 276, Base1: 17940, Base2: 0 +Ker: S35_AveragePool_13x5, Arg: Infos, Size: 12, Base1: 18216, Base2: 0 +S35_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 18228, Reusable Memory: 30508, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S38_Linear_12x276x1x1, Linear: InDim: 276, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S38_Linear_12x276x1x1 ======================= +S38_Linear_12x276x1x1 Partition[0] Size = 7091 (Min: 0, Max: 7175), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=1 + +Kernel: S38_Linear_12x276x1x1, Total Raw Memory: 3684 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S38_Linear_12x276x1x1, Arg: In, Size: 276, Base1: 0, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Filter, Size: 3312, Base1: 276, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Bias, Size: 48, Base1: 3588, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Out, Size: 12, Base1: 3636, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Scale, Size: 12, Base1: 3648, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: ScaleN, Size: 12, Base1: 3660, Base2: 0 +Ker: S38_Linear_12x276x1x1, Arg: Infos, Size: 12, Base1: 3672, Base2: 0 +S38_Linear_12x276x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 3684, Reusable Memory: 45052, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S39_SoftMax ======================= + S39_SoftMax Partition[0] Size = 51 (Min: 8, Max: 63), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=12 + In Dim: 12, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 12, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S39_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S39_SoftMax, Arg: In, Size: 12, Base1: 0, Base2: 0 +Ker: S39_SoftMax, Arg: Out, Size: 24, Base1: 12, Base2: 0 +Ker: S39_SoftMax, Arg: Infos, Size: 12, Base1: 36, Base2: 0 + S39_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S38_Output[ In] Adding Edge From S38_Linear_12x276x1x1 To S39_SoftMax New + Symbol: S35_Output[ In] Adding Edge From S35_AveragePool_13x5 To S38_Linear_12x276x1x1 New + Symbol: S34_Output[ In] Adding Edge From S34_Conv2d_276x276x1x1_Relu To S35_AveragePool_13x5 New + Symbol: S31_Output[ In] Adding Edge From S31_Conv2d_276x1x3x3_Relu To S34_Conv2d_276x276x1x1_Relu New + Symbol: S28_Output[ In] Adding Edge From S28_Conv2d_276x276x1x1_Relu To S31_Conv2d_276x1x3x3_Relu New + Symbol: S25_Output[ In] Adding Edge From S25_Conv2d_276x1x3x3_Relu To S28_Conv2d_276x276x1x1_Relu New + Symbol: S22_Output[ In] Adding Edge From S22_Conv2d_276x276x1x1_Relu To S25_Conv2d_276x1x3x3_Relu New + Symbol: S19_Output[ In] Adding Edge From S19_Conv2d_276x1x3x3_Relu To S22_Conv2d_276x276x1x1_Relu New + Symbol: S16_Output[ In] Adding Edge From S16_Conv2d_276x276x1x1_Relu To S19_Conv2d_276x1x3x3_Relu New + Symbol: S13_Output[ In] Adding Edge From S13_Conv2d_276x1x3x3_Relu To S16_Conv2d_276x276x1x1_Relu New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_276x276x1x1_Relu To S13_Conv2d_276x1x3x3_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_276x1x3x3_Relu To S10_Conv2d_276x276x1x1_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_276x1x10x4_Relu To S7_Conv2d_276x1x3x3_Relu New + Symbol: Output_1[ Out] Adding Edge From S39_SoftMax To __GraphExit__ New + Symbol: S39_Infos[ In] Adding Edge From __GraphEntry__ To S39_SoftMax New + Symbol: S38_Infos[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 New + Symbol: S38_Mul_shift[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 Exists + Symbol: S38_Mul_scale[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 Exists + Symbol: Dscnnfc1matmul_bias[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 Exists + Symbol: Dscnnfc1weights_quantfakequant[ In] Adding Edge From __GraphEntry__ To S38_Linear_12x276x1x1 Exists + Symbol: S35_Infos[ In] Adding Edge From __GraphEntry__ To S35_AveragePool_13x5 New + Symbol: S34_Infos[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu New + Symbol: S34_Mul_shift[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu Exists + Symbol: S34_Mul_scale[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_5pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_5pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S34_Conv2d_276x276x1x1_Relu Exists + Symbol: S31_Infos[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu New + Symbol: S31_Mul_shift[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu Exists + Symbol: S31_Mul_scale[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_5dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_5dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S31_Conv2d_276x1x3x3_Relu Exists + Symbol: S28_Infos[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu New + Symbol: S28_Mul_shift[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu Exists + Symbol: S28_Mul_scale[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_276x276x1x1_Relu Exists + Symbol: S25_Infos[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu New + Symbol: S25_Mul_shift[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu Exists + Symbol: S25_Mul_scale[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_276x1x3x3_Relu Exists + Symbol: S22_Infos[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu New + Symbol: S22_Mul_shift[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu Exists + Symbol: S22_Mul_scale[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_276x276x1x1_Relu Exists + Symbol: S19_Infos[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu New + Symbol: S19_Mul_shift[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu Exists + Symbol: S19_Mul_scale[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_276x1x3x3_Relu Exists + Symbol: S16_Infos[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu New + Symbol: S16_Mul_shift[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu Exists + Symbol: S16_Mul_scale[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_276x276x1x1_Relu Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu Exists + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_276x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu New + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu Exists + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_276x276x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu New + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu Exists + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_276x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu New + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists + Symbol: Dscnnconv_1conv2d_fold_bias[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists + Symbol: Dscnnconv_1weights_quantfakequ[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists + Symbol: Input_1[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_276x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 86940 => Alloc: OK + +After Const Allocation, TopL3: 304704, TopL2: 284517 => Alloc: OK + +[FULL] Remapping [163116 .. 284516] to [0 .. 121400] Align compensation: 3 +[PART] Remapping [0 .. 163115] to [121404 .. 284519] Align compensation: 0 +[PART] Remapping [284517 .. 349999] to [284520 .. 350002] Align compensation: 1 +[FULL] Remapping [0 .. 304703] to [0 .. 304703] Align compensation: 0 +[PART] Remapping [304704 .. 6388607] to [304704 .. 6388607] Align compensation: 0 +Symbol allocation for graph KWS_ds_cnn_l_quantCNN is sucessfull, L2: 284517 out of 350000, L3: 304704 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => Input_1 + (null) => Dscnnconv_1weights_quantfakequ + (null) => Dscnnconv_1conv2d_fold_bias + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S4_Infos + (null) => Dscnnconv_ds_1dw_convweights_q + (null) => Dscnnconv_ds_1dw_convdepthwise + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S7_Infos + (null) => Dscnnconv_ds_1pw_convweights_q + (null) => Dscnnconv_ds_1pw_convconv2d_fo + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S10_Infos + (null) => Dscnnconv_ds_2dw_convweights_q + (null) => Dscnnconv_ds_2dw_convdepthwise + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S13_Infos + (null) => Dscnnconv_ds_2pw_convweights_q + (null) => Dscnnconv_ds_2pw_convconv2d_fo + (null) => S16_Mul_scale + (null) => S16_Mul_shift + (null) => S16_Infos + (null) => Dscnnconv_ds_3dw_convweights_q + (null) => Dscnnconv_ds_3dw_convdepthwise + (null) => S19_Mul_scale + (null) => S19_Mul_shift + (null) => S19_Infos + (null) => Dscnnconv_ds_3pw_convweights_q + (null) => Dscnnconv_ds_3pw_convconv2d_fo + (null) => S22_Mul_scale + (null) => S22_Mul_shift + (null) => S22_Infos + (null) => Dscnnconv_ds_4dw_convweights_q + (null) => Dscnnconv_ds_4dw_convdepthwise + (null) => S25_Mul_scale + (null) => S25_Mul_shift + (null) => S25_Infos + (null) => Dscnnconv_ds_4pw_convweights_q + (null) => Dscnnconv_ds_4pw_convconv2d_fo + (null) => S28_Mul_scale + (null) => S28_Mul_shift + (null) => S28_Infos + (null) => Dscnnconv_ds_5dw_convweights_q + (null) => Dscnnconv_ds_5dw_convdepthwise + (null) => S31_Mul_scale + (null) => S31_Mul_shift + (null) => S31_Infos + (null) => Dscnnconv_ds_5pw_convweights_q + (null) => Dscnnconv_ds_5pw_convconv2d_fo + (null) => S34_Mul_scale + (null) => S34_Mul_shift + (null) => S34_Infos + (null) => S35_Infos + (null) => Dscnnfc1weights_quantfakequant + (null) => Dscnnfc1matmul_bias + (null) => S38_Mul_scale + (null) => S38_Mul_shift + (null) => S38_Infos + (null) => S39_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 13 14 + + Living Dynamic Symbols: [Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S4_Conv2d_276x1x10x4_Relu, Operations: 2829000 + I In => Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 1940, TileOverhead: 3.959184, L2Buff: 0, Addr: 0 +CI PartBuff Filter => Dscnnconv_1weights_quantfakequ --L2-- Size: 11040, L3_Move: 0, L2_Move: 11040, TileOverhead: 1.000000, L2Buff: 0, Addr: 2024 +CI Bias => Dscnnconv_1conv2d_fold_bias --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 320 + O Out => S4_Output --L2-- Size: 69000, L3_Move: 0, L2_Move: 69000, TileOverhead: 1.000000, L2Buff: 0, Addr: 13544 +CI Buff Scale => S4_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 1472 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 1748 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 48104 + Kernel Memory : L3: 0, L2: 82195 + Kernel Total Memory: 82195, L3 moves: 0, L2 moves: 83645, Move overhead: 1.017641 + Kernel Operations : 2829000 [KernelOper/GraphOper: 9.924761%], Move/Operation ratio: [L3: 0.000000, L2: 0.029567] + Successors: 2 + + Living Dynamic Symbols: [Input_1] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S7_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S4_Output --L2-- Size: 69000, L3_Move: 0, L2_Move: 69000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_1dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 25656 +CI Buff Bias => Dscnnconv_ds_1dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S7_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28140 +CI Buff Scale => S7_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 25104 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 25380 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 46860 + Kernel Memory : L3: 0, L2: 91089 + Kernel Total Memory: 91089, L3 moves: 0, L2 moves: 91089, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.507742] + Successors: 3 + + Living Dynamic Symbols: [S4_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S10_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S7_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_1pw_convweights_q --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_1pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S10_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S10_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.041083] + Successors: 4 + + Living Dynamic Symbols: [S7_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S13_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S10_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_2dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => Dscnnconv_ds_2dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S13_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S13_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 5 + + Living Dynamic Symbols: [S10_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S16_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S13_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_2pw_convweights_q --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_2pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S16_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S16_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S16_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S16_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.041083] + Successors: 6 + + Living Dynamic Symbols: [S13_Output] [S16_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S19_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S16_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_3dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => Dscnnconv_ds_3dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S19_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S19_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S19_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S19_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 7 + + Living Dynamic Symbols: [S16_Output] [S19_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S22_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S19_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_3pw_convweights_q --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_3pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S22_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S22_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S22_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S22_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.041083] + Successors: 8 + + Living Dynamic Symbols: [S19_Output] [S22_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S25_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S22_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_4dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => Dscnnconv_ds_4dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S25_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S25_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S25_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S25_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 9 + + Living Dynamic Symbols: [S22_Output] [S25_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S28_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S25_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_4pw_convweights_q --L2-- Size: 76176, L3_Move: 76176, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_4pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S28_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S28_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S28_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S28_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 76176, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.015385, L2: 0.041083] + Successors: 10 + + Living Dynamic Symbols: [S25_Output] [S28_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S31_Conv2d_276x1x3x3_Relu, Operations: 179400 + I In => S28_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_5dw_convweights_q --L2-- Size: 2484, L3_Move: 0, L2_Move: 2484, TileOverhead: 1.000000, L2Buff: 0, Addr: 12056 +CI Buff Bias => Dscnnconv_ds_5dw_convdepthwise --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S31_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 14540 +CI Buff Scale => S31_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11504 +CI Buff ScaleN => S31_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 11780 +CI Buff Infos => S31_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45740 + Kernel Memory : L3: 0, L2: 40029 + Kernel Total Memory: 40029, L3 moves: 0, L2 moves: 40029, Move overhead: 1.000000 + Kernel Operations : 179400 [KernelOper/GraphOper: 0.629375%], Move/Operation ratio: [L3: 0.000000, L2: 0.223127] + Successors: 11 + + Living Dynamic Symbols: [S28_Output] [S31_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S34_Conv2d_276x276x1x1_Relu, Operations: 4951440 + I In2 => S31_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 107640, TileOverhead: 6.000000, L2Buff: 0, Addr: 35496 +CI In1 => Dscnnconv_ds_5pw_convweights_q --L2-- Size: 76176, L3_Move: 0, L2_Move: 76176, TileOverhead: 1.000000, L2Buff: 0, Addr: 1104 +CI Buff Bias => Dscnnconv_ds_5pw_convconv2d_fo --L2-- Size: 1104, L3_Move: 0, L2_Move: 1104, TileOverhead: 1.000000, L2Buff: 0, Addr: 27600 + O Out => S34_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 28704 +CI Buff Scale => S34_Mul_scale --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 34944 +CI Buff ScaleN => S34_Mul_shift --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 35220 +CI Buff Infos => S34_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44328 + Kernel Memory : L3: 0, L2: 113721 + Kernel Total Memory: 113721, L3 moves: 0, L2 moves: 203421, Move overhead: 1.788772 + Kernel Operations : 4951440 [KernelOper/GraphOper: 17.370753%], Move/Operation ratio: [L3: 0.000000, L2: 0.041083] + Successors: 12 + + Living Dynamic Symbols: [S31_Output] [S34_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S35_AveragePool_13x5, Operations: 17940 + I Buff In => S34_Output --L2-- Size: 17940, L3_Move: 0, L2_Move: 17940, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S35_Output --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 17940 +CI Buff Infos => S35_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 18216 + Kernel Memory : L3: 0, L2: 18225 + Kernel Total Memory: 18225, L3 moves: 0, L2 moves: 18225, Move overhead: 1.000000 + Kernel Operations : 17940 [KernelOper/GraphOper: 0.062938%], Move/Operation ratio: [L3: 0.000000, L2: 1.015886] + Successors: 13 + + Living Dynamic Symbols: [S34_Output] [S35_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: UKer S38_Linear_12x276x1x1, Operations: 3312 + I Buff In => S35_Output --L2-- Size: 276, L3_Move: 0, L2_Move: 276, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnfc1weights_quantfakequant --L2-- Size: 3312, L3_Move: 0, L2_Move: 3312, TileOverhead: 1.000000, L2Buff: 0, Addr: 276 +CI Buff Bias => Dscnnfc1matmul_bias --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 3588 + O Buff Out => S38_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3636 +CI Buff Scale => S38_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3648 +CI Buff ScaleN => S38_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 3660 +CI Buff Infos => S38_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 3672 + Kernel Memory : L3: 0, L2: 3681 + Kernel Total Memory: 3681, L3 moves: 0, L2 moves: 3681, Move overhead: 1.000000 + Kernel Operations : 3312 [KernelOper/GraphOper: 0.011619%], Move/Operation ratio: [L3: 0.000000, L2: 1.111413] + Successors: 14 + + Living Dynamic Symbols: [S35_Output] [S38_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 14, Channel 0 0: UKer S39_SoftMax, Operations: 12 + I Buff In => S38_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S39_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000042%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 15 + + Living Dynamic Symbols: [Output_1] [S38_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 15, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 113721 + Graph nodes min global memory: L3: 76176, L2: 113724 + Graph sum of kernel arguments size: 923956, L3 moves: 304704, L2 moves: 1373906, Move overhead: 1.486982 + Graph total operations: 28504464 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 923956, Total L3_Move: 304704, Total L2_Move: 1373906, Tiling Overhead Average: 1.486982 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic -228528 86940 + Const 304704 77844 + Total 76176 113724 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + Input_1 Externally allocated + Dscnnconv_1weights_quantfakequ INSTALL: HyperFlash[ 0: 15]@ 380880 LOAD: L2[ 0: 15]@ 76176 EXEC: L2[ 0: 15]@ 76176 , Size: 11040 + Dscnnconv_1conv2d_fold_bias INSTALL: HyperFlash[ 0: 15]@ 407652 LOAD: L2[ 0: 15]@ 102948 EXEC: L2[ 0: 15]@ 102948 , Size: 1104 + S4_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 419796 LOAD: L2[ 0: 15]@ 115092 EXEC: L2[ 0: 15]@ 115092 , Size: 276 + S4_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 420072 LOAD: L2[ 0: 15]@ 115368 EXEC: L2[ 0: 15]@ 115368 , Size: 276 + S4_Infos INSTALL: HyperFlash[ 0: 15]@ 425916 LOAD: L2[ 0: 15]@ 121212 EXEC: L2[ 0: 15]@ 121212 , Size: 9 + Dscnnconv_ds_1dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 395232 LOAD: L2[ 0: 15]@ 90528 EXEC: L2[ 0: 15]@ 90528 , Size: 2484 + Dscnnconv_ds_1dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 408756 LOAD: L2[ 0: 15]@ 104052 EXEC: L2[ 0: 15]@ 104052 , Size: 1104 + S7_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 420348 LOAD: L2[ 0: 15]@ 115644 EXEC: L2[ 0: 15]@ 115644 , Size: 276 + S7_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 420624 LOAD: L2[ 0: 15]@ 115920 EXEC: L2[ 0: 15]@ 115920 , Size: 276 + S7_Infos INSTALL: HyperFlash[ 0: 15]@ 425928 LOAD: L2[ 0: 15]@ 121224 EXEC: L2[ 0: 15]@ 121224 , Size: 9 + Dscnnconv_ds_1pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 0 LOAD: HyperRam[ 0: 15]@ 0 EXEC: L2[ 1: 3]@ 208344 , Size: 76176 + Dscnnconv_ds_1pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 409860 LOAD: L2[ 0: 15]@ 105156 EXEC: L2[ 0: 15]@ 105156 , Size: 1104 + S10_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 420900 LOAD: L2[ 0: 15]@ 116196 EXEC: L2[ 0: 15]@ 116196 , Size: 276 + S10_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 421176 LOAD: L2[ 0: 15]@ 116472 EXEC: L2[ 0: 15]@ 116472 , Size: 276 + S10_Infos INSTALL: HyperFlash[ 0: 15]@ 425940 LOAD: L2[ 0: 15]@ 121236 EXEC: L2[ 0: 15]@ 121236 , Size: 9 + Dscnnconv_ds_2dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 397716 LOAD: L2[ 0: 15]@ 93012 EXEC: L2[ 0: 15]@ 93012 , Size: 2484 + Dscnnconv_ds_2dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 410964 LOAD: L2[ 0: 15]@ 106260 EXEC: L2[ 0: 15]@ 106260 , Size: 1104 + S13_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 421452 LOAD: L2[ 0: 15]@ 116748 EXEC: L2[ 0: 15]@ 116748 , Size: 276 + S13_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 421728 LOAD: L2[ 0: 15]@ 117024 EXEC: L2[ 0: 15]@ 117024 , Size: 276 + S13_Infos INSTALL: HyperFlash[ 0: 15]@ 425952 LOAD: L2[ 0: 15]@ 121248 EXEC: L2[ 0: 15]@ 121248 , Size: 9 + Dscnnconv_ds_2pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 76176 LOAD: HyperRam[ 0: 15]@ 76176 EXEC: L2[ 4: 5]@ 157284 , Size: 76176 + Dscnnconv_ds_2pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 412068 LOAD: L2[ 0: 15]@ 107364 EXEC: L2[ 0: 15]@ 107364 , Size: 1104 + S16_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 422004 LOAD: L2[ 0: 15]@ 117300 EXEC: L2[ 0: 15]@ 117300 , Size: 276 + S16_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 422280 LOAD: L2[ 0: 15]@ 117576 EXEC: L2[ 0: 15]@ 117576 , Size: 276 + S16_Infos INSTALL: HyperFlash[ 0: 15]@ 425964 LOAD: L2[ 0: 15]@ 121260 EXEC: L2[ 0: 15]@ 121260 , Size: 9 + Dscnnconv_ds_3dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 400200 LOAD: L2[ 0: 15]@ 95496 EXEC: L2[ 0: 15]@ 95496 , Size: 2484 + Dscnnconv_ds_3dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 413172 LOAD: L2[ 0: 15]@ 108468 EXEC: L2[ 0: 15]@ 108468 , Size: 1104 + S19_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 422556 LOAD: L2[ 0: 15]@ 117852 EXEC: L2[ 0: 15]@ 117852 , Size: 276 + S19_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 422832 LOAD: L2[ 0: 15]@ 118128 EXEC: L2[ 0: 15]@ 118128 , Size: 276 + S19_Infos INSTALL: HyperFlash[ 0: 15]@ 425976 LOAD: L2[ 0: 15]@ 121272 EXEC: L2[ 0: 15]@ 121272 , Size: 9 + Dscnnconv_ds_3pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 152352 LOAD: HyperRam[ 0: 15]@ 152352 EXEC: L2[ 6: 7]@ 157284 , Size: 76176 + Dscnnconv_ds_3pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 414276 LOAD: L2[ 0: 15]@ 109572 EXEC: L2[ 0: 15]@ 109572 , Size: 1104 + S22_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 423108 LOAD: L2[ 0: 15]@ 118404 EXEC: L2[ 0: 15]@ 118404 , Size: 276 + S22_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 423384 LOAD: L2[ 0: 15]@ 118680 EXEC: L2[ 0: 15]@ 118680 , Size: 276 + S22_Infos INSTALL: HyperFlash[ 0: 15]@ 425988 LOAD: L2[ 0: 15]@ 121284 EXEC: L2[ 0: 15]@ 121284 , Size: 9 + Dscnnconv_ds_4dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 402684 LOAD: L2[ 0: 15]@ 97980 EXEC: L2[ 0: 15]@ 97980 , Size: 2484 + Dscnnconv_ds_4dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 415380 LOAD: L2[ 0: 15]@ 110676 EXEC: L2[ 0: 15]@ 110676 , Size: 1104 + S25_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 423660 LOAD: L2[ 0: 15]@ 118956 EXEC: L2[ 0: 15]@ 118956 , Size: 276 + S25_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 423936 LOAD: L2[ 0: 15]@ 119232 EXEC: L2[ 0: 15]@ 119232 , Size: 276 + S25_Infos INSTALL: HyperFlash[ 0: 15]@ 426000 LOAD: L2[ 0: 15]@ 121296 EXEC: L2[ 0: 15]@ 121296 , Size: 9 + Dscnnconv_ds_4pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 228528 LOAD: HyperRam[ 0: 15]@ 228528 EXEC: L2[ 8: 9]@ 175224 , Size: 76176 + Dscnnconv_ds_4pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 416484 LOAD: L2[ 0: 15]@ 111780 EXEC: L2[ 0: 15]@ 111780 , Size: 1104 + S28_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 424212 LOAD: L2[ 0: 15]@ 119508 EXEC: L2[ 0: 15]@ 119508 , Size: 276 + S28_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 424488 LOAD: L2[ 0: 15]@ 119784 EXEC: L2[ 0: 15]@ 119784 , Size: 276 + S28_Infos INSTALL: HyperFlash[ 0: 15]@ 426012 LOAD: L2[ 0: 15]@ 121308 EXEC: L2[ 0: 15]@ 121308 , Size: 9 + Dscnnconv_ds_5dw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 405168 LOAD: L2[ 0: 15]@ 100464 EXEC: L2[ 0: 15]@ 100464 , Size: 2484 + Dscnnconv_ds_5dw_convdepthwise INSTALL: HyperFlash[ 0: 15]@ 417588 LOAD: L2[ 0: 15]@ 112884 EXEC: L2[ 0: 15]@ 112884 , Size: 1104 + S31_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 424764 LOAD: L2[ 0: 15]@ 120060 EXEC: L2[ 0: 15]@ 120060 , Size: 276 + S31_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 425040 LOAD: L2[ 0: 15]@ 120336 EXEC: L2[ 0: 15]@ 120336 , Size: 276 + S31_Infos INSTALL: HyperFlash[ 0: 15]@ 426024 LOAD: L2[ 0: 15]@ 121320 EXEC: L2[ 0: 15]@ 121320 , Size: 9 + Dscnnconv_ds_5pw_convweights_q INSTALL: HyperFlash[ 0: 15]@ 304704 LOAD: L2[ 0: 15]@ 0 EXEC: L2[ 0: 15]@ 0 , Size: 76176 + Dscnnconv_ds_5pw_convconv2d_fo INSTALL: HyperFlash[ 0: 15]@ 418692 LOAD: L2[ 0: 15]@ 113988 EXEC: L2[ 0: 15]@ 113988 , Size: 1104 + S34_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 425316 LOAD: L2[ 0: 15]@ 120612 EXEC: L2[ 0: 15]@ 120612 , Size: 276 + S34_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 425592 LOAD: L2[ 0: 15]@ 120888 EXEC: L2[ 0: 15]@ 120888 , Size: 276 + S34_Infos INSTALL: HyperFlash[ 0: 15]@ 426036 LOAD: L2[ 0: 15]@ 121332 EXEC: L2[ 0: 15]@ 121332 , Size: 9 + S35_Infos INSTALL: HyperFlash[ 0: 15]@ 426048 LOAD: L2[ 0: 15]@ 121344 EXEC: L2[ 0: 15]@ 121344 , Size: 9 + Dscnnfc1weights_quantfakequant INSTALL: HyperFlash[ 0: 15]@ 391920 LOAD: L2[ 0: 15]@ 87216 EXEC: L2[ 0: 15]@ 87216 , Size: 3312 + Dscnnfc1matmul_bias INSTALL: HyperFlash[ 0: 15]@ 425868 LOAD: L2[ 0: 15]@ 121164 EXEC: L2[ 0: 15]@ 121164 , Size: 48 + S38_Mul_scale INSTALL: HyperFlash[ 0: 15]@ 426060 LOAD: L2[ 0: 15]@ 121356 EXEC: L2[ 0: 15]@ 121356 , Size: 12 + S38_Mul_shift INSTALL: HyperFlash[ 0: 15]@ 426072 LOAD: L2[ 0: 15]@ 121368 EXEC: L2[ 0: 15]@ 121368 , Size: 12 + S38_Infos INSTALL: HyperFlash[ 0: 15]@ 426084 LOAD: L2[ 0: 15]@ 121380 EXEC: L2[ 0: 15]@ 121380 , Size: 9 + S39_Infos INSTALL: HyperFlash[ 0: 15]@ 426096 LOAD: L2[ 0: 15]@ 121392 EXEC: L2[ 0: 15]@ 121392 , Size: 9 + Output_1 Externally allocated + S4_Output EXEC: L2[ 1: 2]@ 139344 , Size: 69000 + S7_Output EXEC: L2[ 2: 3]@ 121404 , Size: 17940 + S10_Output EXEC: L2[ 3: 4]@ 139344 , Size: 17940 + S13_Output EXEC: L2[ 4: 5]@ 121404 , Size: 17940 + S16_Output EXEC: L2[ 5: 6]@ 139344 , Size: 17940 + S19_Output EXEC: L2[ 6: 7]@ 121404 , Size: 17940 + S22_Output EXEC: L2[ 7: 8]@ 139344 , Size: 17940 + S25_Output EXEC: L2[ 8: 9]@ 157284 , Size: 17940 + S28_Output EXEC: L2[ 9: 10]@ 121404 , Size: 17940 + S31_Output EXEC: L2[ 10: 11]@ 139344 , Size: 17940 + S34_Output EXEC: L2[ 11: 12]@ 121404 , Size: 17940 + S35_Output EXEC: L2[ 12: 13]@ 139344 , Size: 276 + S38_Output EXEC: L2[ 13: 14]@ 121404 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S4_Conv2d_276x1x10x4_Relu +Generating Code For User Kernel: S7_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S10_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S13_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S16_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S19_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S22_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S25_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S28_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S31_Conv2d_276x1x3x3_Relu +Generating Code For User Kernel: S34_Conv2d_276x276x1x1_Relu +Generating Code For User Kernel: S35_AveragePool_13x5 +Generating Code For User Kernel: S38_Linear_12x276x1x1 +Generating Code For User Kernel: S39_SoftMax +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1weights_quantfakequ.tensor: 11040 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1conv2d_fold_bias.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_5dw_convweights_q.tensor: 2484 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_5dw_convdepthwise.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S31_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S31_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S31_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_5pw_convweights_q.tensor: 76176 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_5pw_convconv2d_fo.tensor: 276 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S34_Mul_scale.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S34_Mul_shift.tensor: 276 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S34_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S35_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1weights_quantfakequant.tensor: 3312 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1matmul_bias.tensor: 12 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S38_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S38_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S38_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S39_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_l_quant_L3_Flash_Const.dat (size 426108) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 48116 +L2 Memory size (Bytes) : Given: 350000, Used: 284517 +L3 Memory size (Bytes) : Given: 6388608, Used: 304704 + +L3 Memory bandwidth for 1 graph run : 304704 Bytes +L2 Memory bandwidth for 1 graph run : 1373906 Bytes +Sum of all Kernels arguments size : 923956 Bytes +Tiling Bandwith overhead : 1.486982 Move/KerArgSize +Sum of baseline bandwidth : 50099340 Bytes +Percentage of baseline BW for L2 : 2.74236 % +Percentage of baseline BW for L3 : 0.6082 % +Sum of all Kernels operations : 28504464 Operations +Total amount of flash coefficients : 426108 Bytes + +Basic kernels library : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_l_quant.h +Output Directory : BUILD_MODEL_SQ8_EMUL + +The following files have been generated: + KWS_ds_cnn_l_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_l_quantKernels.h Header file for the generated C code + KWS_ds_cnn_l_quant_L3_Flash_Const.dat Flash content for Graph constants +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o +gcc -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DLARGE -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o -lm +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +W0608 12:46:52.955716 140663696426816 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +W0608 12:46:52.958069 140663696426816 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +W0608 12:46:52.959074 140663696426816 module_wrapper.py:139] From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +2021-06-08 12:46:52.962191: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA +2021-06-08 12:46:52.983757: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2096060000 Hz +2021-06-08 12:46:52.984592: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55638210d930 initialized for platform Host (this does not guarantee that XLA will be used). Devices: +2021-06-08 12:46:52.984707: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2021-06-08 12:46:52.990039: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/marco-gwt/GWT/gap_sdk/install/workstation/lib +2021-06-08 12:46:52.990319: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: UNKNOWN ERROR (303) +2021-06-08 12:46:52.990421: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ubuntu): /proc/driver/nvidia/version does not exist +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +W0608 12:50:32.451610 140663696426816 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +W0608 12:50:32.454367 140663696426816 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +W0608 12:50:33.111438 140663696426816 module_wrapper.py:139] From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +INFO:tensorflow:Validation set size:4445 +I0608 12:50:33.112381 140663696426816 test_accuracy_emul.py:111] Validation set size:4445 +INFO:tensorflow:Test set size:4890 +I0608 12:56:17.174039 140663696426816 test_accuracy_emul.py:157] Test set size:4890 +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': True} +Pred/Tot: 97/ 100 Accuracy: 97.00% +Pred/Tot: 194/ 200 Accuracy: 97.00% +Pred/Tot: 286/ 300 Accuracy: 95.33% +Pred/Tot: 380/ 400 Accuracy: 95.00% +Pred/Tot: 477/ 500 Accuracy: 95.40% +Pred/Tot: 571/ 600 Accuracy: 95.17% +Pred/Tot: 669/ 700 Accuracy: 95.57% +Pred/Tot: 760/ 800 Accuracy: 95.00% +Pred/Tot: 853/ 900 Accuracy: 94.78% +Pred/Tot: 947/1000 Accuracy: 94.70% +Pred/Tot: 1039/1100 Accuracy: 94.45% +Pred/Tot: 1136/1200 Accuracy: 94.67% +Pred/Tot: 1228/1300 Accuracy: 94.46% +Pred/Tot: 1322/1400 Accuracy: 94.43% +Pred/Tot: 1416/1500 Accuracy: 94.40% +Pred/Tot: 1513/1600 Accuracy: 94.56% +Pred/Tot: 1609/1700 Accuracy: 94.65% +Pred/Tot: 1706/1800 Accuracy: 94.78% +Pred/Tot: 1801/1900 Accuracy: 94.79% +Pred/Tot: 1898/2000 Accuracy: 94.90% +Pred/Tot: 1995/2100 Accuracy: 95.00% +Pred/Tot: 2091/2200 Accuracy: 95.05% +Pred/Tot: 2186/2300 Accuracy: 95.04% +Pred/Tot: 2278/2400 Accuracy: 94.92% +Pred/Tot: 2368/2500 Accuracy: 94.72% +Pred/Tot: 2460/2600 Accuracy: 94.62% +Pred/Tot: 2557/2700 Accuracy: 94.70% +Pred/Tot: 2653/2800 Accuracy: 94.75% +Pred/Tot: 2748/2900 Accuracy: 94.76% +Pred/Tot: 2843/3000 Accuracy: 94.77% +Pred/Tot: 2940/3100 Accuracy: 94.84% +Pred/Tot: 3035/3200 Accuracy: 94.84% +Pred/Tot: 3131/3300 Accuracy: 94.88% +Pred/Tot: 3222/3400 Accuracy: 94.76% +Pred/Tot: 3318/3500 Accuracy: 94.80% +Pred/Tot: 3414/3600 Accuracy: 94.83% +Pred/Tot: 3507/3700 Accuracy: 94.78% +Pred/Tot: 3604/3800 Accuracy: 94.84% +Pred/Tot: 3694/3900 Accuracy: 94.72% +Pred/Tot: 3788/4000 Accuracy: 94.70% +Pred/Tot: 3884/4100 Accuracy: 94.73% +Pred/Tot: 3979/4200 Accuracy: 94.74% +Pred/Tot: 4072/4300 Accuracy: 94.70% +Pred/Tot: 4169/4400 Accuracy: 94.75% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4211/4444 Accuracy: 94.76% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 0 337 0 1 4 7 3 3 7 2 3 4] + [ 0 2 384 3 0 1 3 2 0 0 0 2] + [ 1 11 0 381 0 1 3 0 0 1 1 7] + [ 0 2 1 0 327 0 2 0 2 10 5 1] + [ 0 0 0 9 0 360 1 0 0 1 3 3] + [ 0 3 5 0 0 0 341 2 0 0 0 1] + [ 0 6 0 1 0 0 2 352 0 1 1 0] + [ 0 8 0 2 3 0 0 1 341 8 0 0] + [ 0 2 1 1 17 0 1 1 6 340 3 1] + [ 1 3 0 0 7 1 1 0 0 3 334 0] + [ 0 11 0 9 0 4 0 0 1 3 1 343]] +Pred/Tot: 91/ 100 Accuracy: 91.00% +Pred/Tot: 187/ 200 Accuracy: 93.50% +Pred/Tot: 282/ 300 Accuracy: 94.00% +Pred/Tot: 377/ 400 Accuracy: 94.25% +Pred/Tot: 469/ 500 Accuracy: 93.80% +Pred/Tot: 563/ 600 Accuracy: 93.83% +Pred/Tot: 659/ 700 Accuracy: 94.14% +Pred/Tot: 755/ 800 Accuracy: 94.38% +Pred/Tot: 852/ 900 Accuracy: 94.67% +Pred/Tot: 947/1000 Accuracy: 94.70% +Pred/Tot: 1038/1100 Accuracy: 94.36% +Pred/Tot: 1136/1200 Accuracy: 94.67% +Pred/Tot: 1229/1300 Accuracy: 94.54% +Pred/Tot: 1324/1400 Accuracy: 94.57% +Pred/Tot: 1417/1500 Accuracy: 94.47% +Pred/Tot: 1514/1600 Accuracy: 94.62% +Pred/Tot: 1607/1700 Accuracy: 94.53% +Pred/Tot: 1705/1800 Accuracy: 94.72% +Pred/Tot: 1795/1900 Accuracy: 94.47% +Pred/Tot: 1890/2000 Accuracy: 94.50% +Traceback (most recent call last): + File "utils/test_accuracy_emul.py", line 311, in + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) + File "/home/marco-gwt/anaconda3/envs/tf1.15/lib/python3.7/site-packages/tensorflow_core/python/platform/app.py", line 40, in run + _run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef) + File "/home/marco-gwt/anaconda3/envs/tf1.15/lib/python3.7/site-packages/absl/app.py", line 300, in run + _run_main(main, args) + File "/home/marco-gwt/anaconda3/envs/tf1.15/lib/python3.7/site-packages/absl/app.py", line 251, in _run_main + sys.exit(main(argv)) + File "utils/test_accuracy_emul.py", line 175, in main + ex_stream = os.popen("./{} {}".format(executable, testing_wav_file[0])) + File "/home/marco-gwt/anaconda3/envs/tf1.15/lib/python3.7/os.py", line 990, in popen + bufsize=buffering) + File "/home/marco-gwt/anaconda3/envs/tf1.15/lib/python3.7/subprocess.py", line 800, in __init__ + restore_signals, start_new_session) + File "/home/marco-gwt/anaconda3/envs/tf1.15/lib/python3.7/subprocess.py", line 1482, in _execute_child + restore_signals, start_new_session, preexec_fn) +KeyboardInterrupt +make: *** [Makefile:124: test_accuracy] Error 1 diff --git a/accuracy_log/log_test_large_spectr.txt b/accuracy_log/log_test_large_spectr.txt new file mode 100644 index 0000000..9c7c016 --- /dev/null +++ b/accuracy_log/log_test_large_spectr.txt @@ -0,0 +1,14 @@ +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_l_quant.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 0 --use_high_prec 0 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=0 LARGE=1 WITH_MFCC=1 USE_POWER=0 USE_HIGH_PREC=0 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +rm -f BUILD_MODEL_8BIT_EMUL/GenTile +rm -f -rf BUILD_MODEL_8BIT_EMUL +rm -f BUILD_MODEL_8BIT_EMUL/*.dat +rm -rf /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' diff --git a/accuracy_log/log_test_large_spectr_v2.txt b/accuracy_log/log_test_large_spectr_v2.txt new file mode 100644 index 0000000..3ff79c5 --- /dev/null +++ b/accuracy_log/log_test_large_spectr_v2.txt @@ -0,0 +1,27 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_l_quant.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 0 +WARNING:tensorflow:From utils/test_accuracy_emul.py:311: The name tf.app.run is deprecated. Please use tf.compat.v1.app.run instead. + +make[1]: *** No rule to make target 'model/KWS_ds_cnn_l_quant.tflite', needed by 'BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_l_quant.tflite'. Stop. +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=0 LARGE=1 WITH_MFCC=1 USE_POWER=0 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +Traceback (most recent call last): + File "utils/test_accuracy_emul.py", line 311, in + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) + File "/home/marco-gwt/anaconda3/envs/tf1.15/lib/python3.7/site-packages/tensorflow_core/python/platform/app.py", line 40, in run + _run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef) + File "/home/marco-gwt/anaconda3/envs/tf1.15/lib/python3.7/site-packages/absl/app.py", line 300, in run + _run_main(main, args) + File "/home/marco-gwt/anaconda3/envs/tf1.15/lib/python3.7/site-packages/absl/app.py", line 251, in _run_main + sys.exit(main(argv)) + File "utils/test_accuracy_emul.py", line 90, in main + raise Exception("Executable not generated correctly") +Exception: Executable not generated correctly +make: *** [Makefile:125: test_accuracy] Error 1 diff --git a/accuracy_log/log_test_large_spectr_v2_norm9.txt b/accuracy_log/log_test_large_spectr_v2_norm9.txt new file mode 100644 index 0000000..fc24340 --- /dev/null +++ b/accuracy_log/log_test_large_spectr_v2_norm9.txt @@ -0,0 +1,11 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_l_quant.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 0 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=0 LARGE=1 WITH_MFCC=1 USE_POWER=0 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' diff --git a/accuracy_log/log_test_medium_hp_power.txt b/accuracy_log/log_test_medium_hp_power.txt new file mode 100644 index 0000000..3a603b4 --- /dev/null +++ b/accuracy_log/log_test_medium_hp_power.txt @@ -0,0 +1,1499 @@ +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_m_quant_power.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 1 --use_high_prec 1 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=1 LARGE=0 WITH_MFCC=1 USE_POWER=1 USE_HIGH_PREC=1 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +rm -f BUILD_MODEL_8BIT_EMUL/GenTile +rm -f -rf BUILD_MODEL_8BIT_EMUL +rm -f BUILD_MODEL_8BIT_EMUL/*.dat +rm -rf /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_8BIT_EMUL +cp model/KWS_ds_cnn_m_quant_power.tflite BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quant.tflite +echo "GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL" +GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL +sed -e "s|MODEL_SRC|KWS_ds_cnn_m_quantModel.c|g" -e "s|TENSORS_DIR|BUILD_MODEL_8BIT_EMUL/tensors|g" -e "s|MODEL_BUILD|BUILD_MODEL_8BIT_EMUL|g" -e "s|GRAPH_DUMP||g" -e "s|LARGE_OPT||g" \ + model/nntool_script_params > model/nntool_script +nntool -s model/nntool_script BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +load_quantization - was: False +now: True +open - opening graph file BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quant.tflite load_quantizaion = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +debug - was: False +now: True +adjust_order - adding transposes to correct tensor order for AT kernels +set_aliases - looking for aliased edges +eliminate_transposes - eliminating unnecessary transposes +eliminate_transposes - search for transposes +eliminate_transposes - ++ Starting up from CONV_2D_0_0[0] +eliminate_transposes - looking up at input_1[0] transpose [2, 0, 1] +eliminate_transposes - accepted input_1 - input without fixed order - transpose input [2, 0, 1] +eliminate_transposes - ++ Found results for CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_0[0] +eliminate_transposes - looking down at CONV_2D_0_0_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_1 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_2[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_2 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_2[0] +eliminate_transposes - looking down at CONV_2D_0_2_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_3 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_2[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_4[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_4 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_4[0] +eliminate_transposes - looking down at CONV_2D_0_4_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_5 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_4[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_6[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_6 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_6[0] +eliminate_transposes - looking down at CONV_2D_0_6_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_7 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_6[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_8[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_8 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_8[0] +eliminate_transposes - looking down at CONV_2D_0_8_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at AVERAGE_POOL_2D_0_9[0] transpose [1, 2, 0] +eliminate_transposes - accepted AVERAGE_POOL_2D_0_9 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_8[0] +eliminate_transposes - ++ Starting down from AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - looking down at FULLY_CONNECTED_0_10[0] transpose [1, 2, 0] +eliminate_transposes - accepted FULLY_CONNECTED_0_10 - linear layer reorder input +eliminate_transposes - ++ Found results for AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - eliminate transposes +eliminate_transposes_actions - Start Action (up): CONV_2D_0_0 +eliminate_transposes_actions - CONV_2D_0_0 delete transpose in[0] +eliminate_transposes_actions - CONV_2D_0_0 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - input_1 input dims with (2, 0, 1) +eliminate_transposes_actions - End Action (up): input_1 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_0 +eliminate_transposes_actions - CONV_2D_0_0 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_0 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_2 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_2 +eliminate_transposes_actions - CONV_2D_0_2 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_2 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_4 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_4 +eliminate_transposes_actions - CONV_2D_0_4 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_4 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_6 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_6 +eliminate_transposes_actions - CONV_2D_0_6 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_6 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_8 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_8 +eliminate_transposes_actions - CONV_2D_0_8 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_8 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose in[0] +eliminate_transposes_actions - End Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - Start Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose out[0] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - reorder linear layer FULLY_CONNECTED_0_10 in with shape 1x1x172 transposed (2, 0, 1) +eliminate_transposes_actions - End Action (down): FULLY_CONNECTED_0_10 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - search for transposes +eliminate_transposes - no transposes to eliminate found +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - no further transpose sequences found +set_aliases - looking for aliased edges +nngraph - adjusted order +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start remove_relus +matcher - fusions - start remove_noops +matcher - fusions - start fuse_external_bias_sq8 +matcher - fusions - start fuse_pad +matcher - fusions - start match_duplicate_operations +matcher - fusions - start gather_to_split +matcher - fusions - start slice_to_split +matcher - fusions - start unused_concats +matcher - fusions - start find_missing_quantization +matcher - fusions - start rnn_reverse +matcher - fusions - start rnn_unpack +matcher - fusions - start match_far_hsigmoid +matcher - fusions - start match_close_hsigmoid +matcher - fusions - start expand_transposes +matcher - fusions - start move_pooling_scale8 +matcher - fusions - start move_activations_scale8 +matcher - fusions - start fuse_gap_convs +matcher - fusions - start match_conv_active_pool +matcher - fusions - start match_conv_pool_active +matcher - fusions - start match_conv_active +match_gap_conv - fusing nodes CONV_2D_0_0,CONV_2D_0_0_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_1,DEPTHWISE_CONV_2D_0_1_activation +match_gap_conv - fusing nodes CONV_2D_0_2,CONV_2D_0_2_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_3,DEPTHWISE_CONV_2D_0_3_activation +match_gap_conv - fusing nodes CONV_2D_0_4,CONV_2D_0_4_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_5,DEPTHWISE_CONV_2D_0_5_activation +match_gap_conv - fusing nodes CONV_2D_0_6,CONV_2D_0_6_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_7,DEPTHWISE_CONV_2D_0_7_activation +match_gap_conv - fusing nodes CONV_2D_0_8,CONV_2D_0_8_activation +matcher - fusions - match_conv_active modified graph +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start match_conv_pool +matcher - fusions - start fuse_gap_linear +matcher - fusions - start fuse_op_activation_scale8 +matcher - fusions - start propagate_softmax_sym_qrec +equalize_sym_mult_concats - propagating scale up from node SOFTMAX_0_11 to node FULLY_CONNECTED_0_10 +matcher - fusions - start equalize_sm_concats +matcher - fusions - start filter_bigger_than_input +matcher - fusions - start insert_copies +matcher - fusions - start propagate_up_rnn_in_qs +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| Step | Step name | Operation | Input Dims | Output Dims | Inputs | Active | Params | Ops | Params | Hints | +| | | | (cxhxw) | (cxhxw) | | size | size | | | | ++======+=============================+=========================+============+=============+========+========+========+=========+============================+======================+ +| 0 | input_1 | input | 1x49x10 | 1x49x10 | | 490 | 0 | | I 1x49x10 FIXED_ORDER=0 | in: hxwxc out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 1 | CONV_2D_0_0_fusion | conv_fusion_conv_active | 1x49x10 | 172x25x10 | 0 | 43490 | 7052 | 1.72M | F 172x1x10x4 S 2x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 4x5x1x2 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 2 | DEPTHWISE_CONV_2D_0_1_fusio | conv_fusion_conv_active | 172x25x10 | 172x13x5 | 1 | 54180 | 181 | 100.62K | F 172x1x3x3 S 2x2 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x0x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 3 | CONV_2D_0_2_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 2 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 4 | DEPTHWISE_CONV_2D_0_3_fusio | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 3 | 22360 | 181 | 100.62K | F 172x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 5 | CONV_2D_0_4_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 4 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 6 | DEPTHWISE_CONV_2D_0_5_fusio | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 5 | 22360 | 181 | 100.62K | F 172x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 7 | CONV_2D_0_6_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 6 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 8 | DEPTHWISE_CONV_2D_0_7_fusio | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 7 | 22360 | 181 | 100.62K | F 172x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 9 | CONV_2D_0_8_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 8 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 10 | AVERAGE_POOL_2D_0_9 | average_pool | 172x13x5 | 172x1x1 | 9 | 11352 | 0 | 11.35K | T average F 13x5 S 2x2 P | in: cxhxw out: cxhxw | +| | | | | | | | | | 0x0x0x0 zero | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 11 | FULLY_CONNECTED_0_10 | linear | 172x1x1 | 12 | 10 | 184 | 2076 | 2.06K | F 12x172x1x1 | in: cx0x1 out: c | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 12 | SOFTMAX_0_11 | softmax | 12 | 12 | 11 | 24 | 0 | 24 | Beta 0.0 Axis None | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 13 | output_1 | output | 12 | 12 | 12 | 12 | 0 | | O 12 FIXED_ORDER=0 | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | 54180 | 128876 | 9.83M | | | +| | Max active/Total params | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | | 183056 | 9.83M | | | +| | Max mem usage | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| Step | Name | In | Out | Weights | Bias | Mulbias | Calc | Acc | ++======+==================+==================+=================+=================+==========+==========+=======+=======+ +| 0 | input_1 | -247.97>chan | Q32.0 | Q32.0 | +| | | 27124<246.03 | 54438<16.33 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 1 | CONV_2D_0_0_acti | -16.45>chan | Q32.0 | Q32.0 | +| | D_0_1 | 4438<16.33 | 91819<19.55 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 2 | DEPTHWISE_CONV_2 | -19.70>chan | Q32.0 | Q32.0 | +| | | 1819<19.55 | 14758<16.53 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 3 | CONV_2D_0_2_acti | -16.66>chan | Q32.0 | Q32.0 | +| | D_0_3 | 4758<16.53 | 06785<14.87 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 4 | DEPTHWISE_CONV_2 | -14.98>chan | Q32.0 | Q32.0 | +| | | 6785<14.87 | 44783<10.60 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 5 | CONV_2D_0_4_acti | -10.68>chan | Q32.0 | Q32.0 | +| | D_0_5 | 4783<10.60 | 07906<11.82 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 6 | DEPTHWISE_CONV_2 | -11.91>chan | Q32.0 | Q32.0 | +| | | 7906<11.82 | 0190<9.61 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 7 | CONV_2D_0_6_acti | -9.69>chan | Q32.0 | Q32.0 | +| | D_0_7 | 190<9.61 | 31889<11.34 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 8 | DEPTHWISE_CONV_2 | -11.43>chan | Q32.0 | Q32.0 | +| | | 1889<11.34 | 4916<9.16 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 9 | CONV_2D_0_8_acti | -9.24>chan | Q32.0 | Q32.0 | +| | 0_10 | 916<9.16 | 00000<35.45 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 12 | SOFTMAX_0_11 | -35.73 W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1763000 + +==== Process Tiling For User Kernel: S1_Conv2d_172x1x10x4_Relu ======================= +S1_Conv2d_172x1x10x4_Relu Partition[0] Size = 194945 (Min: 200, Max: 275249), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S1_Conv2d_172x1x10x4_Relu Full buffering on Arg: Bias, was using 704 bytes will require 688 bytes buffer +S1_Conv2d_172x1x10x4_Relu Full buffering on Arg: Scale, was using 176 bytes will require 172 bytes buffer +S1_Conv2d_172x1x10x4_Relu Full buffering on Arg: ScaleN, was using 176 bytes will require 172 bytes buffer +S1_Conv2d_172x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 4 Parametric Space: [D1, M0=88] Parametric Space: [D0, M1=1] + In : Ratio: 2.000000, FixDim: 10, VarDim: 22 [ 49], Size: 440, Total: 440, Move: 1460 (Decl x 2.979592) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 1128, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 1300, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 1472, Move: 172 (Decl x 1.000000) L2 +@ Filter : Ratio: 0.000000, Size: 6880, Total: 8352, Move: 6880 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 12320, Total: 20672, Move: 43000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 24640, Total: 45312, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45324, Move: 9 (Decl x 1.000000) L2 +S1_Conv2d_172x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 45324, L2Move: 52381, L3Move: 0, Tiling Overhead: 1.018868 +S1_Conv2d_172x1x10x4_Relu Partial buffering on Arg: Filter, From: D0 To: D1. Current is (Par) 1 x [W:1, H:1] x 40 => Partial buffer size is 7040 bytes +S1_Conv2d_172x1x10x4_Relu Found Parametric value for space D1 (Initial: 172, Div: 8) = 88 [88*1 + 84] and space D0 (Initial: 1, Div: 4) = 1 [1*1 + 0], Iteration for Tiled Space: 4 +S1_Conv2d_172x1x10x4_Relu For Iter Space: 0 Iteration count: 4 (Last one is truncated), Given L1 Memory: 48736, Used L1 Memory: 45484, Reusable Memory: 3252, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S2_Conv2d_172x1x3x3_Relu ======================= +S2_Conv2d_172x1x3x3_Relu Partition[0] Size = 63681 (Min: 60, Max: 165385), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 384 bytes will require 688 bytes buffer +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 96 bytes will require 172 bytes buffer +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 bytes will require 172 bytes buffer +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 864 bytes will require 1548 bytes buffer +S2_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 43000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 24688, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 24860, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 25032, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 26580, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 32820, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 45300, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45312, Move: 9 (Decl x 1.000000) L2 +S2_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45312, L2Move: 56769, L3Move: 0, Tiling Overhead: 1.000000 +S2_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 48 [48*3 + 28], Iteration for Tiled Space: 1 +S2_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45312, Reusable Memory: 3424, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S3_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S3_Conv2d_172x172x1x1_Relu ======================= +S3_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S3_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S3_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S3_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S3_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S3_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S3_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S3_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S4_Conv2d_172x1x3x3_Relu ======================= +S4_Conv2d_172x1x3x3_Relu Partition[0] Size = 56801 (Min: 30, Max: 98305), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 688 bytes buffer +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 172 bytes buffer +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 172 bytes buffer +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 1548 bytes buffer +S4_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S4_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +S4_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S5_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S5_Conv2d_172x172x1x1_Relu ======================= +S5_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S5_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S5_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S5_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S5_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S5_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S5_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S5_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S6_Conv2d_172x1x3x3_Relu ======================= +S6_Conv2d_172x1x3x3_Relu Partition[0] Size = 56801 (Min: 30, Max: 98305), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 688 bytes buffer +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 172 bytes buffer +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 172 bytes buffer +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 1548 bytes buffer +S6_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S6_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S6_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +S6_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S7_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S7_Conv2d_172x172x1x1_Relu ======================= +S7_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S7_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S7_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S7_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S7_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S7_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S7_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S8_Conv2d_172x1x3x3_Relu ======================= +S8_Conv2d_172x1x3x3_Relu Partition[0] Size = 56801 (Min: 30, Max: 98305), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 688 bytes buffer +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 172 bytes buffer +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 172 bytes buffer +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 1548 bytes buffer +S8_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S8_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S8_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +S8_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S9_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S9_Conv2d_172x172x1x1_Relu ======================= +S9_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S9_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S9_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S9_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S9_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S9_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S9_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S9_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 11180 + +==== Process Tiling For User Kernel: S10_AveragePool_13x5 ======================= +S10_AveragePool_13x5 Partition[0] Size = 23069 (Min: 130, Max: 22749), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S10_AveragePool_13x5, Total Raw Memory: 11364 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S10_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 11364, Reusable Memory: 37372, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S11_Linear_12x172x1x1, Linear: InDim: 172, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S11_Linear_12x172x1x1 ======================= +S11_Linear_12x172x1x1 Partition[0] Size = 4505 (Min: 0, Max: 4673), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S11_Linear_12x172x1x1, Total Raw Memory: 2332 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S11_Linear_12x172x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 2332, Reusable Memory: 46404, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S12_SoftMax ======================= + S12_SoftMax Partition[0] Size = 57 (Min: 8, Max: 81), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S12_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. + S12_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S11_Output[ In] Adding Edge From S11_Linear_12x172x1x1 To S12_SoftMax New + Symbol: S10_Output[ In] Adding Edge From S10_AveragePool_13x5 To S11_Linear_12x172x1x1 New + Symbol: S9_Output[ In] Adding Edge From S9_Conv2d_172x172x1x1_Relu To S10_AveragePool_13x5 New + Symbol: S8_Output[ In] Adding Edge From S8_Conv2d_172x1x3x3_Relu To S9_Conv2d_172x172x1x1_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_172x172x1x1_Relu To S8_Conv2d_172x1x3x3_Relu New + Symbol: S6_Output[ In] Adding Edge From S6_Conv2d_172x1x3x3_Relu To S7_Conv2d_172x172x1x1_Relu New + Symbol: S5_Output[ In] Adding Edge From S5_Conv2d_172x172x1x1_Relu To S6_Conv2d_172x1x3x3_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_172x1x3x3_Relu To S5_Conv2d_172x172x1x1_Relu New + Symbol: S3_Output[ In] Adding Edge From S3_Conv2d_172x172x1x1_Relu To S4_Conv2d_172x1x3x3_Relu New + Symbol: S2_Output[ In] Adding Edge From S2_Conv2d_172x1x3x3_Relu To S3_Conv2d_172x172x1x1_Relu New + Symbol: S1_Output[ In] Adding Edge From S1_Conv2d_172x1x10x4_Relu To S2_Conv2d_172x1x3x3_Relu New + Symbol: Output_1[Undef] Adding Edge From S12_SoftMax To __GraphExit__ New + Symbol: S12_Infos[ In] Adding Edge From __GraphEntry__ To S12_SoftMax New + Symbol: S11_Mul_shift[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 New + Symbol: S11_Mul_scale[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S11_Biases[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S11_Weights[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S11_Infos[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_AveragePool_13x5 New + Symbol: S9_Mul_shift[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu New + Symbol: S9_Mul_scale[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S9_Biases[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S9_Weights[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S9_Infos[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S8_Mul_shift[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu New + Symbol: S8_Mul_scale[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S8_Biases[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S8_Weights[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S8_Infos[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu New + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Biases[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Weights[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S6_Mul_shift[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu New + Symbol: S6_Mul_scale[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S6_Biases[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S6_Weights[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S6_Infos[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S5_Mul_shift[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu New + Symbol: S5_Mul_scale[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S5_Biases[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S5_Weights[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S5_Infos[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu New + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Biases[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Weights[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S3_Mul_shift[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu New + Symbol: S3_Mul_scale[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S3_Biases[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S3_Weights[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S3_Infos[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S2_Mul_shift[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu New + Symbol: S2_Mul_scale[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S2_Biases[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S2_Weights[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S2_Infos[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S1_Mul_shift[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu New + Symbol: S1_Mul_scale[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S1_Biases[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S1_Weights[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S1_Infos[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S0_Input_1[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 54180 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 197153 => Alloc: OK + +[FULL] Remapping [54180 .. 197152] to [0 .. 142972] Align compensation: 3 +[PART] Remapping [0 .. 54179] to [142976 .. 197155] Align compensation: 0 +[PART] Remapping [197153 .. 349999] to [197156 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_m_quantCNN is sucessfull, L2: 197153 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => S0_Input_1 + (null) => S1_Infos + (null) => S1_Weights + (null) => S1_Biases + (null) => S1_Mul_scale + (null) => S1_Mul_shift + (null) => S2_Infos + (null) => S2_Weights + (null) => S2_Biases + (null) => S2_Mul_scale + (null) => S2_Mul_shift + (null) => S3_Infos + (null) => S3_Weights + (null) => S3_Biases + (null) => S3_Mul_scale + (null) => S3_Mul_shift + (null) => S4_Infos + (null) => S4_Weights + (null) => S4_Biases + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S5_Infos + (null) => S5_Weights + (null) => S5_Biases + (null) => S5_Mul_scale + (null) => S5_Mul_shift + (null) => S6_Infos + (null) => S6_Weights + (null) => S6_Biases + (null) => S6_Mul_scale + (null) => S6_Mul_shift + (null) => S7_Infos + (null) => S7_Weights + (null) => S7_Biases + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S8_Infos + (null) => S8_Weights + (null) => S8_Biases + (null) => S8_Mul_scale + (null) => S8_Mul_shift + (null) => S9_Infos + (null) => S9_Weights + (null) => S9_Biases + (null) => S9_Mul_scale + (null) => S9_Mul_shift + (null) => S10_Infos + (null) => S11_Infos + (null) => S11_Weights + (null) => S11_Biases + (null) => S11_Mul_scale + (null) => S11_Mul_shift + (null) => S12_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 + + Living Dynamic Symbols: [S0_Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S1_Conv2d_172x1x10x4_Relu, Operations: 1763000 + I In => S0_Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 1460, TileOverhead: 2.979592, L2Buff: 0, Addr: 0 +CI PartBuff Filter => S1_Weights --L2-- Size: 6880, L3_Move: 0, L2_Move: 6880, TileOverhead: 1.000000, L2Buff: 0, Addr: 1472 +CI Buff Bias => S1_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 440 + O Out => S1_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8512 +CI Buff Scale => S1_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1128 +CI Buff ScaleN => S1_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1300 +CI Buff Infos => S1_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45472 + Kernel Memory : L3: 0, L2: 51411 + Kernel Total Memory: 51411, L3 moves: 0, L2 moves: 52381, Move overhead: 1.018868 + Kernel Operations : 1763000 [KernelOper/GraphOper: 17.780609%], Move/Operation ratio: [L3: 0.000000, L2: 0.029711] + Successors: 2 + + Living Dynamic Symbols: [S0_Input_1] [S1_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S2_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S1_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S2_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 25032 +CI Buff Bias => S2_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S2_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26580 +CI Buff Scale => S2_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24688 +CI Buff ScaleN => S2_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24860 +CI Buff Infos => S2_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45300 + Kernel Memory : L3: 0, L2: 56769 + Kernel Total Memory: 56769, L3 moves: 0, L2 moves: 56769, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.507773] + Successors: 3 + + Living Dynamic Symbols: [S1_Output] [S2_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S3_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S2_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S3_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S3_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S3_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S3_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S3_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S3_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 4 + + Living Dynamic Symbols: [S2_Output] [S3_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S4_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S3_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S4_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => S4_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S4_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S4_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 5 + + Living Dynamic Symbols: [S3_Output] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S5_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S4_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S5_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S5_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S5_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S5_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S5_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S5_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 6 + + Living Dynamic Symbols: [S4_Output] [S5_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S6_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S5_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S6_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => S6_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S6_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S6_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S6_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S6_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 7 + + Living Dynamic Symbols: [S5_Output] [S6_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S7_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S6_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S7_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S7_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S7_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 8 + + Living Dynamic Symbols: [S6_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S8_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S8_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => S8_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S8_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S8_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S8_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S8_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 9 + + Living Dynamic Symbols: [S7_Output] [S8_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S9_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S8_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S9_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S9_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S9_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S9_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S9_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S9_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 10 + + Living Dynamic Symbols: [S8_Output] [S9_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S10_AveragePool_13x5, Operations: 11180 + I Buff In => S9_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S10_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11180 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 11352 + Kernel Memory : L3: 0, L2: 11361 + Kernel Total Memory: 11361, L3 moves: 0, L2 moves: 11361, Move overhead: 1.000000 + Kernel Operations : 11180 [KernelOper/GraphOper: 0.112755%], Move/Operation ratio: [L3: 0.000000, L2: 1.016190] + Successors: 11 + + Living Dynamic Symbols: [S9_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S11_Linear_12x172x1x1, Operations: 2064 + I Buff In => S10_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S11_Weights --L2-- Size: 2064, L3_Move: 0, L2_Move: 2064, TileOverhead: 1.000000, L2Buff: 0, Addr: 172 +CI Buff Bias => S11_Biases --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 2236 + O Buff Out => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2284 +CI Buff Scale => S11_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2296 +CI Buff ScaleN => S11_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2308 +CI Buff Infos => S11_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 2320 + Kernel Memory : L3: 0, L2: 2329 + Kernel Total Memory: 2329, L3 moves: 0, L2 moves: 2329, Move overhead: 1.000000 + Kernel Operations : 2064 [KernelOper/GraphOper: 0.020816%], Move/Operation ratio: [L3: 0.000000, L2: 1.128392] + Successors: 12 + + Living Dynamic Symbols: [S10_Output] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S12_SoftMax, Operations: 12 + I Buff In => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S12_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000121%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 56769 + Graph nodes min global memory: L3: 0, L2: 56772 + Graph sum of kernel arguments size: 408702, L3 moves: 0, L2 moves: 409672, Move overhead: 1.002373 + Graph total operations: 9915296 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 408702, Total L3_Move: 0, Total L2_Move: 409672, Tiling Overhead Average: 1.002373 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 54180 + Const 0 30628 + Total 0 56772 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + S0_Input_1 Externally allocated + S1_Infos INSTALL: HyperFlash[ 0: 13]@ 142808 LOAD: L2[ 0: 13]@ 142808 EXEC: L2[ 0: 13]@ 142808 , Size: 9 + S1_Weights INSTALL: HyperFlash[ 0: 13]@ 118336 LOAD: L2[ 0: 13]@ 118336 EXEC: L2[ 0: 13]@ 118336 , Size: 6880 + S1_Biases INSTALL: HyperFlash[ 0: 13]@ 133472 LOAD: L2[ 0: 13]@ 133472 EXEC: L2[ 0: 13]@ 133472 , Size: 688 + S1_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 139664 LOAD: L2[ 0: 13]@ 139664 EXEC: L2[ 0: 13]@ 139664 , Size: 172 + S1_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 139836 LOAD: L2[ 0: 13]@ 139836 EXEC: L2[ 0: 13]@ 139836 , Size: 172 + S2_Infos INSTALL: HyperFlash[ 0: 13]@ 142820 LOAD: L2[ 0: 13]@ 142820 EXEC: L2[ 0: 13]@ 142820 , Size: 9 + S2_Weights INSTALL: HyperFlash[ 0: 13]@ 127280 LOAD: L2[ 0: 13]@ 127280 EXEC: L2[ 0: 13]@ 127280 , Size: 1548 + S2_Biases INSTALL: HyperFlash[ 0: 13]@ 134160 LOAD: L2[ 0: 13]@ 134160 EXEC: L2[ 0: 13]@ 134160 , Size: 688 + S2_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140008 LOAD: L2[ 0: 13]@ 140008 EXEC: L2[ 0: 13]@ 140008 , Size: 172 + S2_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140180 LOAD: L2[ 0: 13]@ 140180 EXEC: L2[ 0: 13]@ 140180 , Size: 172 + S3_Infos INSTALL: HyperFlash[ 0: 13]@ 142832 LOAD: L2[ 0: 13]@ 142832 EXEC: L2[ 0: 13]@ 142832 , Size: 9 + S3_Weights INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 29584 + S3_Biases INSTALL: HyperFlash[ 0: 13]@ 134848 LOAD: L2[ 0: 13]@ 134848 EXEC: L2[ 0: 13]@ 134848 , Size: 688 + S3_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140352 LOAD: L2[ 0: 13]@ 140352 EXEC: L2[ 0: 13]@ 140352 , Size: 172 + S3_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140524 LOAD: L2[ 0: 13]@ 140524 EXEC: L2[ 0: 13]@ 140524 , Size: 172 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 142844 LOAD: L2[ 0: 13]@ 142844 EXEC: L2[ 0: 13]@ 142844 , Size: 9 + S4_Weights INSTALL: HyperFlash[ 0: 13]@ 128828 LOAD: L2[ 0: 13]@ 128828 EXEC: L2[ 0: 13]@ 128828 , Size: 1548 + S4_Biases INSTALL: HyperFlash[ 0: 13]@ 135536 LOAD: L2[ 0: 13]@ 135536 EXEC: L2[ 0: 13]@ 135536 , Size: 688 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140696 LOAD: L2[ 0: 13]@ 140696 EXEC: L2[ 0: 13]@ 140696 , Size: 172 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140868 LOAD: L2[ 0: 13]@ 140868 EXEC: L2[ 0: 13]@ 140868 , Size: 172 + S5_Infos INSTALL: HyperFlash[ 0: 13]@ 142856 LOAD: L2[ 0: 13]@ 142856 EXEC: L2[ 0: 13]@ 142856 , Size: 9 + S5_Weights INSTALL: HyperFlash[ 0: 13]@ 29584 LOAD: L2[ 0: 13]@ 29584 EXEC: L2[ 0: 13]@ 29584 , Size: 29584 + S5_Biases INSTALL: HyperFlash[ 0: 13]@ 136224 LOAD: L2[ 0: 13]@ 136224 EXEC: L2[ 0: 13]@ 136224 , Size: 688 + S5_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141040 LOAD: L2[ 0: 13]@ 141040 EXEC: L2[ 0: 13]@ 141040 , Size: 172 + S5_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141212 LOAD: L2[ 0: 13]@ 141212 EXEC: L2[ 0: 13]@ 141212 , Size: 172 + S6_Infos INSTALL: HyperFlash[ 0: 13]@ 142868 LOAD: L2[ 0: 13]@ 142868 EXEC: L2[ 0: 13]@ 142868 , Size: 9 + S6_Weights INSTALL: HyperFlash[ 0: 13]@ 130376 LOAD: L2[ 0: 13]@ 130376 EXEC: L2[ 0: 13]@ 130376 , Size: 1548 + S6_Biases INSTALL: HyperFlash[ 0: 13]@ 136912 LOAD: L2[ 0: 13]@ 136912 EXEC: L2[ 0: 13]@ 136912 , Size: 688 + S6_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141384 LOAD: L2[ 0: 13]@ 141384 EXEC: L2[ 0: 13]@ 141384 , Size: 172 + S6_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141556 LOAD: L2[ 0: 13]@ 141556 EXEC: L2[ 0: 13]@ 141556 , Size: 172 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 142880 LOAD: L2[ 0: 13]@ 142880 EXEC: L2[ 0: 13]@ 142880 , Size: 9 + S7_Weights INSTALL: HyperFlash[ 0: 13]@ 59168 LOAD: L2[ 0: 13]@ 59168 EXEC: L2[ 0: 13]@ 59168 , Size: 29584 + S7_Biases INSTALL: HyperFlash[ 0: 13]@ 137600 LOAD: L2[ 0: 13]@ 137600 EXEC: L2[ 0: 13]@ 137600 , Size: 688 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141728 LOAD: L2[ 0: 13]@ 141728 EXEC: L2[ 0: 13]@ 141728 , Size: 172 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141900 LOAD: L2[ 0: 13]@ 141900 EXEC: L2[ 0: 13]@ 141900 , Size: 172 + S8_Infos INSTALL: HyperFlash[ 0: 13]@ 142892 LOAD: L2[ 0: 13]@ 142892 EXEC: L2[ 0: 13]@ 142892 , Size: 9 + S8_Weights INSTALL: HyperFlash[ 0: 13]@ 131924 LOAD: L2[ 0: 13]@ 131924 EXEC: L2[ 0: 13]@ 131924 , Size: 1548 + S8_Biases INSTALL: HyperFlash[ 0: 13]@ 138288 LOAD: L2[ 0: 13]@ 138288 EXEC: L2[ 0: 13]@ 138288 , Size: 688 + S8_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142072 LOAD: L2[ 0: 13]@ 142072 EXEC: L2[ 0: 13]@ 142072 , Size: 172 + S8_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142244 LOAD: L2[ 0: 13]@ 142244 EXEC: L2[ 0: 13]@ 142244 , Size: 172 + S9_Infos INSTALL: HyperFlash[ 0: 13]@ 142904 LOAD: L2[ 0: 13]@ 142904 EXEC: L2[ 0: 13]@ 142904 , Size: 9 + S9_Weights INSTALL: HyperFlash[ 0: 13]@ 88752 LOAD: L2[ 0: 13]@ 88752 EXEC: L2[ 0: 13]@ 88752 , Size: 29584 + S9_Biases INSTALL: HyperFlash[ 0: 13]@ 138976 LOAD: L2[ 0: 13]@ 138976 EXEC: L2[ 0: 13]@ 138976 , Size: 688 + S9_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142416 LOAD: L2[ 0: 13]@ 142416 EXEC: L2[ 0: 13]@ 142416 , Size: 172 + S9_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142588 LOAD: L2[ 0: 13]@ 142588 EXEC: L2[ 0: 13]@ 142588 , Size: 172 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 142916 LOAD: L2[ 0: 13]@ 142916 EXEC: L2[ 0: 13]@ 142916 , Size: 9 + S11_Infos INSTALL: HyperFlash[ 0: 13]@ 142928 LOAD: L2[ 0: 13]@ 142928 EXEC: L2[ 0: 13]@ 142928 , Size: 9 + S11_Weights INSTALL: HyperFlash[ 0: 13]@ 125216 LOAD: L2[ 0: 13]@ 125216 EXEC: L2[ 0: 13]@ 125216 , Size: 2064 + S11_Biases INSTALL: HyperFlash[ 0: 13]@ 142760 LOAD: L2[ 0: 13]@ 142760 EXEC: L2[ 0: 13]@ 142760 , Size: 48 + S11_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142940 LOAD: L2[ 0: 13]@ 142940 EXEC: L2[ 0: 13]@ 142940 , Size: 12 + S11_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142952 LOAD: L2[ 0: 13]@ 142952 EXEC: L2[ 0: 13]@ 142952 , Size: 12 + S12_Infos INSTALL: HyperFlash[ 0: 13]@ 142964 LOAD: L2[ 0: 13]@ 142964 EXEC: L2[ 0: 13]@ 142964 , Size: 9 + Output_1 Externally allocated + S1_Output EXEC: L2[ 1: 2]@ 154156 , Size: 43000 + S2_Output EXEC: L2[ 2: 3]@ 142976 , Size: 11180 + S3_Output EXEC: L2[ 3: 4]@ 154156 , Size: 11180 + S4_Output EXEC: L2[ 4: 5]@ 142976 , Size: 11180 + S5_Output EXEC: L2[ 5: 6]@ 154156 , Size: 11180 + S6_Output EXEC: L2[ 6: 7]@ 142976 , Size: 11180 + S7_Output EXEC: L2[ 7: 8]@ 154156 , Size: 11180 + S8_Output EXEC: L2[ 8: 9]@ 165336 , Size: 11180 + S9_Output EXEC: L2[ 9: 10]@ 142976 , Size: 11180 + S10_Output EXEC: L2[ 10: 11]@ 154156 , Size: 172 + S11_Output EXEC: L2[ 11: 12]@ 142976 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S1_Conv2d_172x1x10x4_Relu +Generating Code For User Kernel: S2_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S3_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S4_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S5_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S6_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S7_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S8_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S9_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S10_AveragePool_13x5 +Generating Code For User Kernel: S11_Linear_12x172x1x1 +Generating Code For User Kernel: S12_SoftMax +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Weights.tensor: 6880 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Weights.tensor: 2064 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Biases.tensor: 48 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S12_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_m_quant_L3_Flash_Const.dat (size 142976) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 45484 +L2 Memory size (Bytes) : Given: 350000, Used: 197153 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 409672 Bytes +Sum of all Kernels arguments size : 408702 Bytes +Tiling Bandwith overhead : 1.002373 Move/KerArgSize +Sum of baseline bandwidth : 15711780 Bytes +Percentage of baseline BW for L2 : 2.60742 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 9915296 Operations +Total amount of flash coefficients : 142976 Bytes + +Basic kernels library : nntool_extra_kernels.h + : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_m_quant.h +Output Directory : BUILD_MODEL_8BIT_EMUL + +The following files have been generated: + KWS_ds_cnn_m_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_m_quantKernels.h Header file for the generated C code + KWS_ds_cnn_m_quant_L3_Flash_Const.dat Flash content for Graph constants +mkdir /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +python3 /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/GenLUT.py --fft_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/LUT.def --mfcc_bf_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCC_FB.def \ + --sample_rate 16000 --frame_size 640 --frame_step 320 --n_frame 49 \ + --n_fft 1024 --n_dct 40 --mfcc_bank_cnt 40 --fmin 20 --fmax 4000 --mfcc_bank_cnt 40 --preempfactor 0.0 \ + --use_tf_mfcc --save_params_header MFCC_params_MEDIUM.h --use_power --use_high_prec +(1024,) +Filter 0: Start: 2 Stop: 5 Base: 0 Items: 4 +Filter 1: Start: 4 Stop: 8 Base: 4 Items: 5 +Filter 2: Start: 6 Stop: 10 Base: 9 Items: 5 +Filter 3: Start: 9 Stop: 13 Base: 14 Items: 5 +Filter 4: Start: 11 Stop: 15 Base: 19 Items: 5 +Filter 5: Start: 14 Stop: 18 Base: 24 Items: 5 +Filter 6: Start: 16 Stop: 21 Base: 29 Items: 6 +Filter 7: Start: 19 Stop: 24 Base: 35 Items: 6 +Filter 8: Start: 22 Stop: 28 Base: 41 Items: 7 +Filter 9: Start: 25 Stop: 31 Base: 48 Items: 7 +Filter 10: Start: 29 Stop: 34 Base: 55 Items: 6 +Filter 11: Start: 32 Stop: 38 Base: 61 Items: 7 +Filter 12: Start: 35 Stop: 42 Base: 68 Items: 8 +Filter 13: Start: 39 Stop: 46 Base: 76 Items: 8 +Filter 14: Start: 43 Stop: 51 Base: 84 Items: 9 +Filter 15: Start: 47 Stop: 55 Base: 93 Items: 9 +Filter 16: Start: 52 Stop: 60 Base: 102 Items: 9 +Filter 17: Start: 56 Stop: 65 Base: 111 Items: 10 +Filter 18: Start: 61 Stop: 70 Base: 121 Items: 10 +Filter 19: Start: 66 Stop: 75 Base: 131 Items: 10 +Filter 20: Start: 71 Stop: 81 Base: 141 Items: 11 +Filter 21: Start: 76 Stop: 87 Base: 152 Items: 12 +Filter 22: Start: 82 Stop: 93 Base: 164 Items: 12 +Filter 23: Start: 88 Stop: 99 Base: 176 Items: 12 +Filter 24: Start: 94 Stop: 106 Base: 188 Items: 13 +Filter 25: Start: 100 Stop: 113 Base: 201 Items: 14 +Filter 26: Start: 107 Stop: 121 Base: 215 Items: 15 +Filter 27: Start: 114 Stop: 128 Base: 230 Items: 15 +Filter 28: Start: 122 Stop: 137 Base: 245 Items: 16 +Filter 29: Start: 129 Stop: 145 Base: 261 Items: 17 +Filter 30: Start: 138 Stop: 154 Base: 278 Items: 17 +Filter 31: Start: 146 Stop: 163 Base: 295 Items: 18 +Filter 32: Start: 155 Stop: 173 Base: 313 Items: 19 +Filter 33: Start: 164 Stop: 183 Base: 332 Items: 20 +Filter 34: Start: 174 Stop: 194 Base: 352 Items: 21 +Filter 35: Start: 184 Stop: 205 Base: 373 Items: 22 +Filter 36: Start: 195 Stop: 217 Base: 395 Items: 23 +Filter 37: Start: 206 Stop: 229 Base: 418 Items: 24 +Filter 38: Start: 218 Stop: 242 Base: 442 Items: 25 +Filter 39: Start: 230 Stop: 255 Base: 467 Items: 26 +gcc -g -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -I. -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Emulation MFCCmodel.c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccGenerator.c /home/marco-gwt/GWT/AutotilerV2/install/lib/libtile.a -lm -DMEDIUM +/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -f BUILD_MODEL_8BIT_EMUL --L1 48736 --L2 350000 --L3 6388608 +MFCC_COEF_DYN = 10 +FFT_BITS = 10 + +==== Process Tiling For User Kernel: MFCC ======================= + MFCC Partition[0] Size = 33086 (Min: 0, Max: 33814), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes + MFCC, TiledSpace: Tile0 Iteration Count: 1 + In : Ratio: 0.000000, Size: 2560, Total: 2560, Move: 62720 (Decl x 1.960000) L2 +* Out : Ratio: 0.000000, Size: 3920, Total: 6480, Move: 3920 (Decl x 1.000000) L2 +* InOut1 : Ratio: 6.400000, Size: 8192, Total: 14672, Move: 0 (Decl x 0.000000) L2 +* Out_fft : Ratio: 6.400000, Size: 8192, Total: 22864, Move: 0 (Decl x 0.000000) L2 +* shift_fft : Ratio: 3.200000, Size: 1024, Total: 23888, Move: 0 (Decl x 0.000000) L2 +* InOut2 : Ratio: 0.125000, Size: 160, Total: 24048, Move: 0 (Decl x 0.000000) L2 +* Shift : Ratio: 0.003125, Size: 4, Total: 24052, Move: 0 (Decl x 0.000000) L2 +* WinTable : Ratio: 3.200000, Size: 2048, Total: 26100, Move: 2048 (Decl x 1.000000) L2 +* Twiddles_fft : Ratio: 6.400000, Size: 4096, Total: 30196, Move: 4096 (Decl x 1.000000) L2 +* SwapTable_fft : Ratio: 3.200000, Size: 2048, Total: 32244, Move: 2048 (Decl x 1.000000) L2 +*MFCC_FilterBank : Ratio: 0.125000, Size: 320, Total: 32564, Move: 320 (Decl x 1.000000) L2 +* MFCC_Coeffs : Ratio: 1.543750, Size: 988, Total: 33552, Move: 988 (Decl x 1.000000) L2 +* shift_bf : Ratio: 0.125000, Size: 40, Total: 33592, Move: 0 (Decl x 0.000000) L2 +* DCT_Coeff : Ratio: 5.000000, Size: 3200, Total: 36792, Move: 3200 (Decl x 1.000000) L2 + MFCC - IterSpace: Tile0 - L1 Memory: 36792, L2Move: 79340, L3Move: 0, Tiling Overhead: 1.631839 + MFCC Iteration for Tiled Space: 1 + MFCC For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 36792, Reusable Memory: 11944, Used L2 Memory: 0 +================================================================================================= + + +Generating Code For User Kernel: MFCC + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 36792 +L2 Memory size (Bytes) : Given: 350000, Used: 0 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for all user kernels : 0 Bytes +L2 Memory bandwidth for all user kerneks : 79340 Bytes +Sum of all Kernels arguments size : 48620 Bytes +Tiling Bandwith overhead : 1.631839 Move/KerArgSize +Percentage of baseline BW for L2 : inf % +Percentage of baseline BW for L3 : -nan % +Sum of all Kernels operations : 0 Operations + +Basic kernels library : MfccBasicKernels.h +Output Directory : /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL + +The following files have been generated: + MFCCKernels.c Generated C code for the user kernels and the user kernels groups + MFCCKernels.h Header file for the generated C code +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DMEDIUM -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': True} +Pred/Tot: 97/ 100 Accuracy: 97.00% +Pred/Tot: 196/ 200 Accuracy: 98.00% +Pred/Tot: 288/ 300 Accuracy: 96.00% +Pred/Tot: 380/ 400 Accuracy: 95.00% +Pred/Tot: 475/ 500 Accuracy: 95.00% +Pred/Tot: 569/ 600 Accuracy: 94.83% +Pred/Tot: 664/ 700 Accuracy: 94.86% +Pred/Tot: 757/ 800 Accuracy: 94.62% +Pred/Tot: 853/ 900 Accuracy: 94.78% +Pred/Tot: 949/1000 Accuracy: 94.90% +Pred/Tot: 1042/1100 Accuracy: 94.73% +Pred/Tot: 1132/1200 Accuracy: 94.33% +Pred/Tot: 1220/1300 Accuracy: 93.85% +Pred/Tot: 1314/1400 Accuracy: 93.86% +Pred/Tot: 1403/1500 Accuracy: 93.53% +Pred/Tot: 1501/1600 Accuracy: 93.81% +Pred/Tot: 1594/1700 Accuracy: 93.76% +Pred/Tot: 1692/1800 Accuracy: 94.00% +Pred/Tot: 1788/1900 Accuracy: 94.11% +Pred/Tot: 1885/2000 Accuracy: 94.25% +Pred/Tot: 1980/2100 Accuracy: 94.29% +Pred/Tot: 2076/2200 Accuracy: 94.36% +Pred/Tot: 2173/2300 Accuracy: 94.48% +Pred/Tot: 2267/2400 Accuracy: 94.46% +Pred/Tot: 2357/2500 Accuracy: 94.28% +Pred/Tot: 2450/2600 Accuracy: 94.23% +Pred/Tot: 2545/2700 Accuracy: 94.26% +Pred/Tot: 2638/2800 Accuracy: 94.21% +Pred/Tot: 2731/2900 Accuracy: 94.17% +Pred/Tot: 2824/3000 Accuracy: 94.13% +Pred/Tot: 2921/3100 Accuracy: 94.23% +Pred/Tot: 3017/3200 Accuracy: 94.28% +Pred/Tot: 3115/3300 Accuracy: 94.39% +Pred/Tot: 3205/3400 Accuracy: 94.26% +Pred/Tot: 3298/3500 Accuracy: 94.23% +Pred/Tot: 3395/3600 Accuracy: 94.31% +Pred/Tot: 3484/3700 Accuracy: 94.16% +Pred/Tot: 3581/3800 Accuracy: 94.24% +Pred/Tot: 3672/3900 Accuracy: 94.15% +Pred/Tot: 3767/4000 Accuracy: 94.17% +Pred/Tot: 3863/4100 Accuracy: 94.22% +Pred/Tot: 3954/4200 Accuracy: 94.14% +Pred/Tot: 4047/4300 Accuracy: 94.12% +Pred/Tot: 4139/4400 Accuracy: 94.07% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4180/4444 Accuracy: 94.06% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 1 329 3 3 2 7 1 7 10 1 4 3] + [ 0 3 387 0 0 1 3 1 0 0 0 2] + [ 0 11 0 381 0 6 1 0 1 0 1 5] + [ 0 5 1 0 321 2 2 0 2 10 6 1] + [ 0 4 0 9 0 357 0 0 0 0 4 3] + [ 0 2 10 0 0 0 333 6 0 0 0 1] + [ 0 8 1 1 0 1 0 351 0 0 1 0] + [ 1 7 0 1 5 2 0 1 341 5 0 0] + [ 0 5 0 2 16 1 1 0 4 340 2 2] + [ 1 1 0 0 10 1 0 0 0 3 334 0] + [ 0 6 1 17 1 5 0 0 0 3 4 335]] +Pred/Tot: 94/ 100 Accuracy: 94.00% +Pred/Tot: 190/ 200 Accuracy: 95.00% +Pred/Tot: 282/ 300 Accuracy: 94.00% +Pred/Tot: 376/ 400 Accuracy: 94.00% +Pred/Tot: 468/ 500 Accuracy: 93.60% +Pred/Tot: 563/ 600 Accuracy: 93.83% +Pred/Tot: 659/ 700 Accuracy: 94.14% +Pred/Tot: 754/ 800 Accuracy: 94.25% +Pred/Tot: 849/ 900 Accuracy: 94.33% +Pred/Tot: 946/1000 Accuracy: 94.60% +Pred/Tot: 1038/1100 Accuracy: 94.36% +Pred/Tot: 1134/1200 Accuracy: 94.50% +Pred/Tot: 1227/1300 Accuracy: 94.38% +Pred/Tot: 1319/1400 Accuracy: 94.21% +Pred/Tot: 1413/1500 Accuracy: 94.20% +Pred/Tot: 1508/1600 Accuracy: 94.25% +Pred/Tot: 1605/1700 Accuracy: 94.41% +Pred/Tot: 1703/1800 Accuracy: 94.61% +Pred/Tot: 1790/1900 Accuracy: 94.21% +Pred/Tot: 1882/2000 Accuracy: 94.10% +Pred/Tot: 1973/2100 Accuracy: 93.95% +Pred/Tot: 2068/2200 Accuracy: 94.00% +Pred/Tot: 2160/2300 Accuracy: 93.91% +Pred/Tot: 2252/2400 Accuracy: 93.83% +Pred/Tot: 2343/2500 Accuracy: 93.72% +Pred/Tot: 2437/2600 Accuracy: 93.73% +Pred/Tot: 2531/2700 Accuracy: 93.74% +Pred/Tot: 2627/2800 Accuracy: 93.82% +Pred/Tot: 2720/2900 Accuracy: 93.79% +Pred/Tot: 2815/3000 Accuracy: 93.83% +Pred/Tot: 2909/3100 Accuracy: 93.84% +Pred/Tot: 3004/3200 Accuracy: 93.88% +Pred/Tot: 3101/3300 Accuracy: 93.97% +Pred/Tot: 3194/3400 Accuracy: 93.94% +Pred/Tot: 3289/3500 Accuracy: 93.97% +Pred/Tot: 3385/3600 Accuracy: 94.03% +Pred/Tot: 3478/3700 Accuracy: 94.00% +Pred/Tot: 3573/3800 Accuracy: 94.03% +Pred/Tot: 3668/3900 Accuracy: 94.05% +Pred/Tot: 3761/4000 Accuracy: 94.03% +Pred/Tot: 3853/4100 Accuracy: 93.98% +Pred/Tot: 3948/4200 Accuracy: 94.00% +Pred/Tot: 4046/4300 Accuracy: 94.09% +Pred/Tot: 4140/4400 Accuracy: 94.09% +Pred/Tot: 4235/4500 Accuracy: 94.11% +Pred/Tot: 4325/4600 Accuracy: 94.02% +Pred/Tot: 4420/4700 Accuracy: 94.04% +Pred/Tot: 4516/4800 Accuracy: 94.08% + +FINAL TESTING ACCURACY: +Pred/Tot: 4598/4889 Accuracy: 94.05% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 372 0 4 4 5 2 5 6 2 5 3] + [ 0 7 406 5 0 0 0 0 0 0 1 0] + [ 0 3 1 389 1 5 0 0 0 1 0 5] + [ 0 9 0 0 395 2 0 0 3 9 7 0] + [ 0 6 1 17 0 370 1 0 1 0 2 8] + [ 0 8 4 2 0 0 395 3 0 0 0 0] + [ 0 15 0 0 0 2 4 373 1 0 1 0] + [ 0 10 0 0 2 10 0 0 361 11 1 1] + [ 0 5 0 1 8 1 0 0 8 372 1 6] + [ 0 2 0 0 5 3 1 1 1 1 396 1] + [ 0 5 2 24 0 5 0 0 2 2 1 361]] diff --git a/accuracy_log/log_test_medium_hp_power_v2.txt b/accuracy_log/log_test_medium_hp_power_v2.txt new file mode 100644 index 0000000..ae452dc --- /dev/null +++ b/accuracy_log/log_test_medium_hp_power_v2.txt @@ -0,0 +1,1842 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_m_quant_power.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 1 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=1 LARGE=0 WITH_MFCC=1 USE_POWER=1 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_SQ8_EMUL +cp model/KWS_ds_cnn_m_quant_power.tflite BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite +echo "GENERATING NNTOOL STATE FILE" +GENERATING NNTOOL STATE FILE +nntool -s model/nntool_script BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +open - opening graph file BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite load_quantization = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +unified_quantizer - forwards SOFTMAX_0_11 in: -35.73<(i8-0.00)*0.27912489<35.45 out: None stop [] fusion False +unified_quantizer - handler SoftmaxTanHMult selected for SoftMaxParameters(SOFTMAX_0_11) +unified_quantizer - forwards in edge 0 does not match was -35.73<(i8-0.00)*0.27912489<35.45 need -64.00<(i8-0.00)*0.50000000<63.50 forced +unified_quantizer - backwards FULLY_CONNECTED_0_10 in: -9.24<(i8-0.00)*0.07214916<9.16,chan<(i8-0.00)*chan W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1763000 + +==== Process Tiling For User Kernel: S4_Conv2d_172x1x10x4_Relu ======================= +S4_Conv2d_172x1x10x4_Relu Partition[0] Size = 194929 (Min: 200, Max: 275121), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=25 + In Dim: 58, TileOverlap: 8, Ratio: 2.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: Bias, was using 704 Bytes will require 688 Bytes buffer +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: Scale, was using 176 Bytes will require 172 Bytes buffer +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: ScaleN, was using 176 Bytes will require 172 Bytes buffer +S4_Conv2d_172x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 4 Parametric Space: [D1, M0=88] Parametric Space: [D0, M1=1] + In : Ratio: 2.000000, FixDim: 10, VarDim: 22 [ 49], Size: 440, Total: 440, Move: 1460 (Decl x 2.979592) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 1128, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 1300, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 1472, Move: 172 (Decl x 1.000000) L2 +@ Filter : Ratio: 0.000000, Size: 7040, Total: 8512, Move: 6880 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 12320, Total: 20832, Move: 43000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 24640, Total: 45472, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45484, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_172x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 45484, L2Move: 52381, L3Move: 0, Tiling Overhead: 1.018868 +S4_Conv2d_172x1x10x4_Relu Partial buffering on Arg: Filter, From: D0 To: D1. Current is (Par) 1 x [W:1, H:1] x 40 => Partial buffer size is 7040 Bytes +S4_Conv2d_172x1x10x4_Relu Found Parametric value for space D1 (Initial: 172, Div: 8) = 88 [88*1 + 84] and space D0 (Initial: 1, Div: 4) = 1 [1*1 + 0], Iteration for Tiled Space: 4 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: In, Size: 220, Base1: 0, Base2: 220 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Bias, Size: 688, Base1: 440, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Scale, Size: 172, Base1: 1128, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: ScaleN, Size: 172, Base1: 1300, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Filter, Size: 3520, Base1: 1472, Base2: 4992 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Out, Size: 6160, Base1: 8512, Base2: 14672 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: ConvOut, Size: 24640, Base1: 20832, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Infos, Size: 12, Base1: 45472, Base2: 0 +S4_Conv2d_172x1x10x4_Relu For Iter Space: 0 Iteration count: 4 (Last one is truncated), Given L1 Memory: 48736, Used L1 Memory: 45484, Reusable Memory: 3252, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S7_Conv2d_172x1x3x3_Relu ======================= +S7_Conv2d_172x1x3x3_Relu Partition[0] Size = 63665 (Min: 60, Max: 165257), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=13 + In Dim: 27, TileOverlap: 1, Ratio: 2.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 384 Bytes will require 688 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 96 Bytes will require 172 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 Bytes will require 172 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 864 Bytes will require 1548 Bytes buffer +S7_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 43000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 24688, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 24860, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 25032, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 26580, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 32820, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 45300, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45312, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45312, L2Move: 56769, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 48 [48*3 + 28], Iteration for Tiled Space: 1 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: In, Size: 12000, Base1: 0, Base2: 12000 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 24000, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 24688, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 24860, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 25032, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 3120, Base1: 26580, Base2: 29700 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 12480, Base1: 32820, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45300, Base2: 0 +S7_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45312, Reusable Memory: 3424, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S10_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S10_Conv2d_172x172x1x1_Relu ======================= +S10_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S10_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S10_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S10_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S10_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S10_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S10_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S10_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S13_Conv2d_172x1x3x3_Relu ======================= +S13_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S13_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S13_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S13_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S13_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S16_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S16_Conv2d_172x172x1x1_Relu ======================= +S16_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S16_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S16_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S16_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S16_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S16_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S16_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S16_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S19_Conv2d_172x1x3x3_Relu ======================= +S19_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S19_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S19_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S19_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S19_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S22_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S22_Conv2d_172x172x1x1_Relu ======================= +S22_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S22_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S22_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S22_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S22_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S22_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S22_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S22_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S25_Conv2d_172x1x3x3_Relu ======================= +S25_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S25_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S25_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S25_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S25_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S28_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S28_Conv2d_172x172x1x1_Relu ======================= +S28_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S28_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S28_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S28_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S28_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S28_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S28_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S28_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 11180 + +==== Process Tiling For User Kernel: S29_AveragePool_13x5 ======================= +S29_AveragePool_13x5 Partition[0] Size = 23063 (Min: 130, Max: 22731), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=1 + In Dim: 13, TileOverlap: 11, Ratio: 2.000000 + Out Dim: 1, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S29_AveragePool_13x5, Total Raw Memory: 11364 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S29_AveragePool_13x5, Arg: In, Size: 11180, Base1: 0, Base2: 0 +Ker: S29_AveragePool_13x5, Arg: Out, Size: 172, Base1: 11180, Base2: 0 +Ker: S29_AveragePool_13x5, Arg: Infos, Size: 12, Base1: 11352, Base2: 0 +S29_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 11364, Reusable Memory: 37372, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S32_Linear_12x172x1x1, Linear: InDim: 172, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S32_Linear_12x172x1x1 ======================= +S32_Linear_12x172x1x1 Partition[0] Size = 4491 (Min: 0, Max: 4575), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=1 + +Kernel: S32_Linear_12x172x1x1, Total Raw Memory: 2332 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S32_Linear_12x172x1x1, Arg: In, Size: 172, Base1: 0, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Filter, Size: 2064, Base1: 172, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Bias, Size: 48, Base1: 2236, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Out, Size: 12, Base1: 2284, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Scale, Size: 12, Base1: 2296, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: ScaleN, Size: 12, Base1: 2308, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Infos, Size: 12, Base1: 2320, Base2: 0 +S32_Linear_12x172x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 2332, Reusable Memory: 46404, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S33_SoftMax ======================= + S33_SoftMax Partition[0] Size = 51 (Min: 8, Max: 63), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=12 + In Dim: 12, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 12, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S33_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S33_SoftMax, Arg: In, Size: 12, Base1: 0, Base2: 0 +Ker: S33_SoftMax, Arg: Out, Size: 24, Base1: 12, Base2: 0 +Ker: S33_SoftMax, Arg: Infos, Size: 12, Base1: 36, Base2: 0 + S33_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S32_Output[ In] Adding Edge From S32_Linear_12x172x1x1 To S33_SoftMax New + Symbol: S29_Output[ In] Adding Edge From S29_AveragePool_13x5 To S32_Linear_12x172x1x1 New + Symbol: S28_Output[ In] Adding Edge From S28_Conv2d_172x172x1x1_Relu To S29_AveragePool_13x5 New + Symbol: S25_Output[ In] Adding Edge From S25_Conv2d_172x1x3x3_Relu To S28_Conv2d_172x172x1x1_Relu New + Symbol: S22_Output[ In] Adding Edge From S22_Conv2d_172x172x1x1_Relu To S25_Conv2d_172x1x3x3_Relu New + Symbol: S19_Output[ In] Adding Edge From S19_Conv2d_172x1x3x3_Relu To S22_Conv2d_172x172x1x1_Relu New + Symbol: S16_Output[ In] Adding Edge From S16_Conv2d_172x172x1x1_Relu To S19_Conv2d_172x1x3x3_Relu New + Symbol: S13_Output[ In] Adding Edge From S13_Conv2d_172x1x3x3_Relu To S16_Conv2d_172x172x1x1_Relu New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_172x172x1x1_Relu To S13_Conv2d_172x1x3x3_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_172x1x3x3_Relu To S10_Conv2d_172x172x1x1_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_172x1x10x4_Relu To S7_Conv2d_172x1x3x3_Relu New + Symbol: Output_1[ Out] Adding Edge From S33_SoftMax To __GraphExit__ New + Symbol: S33_Infos[ In] Adding Edge From __GraphEntry__ To S33_SoftMax New + Symbol: S32_Infos[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 New + Symbol: S32_Mul_shift[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: S32_Mul_scale[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: Dscnnfc1matmul_bias[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: Dscnnfc1weights_quantfakequant[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: S29_Infos[ In] Adding Edge From __GraphEntry__ To S29_AveragePool_13x5 New + Symbol: S28_Infos[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu New + Symbol: S28_Mul_shift[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: S28_Mul_scale[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: S25_Infos[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu New + Symbol: S25_Mul_shift[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: S25_Mul_scale[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: S22_Infos[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu New + Symbol: S22_Mul_shift[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: S22_Mul_scale[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: S19_Infos[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu New + Symbol: S19_Mul_shift[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: S19_Mul_scale[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: S16_Infos[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu New + Symbol: S16_Mul_shift[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: S16_Mul_scale[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu New + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu New + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu New + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Dscnnconv_1conv2d_fold_bias[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Dscnnconv_1weights_quantfakequ[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Input_1[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 54180 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 197153 => Alloc: OK + +[FULL] Remapping [54180 .. 197152] to [0 .. 142972] Align compensation: 3 +[PART] Remapping [0 .. 54179] to [142976 .. 197155] Align compensation: 0 +[PART] Remapping [197153 .. 349999] to [197156 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_m_quantCNN is sucessfull, L2: 197153 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => Input_1 + (null) => Dscnnconv_1weights_quantfakequ + (null) => Dscnnconv_1conv2d_fold_bias + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S4_Infos + (null) => Dscnnconv_ds_1dw_convweights_q + (null) => Dscnnconv_ds_1dw_convdepthwise + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S7_Infos + (null) => Dscnnconv_ds_1pw_convweights_q + (null) => Dscnnconv_ds_1pw_convconv2d_fo + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S10_Infos + (null) => Dscnnconv_ds_2dw_convweights_q + (null) => Dscnnconv_ds_2dw_convdepthwise + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S13_Infos + (null) => Dscnnconv_ds_2pw_convweights_q + (null) => Dscnnconv_ds_2pw_convconv2d_fo + (null) => S16_Mul_scale + (null) => S16_Mul_shift + (null) => S16_Infos + (null) => Dscnnconv_ds_3dw_convweights_q + (null) => Dscnnconv_ds_3dw_convdepthwise + (null) => S19_Mul_scale + (null) => S19_Mul_shift + (null) => S19_Infos + (null) => Dscnnconv_ds_3pw_convweights_q + (null) => Dscnnconv_ds_3pw_convconv2d_fo + (null) => S22_Mul_scale + (null) => S22_Mul_shift + (null) => S22_Infos + (null) => Dscnnconv_ds_4dw_convweights_q + (null) => Dscnnconv_ds_4dw_convdepthwise + (null) => S25_Mul_scale + (null) => S25_Mul_shift + (null) => S25_Infos + (null) => Dscnnconv_ds_4pw_convweights_q + (null) => Dscnnconv_ds_4pw_convconv2d_fo + (null) => S28_Mul_scale + (null) => S28_Mul_shift + (null) => S28_Infos + (null) => S29_Infos + (null) => Dscnnfc1weights_quantfakequant + (null) => Dscnnfc1matmul_bias + (null) => S32_Mul_scale + (null) => S32_Mul_shift + (null) => S32_Infos + (null) => S33_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 + + Living Dynamic Symbols: [Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S4_Conv2d_172x1x10x4_Relu, Operations: 1763000 + I In => Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 1460, TileOverhead: 2.979592, L2Buff: 0, Addr: 0 +CI PartBuff Filter => Dscnnconv_1weights_quantfakequ --L2-- Size: 6880, L3_Move: 0, L2_Move: 6880, TileOverhead: 1.000000, L2Buff: 0, Addr: 1472 +CI Buff Bias => Dscnnconv_1conv2d_fold_bias --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 440 + O Out => S4_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8512 +CI Buff Scale => S4_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1128 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1300 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45472 + Kernel Memory : L3: 0, L2: 51411 + Kernel Total Memory: 51411, L3 moves: 0, L2 moves: 52381, Move overhead: 1.018868 + Kernel Operations : 1763000 [KernelOper/GraphOper: 17.780609%], Move/Operation ratio: [L3: 0.000000, L2: 0.029711] + Successors: 2 + + Living Dynamic Symbols: [Input_1] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S7_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S4_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_1dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 25032 +CI Buff Bias => Dscnnconv_ds_1dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26580 +CI Buff Scale => S7_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24688 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24860 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45300 + Kernel Memory : L3: 0, L2: 56769 + Kernel Total Memory: 56769, L3 moves: 0, L2 moves: 56769, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.507773] + Successors: 3 + + Living Dynamic Symbols: [S4_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S10_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_1pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_1pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S10_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S10_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 4 + + Living Dynamic Symbols: [S7_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S13_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S10_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_2dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_2dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S13_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S13_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 5 + + Living Dynamic Symbols: [S10_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S16_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S13_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_2pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_2pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S16_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S16_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S16_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S16_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 6 + + Living Dynamic Symbols: [S13_Output] [S16_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S19_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S16_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_3dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_3dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S19_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S19_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S19_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S19_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 7 + + Living Dynamic Symbols: [S16_Output] [S19_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S22_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S19_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_3pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_3pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S22_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S22_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S22_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S22_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 8 + + Living Dynamic Symbols: [S19_Output] [S22_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S25_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S22_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_4dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_4dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S25_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S25_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S25_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S25_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 9 + + Living Dynamic Symbols: [S22_Output] [S25_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S28_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S25_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_4pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_4pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S28_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S28_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S28_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S28_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 10 + + Living Dynamic Symbols: [S25_Output] [S28_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S29_AveragePool_13x5, Operations: 11180 + I Buff In => S28_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S29_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11180 +CI Buff Infos => S29_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 11352 + Kernel Memory : L3: 0, L2: 11361 + Kernel Total Memory: 11361, L3 moves: 0, L2 moves: 11361, Move overhead: 1.000000 + Kernel Operations : 11180 [KernelOper/GraphOper: 0.112755%], Move/Operation ratio: [L3: 0.000000, L2: 1.016190] + Successors: 11 + + Living Dynamic Symbols: [S28_Output] [S29_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S32_Linear_12x172x1x1, Operations: 2064 + I Buff In => S29_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnfc1weights_quantfakequant --L2-- Size: 2064, L3_Move: 0, L2_Move: 2064, TileOverhead: 1.000000, L2Buff: 0, Addr: 172 +CI Buff Bias => Dscnnfc1matmul_bias --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 2236 + O Buff Out => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2284 +CI Buff Scale => S32_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2296 +CI Buff ScaleN => S32_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2308 +CI Buff Infos => S32_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 2320 + Kernel Memory : L3: 0, L2: 2329 + Kernel Total Memory: 2329, L3 moves: 0, L2 moves: 2329, Move overhead: 1.000000 + Kernel Operations : 2064 [KernelOper/GraphOper: 0.020816%], Move/Operation ratio: [L3: 0.000000, L2: 1.128392] + Successors: 12 + + Living Dynamic Symbols: [S29_Output] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S33_SoftMax, Operations: 12 + I Buff In => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S33_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000121%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 56769 + Graph nodes min global memory: L3: 0, L2: 56772 + Graph sum of kernel arguments size: 408702, L3 moves: 0, L2 moves: 499112, Move overhead: 1.221213 + Graph total operations: 9915296 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 408702, Total L3_Move: 0, Total L2_Move: 499112, Tiling Overhead Average: 1.221213 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 54180 + Const 0 30628 + Total 0 56772 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + Input_1 Externally allocated + Dscnnconv_1weights_quantfakequ INSTALL: HyperFlash[ 0: 13]@ 118336 LOAD: L2[ 0: 13]@ 118336 EXEC: L2[ 0: 13]@ 118336 , Size: 6880 + Dscnnconv_1conv2d_fold_bias INSTALL: HyperFlash[ 0: 13]@ 133472 LOAD: L2[ 0: 13]@ 133472 EXEC: L2[ 0: 13]@ 133472 , Size: 688 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 139664 LOAD: L2[ 0: 13]@ 139664 EXEC: L2[ 0: 13]@ 139664 , Size: 172 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 139836 LOAD: L2[ 0: 13]@ 139836 EXEC: L2[ 0: 13]@ 139836 , Size: 172 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 142808 LOAD: L2[ 0: 13]@ 142808 EXEC: L2[ 0: 13]@ 142808 , Size: 9 + Dscnnconv_ds_1dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 127280 LOAD: L2[ 0: 13]@ 127280 EXEC: L2[ 0: 13]@ 127280 , Size: 1548 + Dscnnconv_ds_1dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 134160 LOAD: L2[ 0: 13]@ 134160 EXEC: L2[ 0: 13]@ 134160 , Size: 688 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140008 LOAD: L2[ 0: 13]@ 140008 EXEC: L2[ 0: 13]@ 140008 , Size: 172 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140180 LOAD: L2[ 0: 13]@ 140180 EXEC: L2[ 0: 13]@ 140180 , Size: 172 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 142820 LOAD: L2[ 0: 13]@ 142820 EXEC: L2[ 0: 13]@ 142820 , Size: 9 + Dscnnconv_ds_1pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 29584 + Dscnnconv_ds_1pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 134848 LOAD: L2[ 0: 13]@ 134848 EXEC: L2[ 0: 13]@ 134848 , Size: 688 + S10_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140352 LOAD: L2[ 0: 13]@ 140352 EXEC: L2[ 0: 13]@ 140352 , Size: 172 + S10_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140524 LOAD: L2[ 0: 13]@ 140524 EXEC: L2[ 0: 13]@ 140524 , Size: 172 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 142832 LOAD: L2[ 0: 13]@ 142832 EXEC: L2[ 0: 13]@ 142832 , Size: 9 + Dscnnconv_ds_2dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 128828 LOAD: L2[ 0: 13]@ 128828 EXEC: L2[ 0: 13]@ 128828 , Size: 1548 + Dscnnconv_ds_2dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 135536 LOAD: L2[ 0: 13]@ 135536 EXEC: L2[ 0: 13]@ 135536 , Size: 688 + S13_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140696 LOAD: L2[ 0: 13]@ 140696 EXEC: L2[ 0: 13]@ 140696 , Size: 172 + S13_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140868 LOAD: L2[ 0: 13]@ 140868 EXEC: L2[ 0: 13]@ 140868 , Size: 172 + S13_Infos INSTALL: HyperFlash[ 0: 13]@ 142844 LOAD: L2[ 0: 13]@ 142844 EXEC: L2[ 0: 13]@ 142844 , Size: 9 + Dscnnconv_ds_2pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 29584 LOAD: L2[ 0: 13]@ 29584 EXEC: L2[ 0: 13]@ 29584 , Size: 29584 + Dscnnconv_ds_2pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 136224 LOAD: L2[ 0: 13]@ 136224 EXEC: L2[ 0: 13]@ 136224 , Size: 688 + S16_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141040 LOAD: L2[ 0: 13]@ 141040 EXEC: L2[ 0: 13]@ 141040 , Size: 172 + S16_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141212 LOAD: L2[ 0: 13]@ 141212 EXEC: L2[ 0: 13]@ 141212 , Size: 172 + S16_Infos INSTALL: HyperFlash[ 0: 13]@ 142856 LOAD: L2[ 0: 13]@ 142856 EXEC: L2[ 0: 13]@ 142856 , Size: 9 + Dscnnconv_ds_3dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 130376 LOAD: L2[ 0: 13]@ 130376 EXEC: L2[ 0: 13]@ 130376 , Size: 1548 + Dscnnconv_ds_3dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 136912 LOAD: L2[ 0: 13]@ 136912 EXEC: L2[ 0: 13]@ 136912 , Size: 688 + S19_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141384 LOAD: L2[ 0: 13]@ 141384 EXEC: L2[ 0: 13]@ 141384 , Size: 172 + S19_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141556 LOAD: L2[ 0: 13]@ 141556 EXEC: L2[ 0: 13]@ 141556 , Size: 172 + S19_Infos INSTALL: HyperFlash[ 0: 13]@ 142868 LOAD: L2[ 0: 13]@ 142868 EXEC: L2[ 0: 13]@ 142868 , Size: 9 + Dscnnconv_ds_3pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 59168 LOAD: L2[ 0: 13]@ 59168 EXEC: L2[ 0: 13]@ 59168 , Size: 29584 + Dscnnconv_ds_3pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 137600 LOAD: L2[ 0: 13]@ 137600 EXEC: L2[ 0: 13]@ 137600 , Size: 688 + S22_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141728 LOAD: L2[ 0: 13]@ 141728 EXEC: L2[ 0: 13]@ 141728 , Size: 172 + S22_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141900 LOAD: L2[ 0: 13]@ 141900 EXEC: L2[ 0: 13]@ 141900 , Size: 172 + S22_Infos INSTALL: HyperFlash[ 0: 13]@ 142880 LOAD: L2[ 0: 13]@ 142880 EXEC: L2[ 0: 13]@ 142880 , Size: 9 + Dscnnconv_ds_4dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 131924 LOAD: L2[ 0: 13]@ 131924 EXEC: L2[ 0: 13]@ 131924 , Size: 1548 + Dscnnconv_ds_4dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 138288 LOAD: L2[ 0: 13]@ 138288 EXEC: L2[ 0: 13]@ 138288 , Size: 688 + S25_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142072 LOAD: L2[ 0: 13]@ 142072 EXEC: L2[ 0: 13]@ 142072 , Size: 172 + S25_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142244 LOAD: L2[ 0: 13]@ 142244 EXEC: L2[ 0: 13]@ 142244 , Size: 172 + S25_Infos INSTALL: HyperFlash[ 0: 13]@ 142892 LOAD: L2[ 0: 13]@ 142892 EXEC: L2[ 0: 13]@ 142892 , Size: 9 + Dscnnconv_ds_4pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 88752 LOAD: L2[ 0: 13]@ 88752 EXEC: L2[ 0: 13]@ 88752 , Size: 29584 + Dscnnconv_ds_4pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 138976 LOAD: L2[ 0: 13]@ 138976 EXEC: L2[ 0: 13]@ 138976 , Size: 688 + S28_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142416 LOAD: L2[ 0: 13]@ 142416 EXEC: L2[ 0: 13]@ 142416 , Size: 172 + S28_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142588 LOAD: L2[ 0: 13]@ 142588 EXEC: L2[ 0: 13]@ 142588 , Size: 172 + S28_Infos INSTALL: HyperFlash[ 0: 13]@ 142904 LOAD: L2[ 0: 13]@ 142904 EXEC: L2[ 0: 13]@ 142904 , Size: 9 + S29_Infos INSTALL: HyperFlash[ 0: 13]@ 142916 LOAD: L2[ 0: 13]@ 142916 EXEC: L2[ 0: 13]@ 142916 , Size: 9 + Dscnnfc1weights_quantfakequant INSTALL: HyperFlash[ 0: 13]@ 125216 LOAD: L2[ 0: 13]@ 125216 EXEC: L2[ 0: 13]@ 125216 , Size: 2064 + Dscnnfc1matmul_bias INSTALL: HyperFlash[ 0: 13]@ 142760 LOAD: L2[ 0: 13]@ 142760 EXEC: L2[ 0: 13]@ 142760 , Size: 48 + S32_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142928 LOAD: L2[ 0: 13]@ 142928 EXEC: L2[ 0: 13]@ 142928 , Size: 12 + S32_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142940 LOAD: L2[ 0: 13]@ 142940 EXEC: L2[ 0: 13]@ 142940 , Size: 12 + S32_Infos INSTALL: HyperFlash[ 0: 13]@ 142952 LOAD: L2[ 0: 13]@ 142952 EXEC: L2[ 0: 13]@ 142952 , Size: 9 + S33_Infos INSTALL: HyperFlash[ 0: 13]@ 142964 LOAD: L2[ 0: 13]@ 142964 EXEC: L2[ 0: 13]@ 142964 , Size: 9 + Output_1 Externally allocated + S4_Output EXEC: L2[ 1: 2]@ 154156 , Size: 43000 + S7_Output EXEC: L2[ 2: 3]@ 142976 , Size: 11180 + S10_Output EXEC: L2[ 3: 4]@ 154156 , Size: 11180 + S13_Output EXEC: L2[ 4: 5]@ 142976 , Size: 11180 + S16_Output EXEC: L2[ 5: 6]@ 154156 , Size: 11180 + S19_Output EXEC: L2[ 6: 7]@ 142976 , Size: 11180 + S22_Output EXEC: L2[ 7: 8]@ 154156 , Size: 11180 + S25_Output EXEC: L2[ 8: 9]@ 165336 , Size: 11180 + S28_Output EXEC: L2[ 9: 10]@ 142976 , Size: 11180 + S29_Output EXEC: L2[ 10: 11]@ 154156 , Size: 172 + S32_Output EXEC: L2[ 11: 12]@ 142976 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S4_Conv2d_172x1x10x4_Relu +Generating Code For User Kernel: S7_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S10_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S13_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S16_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S19_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S22_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S25_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S28_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S29_AveragePool_13x5 +Generating Code For User Kernel: S32_Linear_12x172x1x1 +Generating Code For User Kernel: S33_SoftMax +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1weights_quantfakequ.tensor: 6880 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1conv2d_fold_bias.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S29_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1weights_quantfakequant.tensor: 2064 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1matmul_bias.tensor: 12 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S33_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_m_quant_L3_Flash_Const.dat (size 142976) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 45492 +L2 Memory size (Bytes) : Given: 350000, Used: 197153 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 499112 Bytes +Sum of all Kernels arguments size : 408702 Bytes +Tiling Bandwith overhead : 1.221213 Move/KerArgSize +Sum of baseline bandwidth : 15711780 Bytes +Percentage of baseline BW for L2 : 3.17667 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 9915296 Operations +Total amount of flash coefficients : 142976 Bytes + +Basic kernels library : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_m_quant.h +Output Directory : BUILD_MODEL_SQ8_EMUL + +The following files have been generated: + KWS_ds_cnn_m_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_m_quantKernels.h Header file for the generated C code + KWS_ds_cnn_m_quant_L3_Flash_Const.dat Flash content for Graph constants +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': True} +Pred/Tot: 97/ 100 Accuracy: 97.00% +Pred/Tot: 197/ 200 Accuracy: 98.50% +Pred/Tot: 287/ 300 Accuracy: 95.67% +Pred/Tot: 382/ 400 Accuracy: 95.50% +Pred/Tot: 477/ 500 Accuracy: 95.40% +Pred/Tot: 569/ 600 Accuracy: 94.83% +Pred/Tot: 663/ 700 Accuracy: 94.71% +Pred/Tot: 756/ 800 Accuracy: 94.50% +Pred/Tot: 848/ 900 Accuracy: 94.22% +Pred/Tot: 944/1000 Accuracy: 94.40% +Pred/Tot: 1037/1100 Accuracy: 94.27% +Pred/Tot: 1130/1200 Accuracy: 94.17% +Pred/Tot: 1216/1300 Accuracy: 93.54% +Pred/Tot: 1310/1400 Accuracy: 93.57% +Pred/Tot: 1401/1500 Accuracy: 93.40% +Pred/Tot: 1499/1600 Accuracy: 93.69% +Pred/Tot: 1592/1700 Accuracy: 93.65% +Pred/Tot: 1689/1800 Accuracy: 93.83% +Pred/Tot: 1785/1900 Accuracy: 93.95% +Pred/Tot: 1882/2000 Accuracy: 94.10% +Pred/Tot: 1976/2100 Accuracy: 94.10% +Pred/Tot: 2072/2200 Accuracy: 94.18% +Pred/Tot: 2169/2300 Accuracy: 94.30% +Pred/Tot: 2261/2400 Accuracy: 94.21% +Pred/Tot: 2353/2500 Accuracy: 94.12% +Pred/Tot: 2444/2600 Accuracy: 94.00% +Pred/Tot: 2539/2700 Accuracy: 94.04% +Pred/Tot: 2634/2800 Accuracy: 94.07% +Pred/Tot: 2724/2900 Accuracy: 93.93% +Pred/Tot: 2817/3000 Accuracy: 93.90% +Pred/Tot: 2915/3100 Accuracy: 94.03% +Pred/Tot: 3011/3200 Accuracy: 94.09% +Pred/Tot: 3108/3300 Accuracy: 94.18% +Pred/Tot: 3195/3400 Accuracy: 93.97% +Pred/Tot: 3289/3500 Accuracy: 93.97% +Pred/Tot: 3387/3600 Accuracy: 94.08% +Pred/Tot: 3478/3700 Accuracy: 94.00% +Pred/Tot: 3573/3800 Accuracy: 94.03% +Pred/Tot: 3665/3900 Accuracy: 93.97% +Pred/Tot: 3759/4000 Accuracy: 93.97% +Pred/Tot: 3853/4100 Accuracy: 93.98% +Pred/Tot: 3946/4200 Accuracy: 93.95% +Pred/Tot: 4038/4300 Accuracy: 93.91% +Pred/Tot: 4130/4400 Accuracy: 93.86% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4171/4444 Accuracy: 93.86% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 0 329 2 2 2 8 3 7 11 1 4 2] + [ 0 4 386 1 0 3 1 0 0 0 0 2] + [ 0 11 2 374 0 7 1 0 0 0 3 8] + [ 0 2 1 1 322 0 3 0 3 10 7 1] + [ 0 4 0 9 0 358 0 0 0 1 2 3] + [ 0 5 8 2 0 0 334 3 0 0 0 0] + [ 0 8 1 1 0 1 0 351 0 0 1 0] + [ 1 5 1 1 5 2 0 1 339 8 0 0] + [ 0 4 0 1 17 2 1 1 4 339 2 2] + [ 1 1 0 0 8 1 0 0 0 3 334 2] + [ 0 10 0 17 1 3 0 0 1 2 4 334]] +Pred/Tot: 93/ 100 Accuracy: 93.00% +Pred/Tot: 188/ 200 Accuracy: 94.00% +Pred/Tot: 280/ 300 Accuracy: 93.33% +Pred/Tot: 372/ 400 Accuracy: 93.00% +Pred/Tot: 463/ 500 Accuracy: 92.60% +Pred/Tot: 558/ 600 Accuracy: 93.00% +Pred/Tot: 654/ 700 Accuracy: 93.43% +Pred/Tot: 748/ 800 Accuracy: 93.50% +Pred/Tot: 842/ 900 Accuracy: 93.56% +Pred/Tot: 938/1000 Accuracy: 93.80% +Pred/Tot: 1029/1100 Accuracy: 93.55% +Pred/Tot: 1123/1200 Accuracy: 93.58% +Pred/Tot: 1216/1300 Accuracy: 93.54% +Pred/Tot: 1307/1400 Accuracy: 93.36% +Pred/Tot: 1402/1500 Accuracy: 93.47% +Pred/Tot: 1496/1600 Accuracy: 93.50% +Pred/Tot: 1591/1700 Accuracy: 93.59% +Pred/Tot: 1689/1800 Accuracy: 93.83% +Pred/Tot: 1777/1900 Accuracy: 93.53% +Pred/Tot: 1868/2000 Accuracy: 93.40% +Pred/Tot: 1959/2100 Accuracy: 93.29% +Pred/Tot: 2053/2200 Accuracy: 93.32% +Pred/Tot: 2145/2300 Accuracy: 93.26% +Pred/Tot: 2239/2400 Accuracy: 93.29% +Pred/Tot: 2332/2500 Accuracy: 93.28% +Pred/Tot: 2425/2600 Accuracy: 93.27% +Pred/Tot: 2520/2700 Accuracy: 93.33% +Pred/Tot: 2616/2800 Accuracy: 93.43% +Pred/Tot: 2709/2900 Accuracy: 93.41% +Pred/Tot: 2803/3000 Accuracy: 93.43% +Pred/Tot: 2898/3100 Accuracy: 93.48% +Pred/Tot: 2995/3200 Accuracy: 93.59% +Pred/Tot: 3091/3300 Accuracy: 93.67% +Pred/Tot: 3184/3400 Accuracy: 93.65% +Pred/Tot: 3280/3500 Accuracy: 93.71% +Pred/Tot: 3377/3600 Accuracy: 93.81% +Pred/Tot: 3471/3700 Accuracy: 93.81% +Pred/Tot: 3566/3800 Accuracy: 93.84% +Pred/Tot: 3662/3900 Accuracy: 93.90% +Pred/Tot: 3754/4000 Accuracy: 93.85% +Pred/Tot: 3847/4100 Accuracy: 93.83% +Pred/Tot: 3940/4200 Accuracy: 93.81% +Pred/Tot: 4037/4300 Accuracy: 93.88% +Pred/Tot: 4133/4400 Accuracy: 93.93% +Pred/Tot: 4226/4500 Accuracy: 93.91% +Pred/Tot: 4313/4600 Accuracy: 93.76% +Pred/Tot: 4408/4700 Accuracy: 93.79% +Pred/Tot: 4504/4800 Accuracy: 93.83% + +FINAL TESTING ACCURACY: +Pred/Tot: 4585/4889 Accuracy: 93.78% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 373 1 4 2 4 2 6 7 2 5 2] + [ 0 6 406 5 0 0 1 0 0 0 1 0] + [ 0 7 1 383 2 7 1 0 0 0 0 4] + [ 0 8 0 0 393 2 0 0 4 10 8 0] + [ 0 6 1 15 0 373 0 0 1 0 3 7] + [ 0 8 5 3 0 0 394 2 0 0 0 0] + [ 0 14 0 0 0 2 3 375 1 0 1 0] + [ 0 8 0 0 2 9 0 0 362 12 1 2] + [ 0 7 0 1 7 1 0 0 10 370 1 5] + [ 0 3 0 0 7 5 0 1 1 1 393 0] + [ 0 10 0 25 1 6 1 0 2 1 1 355]] diff --git a/accuracy_log/log_test_medium_hp_spectr.txt b/accuracy_log/log_test_medium_hp_spectr.txt new file mode 100644 index 0000000..b088114 --- /dev/null +++ b/accuracy_log/log_test_medium_hp_spectr.txt @@ -0,0 +1,1499 @@ +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_m_quant.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 0 --use_high_prec 1 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=1 LARGE=0 WITH_MFCC=1 USE_POWER=0 USE_HIGH_PREC=1 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +rm -f BUILD_MODEL_8BIT_EMUL/GenTile +rm -f -rf BUILD_MODEL_8BIT_EMUL +rm -f BUILD_MODEL_8BIT_EMUL/*.dat +rm -rf /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_8BIT_EMUL +cp model/KWS_ds_cnn_m_quant.tflite BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quant.tflite +echo "GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL" +GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL +sed -e "s|MODEL_SRC|KWS_ds_cnn_m_quantModel.c|g" -e "s|TENSORS_DIR|BUILD_MODEL_8BIT_EMUL/tensors|g" -e "s|MODEL_BUILD|BUILD_MODEL_8BIT_EMUL|g" -e "s|GRAPH_DUMP||g" -e "s|LARGE_OPT||g" \ + model/nntool_script_params > model/nntool_script +nntool -s model/nntool_script BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +load_quantization - was: False +now: True +open - opening graph file BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quant.tflite load_quantizaion = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +debug - was: False +now: True +adjust_order - adding transposes to correct tensor order for AT kernels +set_aliases - looking for aliased edges +eliminate_transposes - eliminating unnecessary transposes +eliminate_transposes - search for transposes +eliminate_transposes - ++ Starting up from CONV_2D_0_0[0] +eliminate_transposes - looking up at input_1[0] transpose [2, 0, 1] +eliminate_transposes - accepted input_1 - input without fixed order - transpose input [2, 0, 1] +eliminate_transposes - ++ Found results for CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_0[0] +eliminate_transposes - looking down at CONV_2D_0_0_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_1 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_2[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_2 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_2[0] +eliminate_transposes - looking down at CONV_2D_0_2_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_3 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_2[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_4[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_4 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_4[0] +eliminate_transposes - looking down at CONV_2D_0_4_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_5 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_4[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_6[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_6 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_6[0] +eliminate_transposes - looking down at CONV_2D_0_6_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_7 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_6[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_8[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_8 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_8[0] +eliminate_transposes - looking down at CONV_2D_0_8_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at AVERAGE_POOL_2D_0_9[0] transpose [1, 2, 0] +eliminate_transposes - accepted AVERAGE_POOL_2D_0_9 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_8[0] +eliminate_transposes - ++ Starting down from AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - looking down at FULLY_CONNECTED_0_10[0] transpose [1, 2, 0] +eliminate_transposes - accepted FULLY_CONNECTED_0_10 - linear layer reorder input +eliminate_transposes - ++ Found results for AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - eliminate transposes +eliminate_transposes_actions - Start Action (up): CONV_2D_0_0 +eliminate_transposes_actions - CONV_2D_0_0 delete transpose in[0] +eliminate_transposes_actions - CONV_2D_0_0 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - input_1 input dims with (2, 0, 1) +eliminate_transposes_actions - End Action (up): input_1 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_0 +eliminate_transposes_actions - CONV_2D_0_0 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_0 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_2 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_2 +eliminate_transposes_actions - CONV_2D_0_2 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_2 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_4 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_4 +eliminate_transposes_actions - CONV_2D_0_4 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_4 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_6 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_6 +eliminate_transposes_actions - CONV_2D_0_6 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_6 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_8 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_8 +eliminate_transposes_actions - CONV_2D_0_8 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_8 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose in[0] +eliminate_transposes_actions - End Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - Start Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose out[0] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - reorder linear layer FULLY_CONNECTED_0_10 in with shape 1x1x172 transposed (2, 0, 1) +eliminate_transposes_actions - End Action (down): FULLY_CONNECTED_0_10 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - search for transposes +eliminate_transposes - no transposes to eliminate found +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - no further transpose sequences found +set_aliases - looking for aliased edges +nngraph - adjusted order +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start remove_relus +matcher - fusions - start remove_noops +matcher - fusions - start fuse_external_bias_sq8 +matcher - fusions - start fuse_pad +matcher - fusions - start match_duplicate_operations +matcher - fusions - start gather_to_split +matcher - fusions - start slice_to_split +matcher - fusions - start unused_concats +matcher - fusions - start find_missing_quantization +matcher - fusions - start rnn_reverse +matcher - fusions - start rnn_unpack +matcher - fusions - start match_far_hsigmoid +matcher - fusions - start match_close_hsigmoid +matcher - fusions - start expand_transposes +matcher - fusions - start move_pooling_scale8 +matcher - fusions - start move_activations_scale8 +matcher - fusions - start fuse_gap_convs +matcher - fusions - start match_conv_active_pool +matcher - fusions - start match_conv_pool_active +matcher - fusions - start match_conv_active +match_gap_conv - fusing nodes CONV_2D_0_0,CONV_2D_0_0_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_1,DEPTHWISE_CONV_2D_0_1_activation +match_gap_conv - fusing nodes CONV_2D_0_2,CONV_2D_0_2_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_3,DEPTHWISE_CONV_2D_0_3_activation +match_gap_conv - fusing nodes CONV_2D_0_4,CONV_2D_0_4_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_5,DEPTHWISE_CONV_2D_0_5_activation +match_gap_conv - fusing nodes CONV_2D_0_6,CONV_2D_0_6_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_7,DEPTHWISE_CONV_2D_0_7_activation +match_gap_conv - fusing nodes CONV_2D_0_8,CONV_2D_0_8_activation +matcher - fusions - match_conv_active modified graph +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start match_conv_pool +matcher - fusions - start fuse_gap_linear +matcher - fusions - start fuse_op_activation_scale8 +matcher - fusions - start propagate_softmax_sym_qrec +equalize_sym_mult_concats - propagating scale up from node SOFTMAX_0_11 to node FULLY_CONNECTED_0_10 +matcher - fusions - start equalize_sm_concats +matcher - fusions - start filter_bigger_than_input +matcher - fusions - start insert_copies +matcher - fusions - start propagate_up_rnn_in_qs +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| Step | Step name | Operation | Input Dims | Output Dims | Inputs | Active | Params | Ops | Params | Hints | +| | | | (cxhxw) | (cxhxw) | | size | size | | | | ++======+=============================+=========================+============+=============+========+========+========+=========+============================+======================+ +| 0 | input_1 | input | 1x49x10 | 1x49x10 | | 490 | 0 | | I 1x49x10 FIXED_ORDER=0 | in: hxwxc out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 1 | CONV_2D_0_0_fusion | conv_fusion_conv_active | 1x49x10 | 172x25x10 | 0 | 43490 | 7052 | 1.72M | F 172x1x10x4 S 2x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 4x5x1x2 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 2 | DEPTHWISE_CONV_2D_0_1_fusio | conv_fusion_conv_active | 172x25x10 | 172x13x5 | 1 | 54180 | 181 | 100.62K | F 172x1x3x3 S 2x2 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x0x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 3 | CONV_2D_0_2_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 2 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 4 | DEPTHWISE_CONV_2D_0_3_fusio | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 3 | 22360 | 181 | 100.62K | F 172x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 5 | CONV_2D_0_4_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 4 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 6 | DEPTHWISE_CONV_2D_0_5_fusio | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 5 | 22360 | 181 | 100.62K | F 172x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 7 | CONV_2D_0_6_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 6 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 8 | DEPTHWISE_CONV_2D_0_7_fusio | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 7 | 22360 | 181 | 100.62K | F 172x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 9 | CONV_2D_0_8_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 8 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 10 | AVERAGE_POOL_2D_0_9 | average_pool | 172x13x5 | 172x1x1 | 9 | 11352 | 0 | 11.35K | T average F 13x5 S 2x2 P | in: cxhxw out: cxhxw | +| | | | | | | | | | 0x0x0x0 zero | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 11 | FULLY_CONNECTED_0_10 | linear | 172x1x1 | 12 | 10 | 184 | 2076 | 2.06K | F 12x172x1x1 | in: cx0x1 out: c | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 12 | SOFTMAX_0_11 | softmax | 12 | 12 | 11 | 24 | 0 | 24 | Beta 0.0 Axis None | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 13 | output_1 | output | 12 | 12 | 12 | 12 | 0 | | O 12 FIXED_ORDER=0 | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | 54180 | 128876 | 9.83M | | | +| | Max active/Total params | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | | 183056 | 9.83M | | | +| | Max mem usage | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| Step | Name | In | Out | Weights | Bias | Mulbias | Calc | Acc | ++======+==================+==================+=================+=================+==========+==========+=======+=======+ +| 0 | input_1 | -247.97>chan | Q32.0 | Q32.0 | +| | | 27124<246.03 | 09111<35.57 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 1 | CONV_2D_0_0_acti | -35.85>chan | Q32.0 | Q32.0 | +| | D_0_1 | 9111<35.57 | 17583<35.07 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 2 | DEPTHWISE_CONV_2 | -35.35>chan | Q32.0 | Q32.0 | +| | | 7583<35.07 | 67131<30.57 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 3 | CONV_2D_0_2_acti | -30.81>chan | Q32.0 | Q32.0 | +| | D_0_3 | 7131<30.57 | 10654<27.19 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 4 | DEPTHWISE_CONV_2 | -27.41>chan | Q32.0 | Q32.0 | +| | | 0654<27.19 | 74510<17.87 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 5 | CONV_2D_0_4_acti | -18.02>chan | Q32.0 | Q32.0 | +| | D_0_5 | 4510<17.87 | 73493<17.37 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 6 | DEPTHWISE_CONV_2 | -17.50>chan | Q32.0 | Q32.0 | +| | | 3493<17.37 | 07283<14.74 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 7 | CONV_2D_0_6_acti | -14.86>chan | Q32.0 | Q32.0 | +| | D_0_7 | 7283<14.74 | 30772<17.31 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 8 | DEPTHWISE_CONV_2 | -17.45>chan | Q32.0 | Q32.0 | +| | | 0772<17.31 | 2234<9.18 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 9 | CONV_2D_0_8_acti | -9.26>chan | Q32.0 | Q32.0 | +| | 0_10 | 234<9.18 | 00000<32.38 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 12 | SOFTMAX_0_11 | -32.63 W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1763000 + +==== Process Tiling For User Kernel: S1_Conv2d_172x1x10x4_Relu ======================= +S1_Conv2d_172x1x10x4_Relu Partition[0] Size = 194945 (Min: 200, Max: 275249), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S1_Conv2d_172x1x10x4_Relu Full buffering on Arg: Bias, was using 704 bytes will require 688 bytes buffer +S1_Conv2d_172x1x10x4_Relu Full buffering on Arg: Scale, was using 176 bytes will require 172 bytes buffer +S1_Conv2d_172x1x10x4_Relu Full buffering on Arg: ScaleN, was using 176 bytes will require 172 bytes buffer +S1_Conv2d_172x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 4 Parametric Space: [D1, M0=88] Parametric Space: [D0, M1=1] + In : Ratio: 2.000000, FixDim: 10, VarDim: 22 [ 49], Size: 440, Total: 440, Move: 1460 (Decl x 2.979592) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 1128, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 1300, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 1472, Move: 172 (Decl x 1.000000) L2 +@ Filter : Ratio: 0.000000, Size: 6880, Total: 8352, Move: 6880 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 12320, Total: 20672, Move: 43000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 24640, Total: 45312, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45324, Move: 9 (Decl x 1.000000) L2 +S1_Conv2d_172x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 45324, L2Move: 52381, L3Move: 0, Tiling Overhead: 1.018868 +S1_Conv2d_172x1x10x4_Relu Partial buffering on Arg: Filter, From: D0 To: D1. Current is (Par) 1 x [W:1, H:1] x 40 => Partial buffer size is 7040 bytes +S1_Conv2d_172x1x10x4_Relu Found Parametric value for space D1 (Initial: 172, Div: 8) = 88 [88*1 + 84] and space D0 (Initial: 1, Div: 4) = 1 [1*1 + 0], Iteration for Tiled Space: 4 +S1_Conv2d_172x1x10x4_Relu For Iter Space: 0 Iteration count: 4 (Last one is truncated), Given L1 Memory: 48736, Used L1 Memory: 45484, Reusable Memory: 3252, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S2_Conv2d_172x1x3x3_Relu ======================= +S2_Conv2d_172x1x3x3_Relu Partition[0] Size = 63681 (Min: 60, Max: 165385), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 384 bytes will require 688 bytes buffer +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 96 bytes will require 172 bytes buffer +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 bytes will require 172 bytes buffer +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 864 bytes will require 1548 bytes buffer +S2_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 43000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 24688, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 24860, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 25032, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 26580, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 32820, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 45300, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45312, Move: 9 (Decl x 1.000000) L2 +S2_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45312, L2Move: 56769, L3Move: 0, Tiling Overhead: 1.000000 +S2_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 48 [48*3 + 28], Iteration for Tiled Space: 1 +S2_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45312, Reusable Memory: 3424, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S3_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S3_Conv2d_172x172x1x1_Relu ======================= +S3_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S3_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S3_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S3_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S3_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S3_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S3_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S3_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S4_Conv2d_172x1x3x3_Relu ======================= +S4_Conv2d_172x1x3x3_Relu Partition[0] Size = 56801 (Min: 30, Max: 98305), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 688 bytes buffer +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 172 bytes buffer +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 172 bytes buffer +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 1548 bytes buffer +S4_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S4_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +S4_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S5_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S5_Conv2d_172x172x1x1_Relu ======================= +S5_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S5_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S5_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S5_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S5_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S5_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S5_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S5_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S6_Conv2d_172x1x3x3_Relu ======================= +S6_Conv2d_172x1x3x3_Relu Partition[0] Size = 56801 (Min: 30, Max: 98305), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 688 bytes buffer +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 172 bytes buffer +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 172 bytes buffer +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 1548 bytes buffer +S6_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S6_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S6_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +S6_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S7_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S7_Conv2d_172x172x1x1_Relu ======================= +S7_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S7_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S7_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S7_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S7_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S7_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S7_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S8_Conv2d_172x1x3x3_Relu ======================= +S8_Conv2d_172x1x3x3_Relu Partition[0] Size = 56801 (Min: 30, Max: 98305), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 688 bytes buffer +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 172 bytes buffer +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 172 bytes buffer +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 1548 bytes buffer +S8_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S8_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S8_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +S8_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S9_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S9_Conv2d_172x172x1x1_Relu ======================= +S9_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S9_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S9_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S9_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S9_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S9_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S9_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S9_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 11180 + +==== Process Tiling For User Kernel: S10_AveragePool_13x5 ======================= +S10_AveragePool_13x5 Partition[0] Size = 23069 (Min: 130, Max: 22749), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S10_AveragePool_13x5, Total Raw Memory: 11364 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S10_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 11364, Reusable Memory: 37372, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S11_Linear_12x172x1x1, Linear: InDim: 172, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S11_Linear_12x172x1x1 ======================= +S11_Linear_12x172x1x1 Partition[0] Size = 4505 (Min: 0, Max: 4673), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S11_Linear_12x172x1x1, Total Raw Memory: 2332 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S11_Linear_12x172x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 2332, Reusable Memory: 46404, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S12_SoftMax ======================= + S12_SoftMax Partition[0] Size = 57 (Min: 8, Max: 81), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S12_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. + S12_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S11_Output[ In] Adding Edge From S11_Linear_12x172x1x1 To S12_SoftMax New + Symbol: S10_Output[ In] Adding Edge From S10_AveragePool_13x5 To S11_Linear_12x172x1x1 New + Symbol: S9_Output[ In] Adding Edge From S9_Conv2d_172x172x1x1_Relu To S10_AveragePool_13x5 New + Symbol: S8_Output[ In] Adding Edge From S8_Conv2d_172x1x3x3_Relu To S9_Conv2d_172x172x1x1_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_172x172x1x1_Relu To S8_Conv2d_172x1x3x3_Relu New + Symbol: S6_Output[ In] Adding Edge From S6_Conv2d_172x1x3x3_Relu To S7_Conv2d_172x172x1x1_Relu New + Symbol: S5_Output[ In] Adding Edge From S5_Conv2d_172x172x1x1_Relu To S6_Conv2d_172x1x3x3_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_172x1x3x3_Relu To S5_Conv2d_172x172x1x1_Relu New + Symbol: S3_Output[ In] Adding Edge From S3_Conv2d_172x172x1x1_Relu To S4_Conv2d_172x1x3x3_Relu New + Symbol: S2_Output[ In] Adding Edge From S2_Conv2d_172x1x3x3_Relu To S3_Conv2d_172x172x1x1_Relu New + Symbol: S1_Output[ In] Adding Edge From S1_Conv2d_172x1x10x4_Relu To S2_Conv2d_172x1x3x3_Relu New + Symbol: Output_1[Undef] Adding Edge From S12_SoftMax To __GraphExit__ New + Symbol: S12_Infos[ In] Adding Edge From __GraphEntry__ To S12_SoftMax New + Symbol: S11_Mul_shift[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 New + Symbol: S11_Mul_scale[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S11_Biases[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S11_Weights[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S11_Infos[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_AveragePool_13x5 New + Symbol: S9_Mul_shift[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu New + Symbol: S9_Mul_scale[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S9_Biases[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S9_Weights[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S9_Infos[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S8_Mul_shift[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu New + Symbol: S8_Mul_scale[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S8_Biases[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S8_Weights[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S8_Infos[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu New + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Biases[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Weights[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S6_Mul_shift[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu New + Symbol: S6_Mul_scale[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S6_Biases[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S6_Weights[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S6_Infos[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S5_Mul_shift[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu New + Symbol: S5_Mul_scale[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S5_Biases[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S5_Weights[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S5_Infos[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu New + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Biases[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Weights[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S3_Mul_shift[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu New + Symbol: S3_Mul_scale[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S3_Biases[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S3_Weights[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S3_Infos[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S2_Mul_shift[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu New + Symbol: S2_Mul_scale[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S2_Biases[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S2_Weights[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S2_Infos[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S1_Mul_shift[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu New + Symbol: S1_Mul_scale[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S1_Biases[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S1_Weights[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S1_Infos[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S0_Input_1[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 54180 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 197153 => Alloc: OK + +[FULL] Remapping [54180 .. 197152] to [0 .. 142972] Align compensation: 3 +[PART] Remapping [0 .. 54179] to [142976 .. 197155] Align compensation: 0 +[PART] Remapping [197153 .. 349999] to [197156 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_m_quantCNN is sucessfull, L2: 197153 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => S0_Input_1 + (null) => S1_Infos + (null) => S1_Weights + (null) => S1_Biases + (null) => S1_Mul_scale + (null) => S1_Mul_shift + (null) => S2_Infos + (null) => S2_Weights + (null) => S2_Biases + (null) => S2_Mul_scale + (null) => S2_Mul_shift + (null) => S3_Infos + (null) => S3_Weights + (null) => S3_Biases + (null) => S3_Mul_scale + (null) => S3_Mul_shift + (null) => S4_Infos + (null) => S4_Weights + (null) => S4_Biases + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S5_Infos + (null) => S5_Weights + (null) => S5_Biases + (null) => S5_Mul_scale + (null) => S5_Mul_shift + (null) => S6_Infos + (null) => S6_Weights + (null) => S6_Biases + (null) => S6_Mul_scale + (null) => S6_Mul_shift + (null) => S7_Infos + (null) => S7_Weights + (null) => S7_Biases + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S8_Infos + (null) => S8_Weights + (null) => S8_Biases + (null) => S8_Mul_scale + (null) => S8_Mul_shift + (null) => S9_Infos + (null) => S9_Weights + (null) => S9_Biases + (null) => S9_Mul_scale + (null) => S9_Mul_shift + (null) => S10_Infos + (null) => S11_Infos + (null) => S11_Weights + (null) => S11_Biases + (null) => S11_Mul_scale + (null) => S11_Mul_shift + (null) => S12_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 + + Living Dynamic Symbols: [S0_Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S1_Conv2d_172x1x10x4_Relu, Operations: 1763000 + I In => S0_Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 1460, TileOverhead: 2.979592, L2Buff: 0, Addr: 0 +CI PartBuff Filter => S1_Weights --L2-- Size: 6880, L3_Move: 0, L2_Move: 6880, TileOverhead: 1.000000, L2Buff: 0, Addr: 1472 +CI Buff Bias => S1_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 440 + O Out => S1_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8512 +CI Buff Scale => S1_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1128 +CI Buff ScaleN => S1_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1300 +CI Buff Infos => S1_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45472 + Kernel Memory : L3: 0, L2: 51411 + Kernel Total Memory: 51411, L3 moves: 0, L2 moves: 52381, Move overhead: 1.018868 + Kernel Operations : 1763000 [KernelOper/GraphOper: 17.780609%], Move/Operation ratio: [L3: 0.000000, L2: 0.029711] + Successors: 2 + + Living Dynamic Symbols: [S0_Input_1] [S1_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S2_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S1_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S2_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 25032 +CI Buff Bias => S2_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S2_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26580 +CI Buff Scale => S2_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24688 +CI Buff ScaleN => S2_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24860 +CI Buff Infos => S2_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45300 + Kernel Memory : L3: 0, L2: 56769 + Kernel Total Memory: 56769, L3 moves: 0, L2 moves: 56769, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.507773] + Successors: 3 + + Living Dynamic Symbols: [S1_Output] [S2_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S3_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S2_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S3_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S3_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S3_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S3_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S3_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S3_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 4 + + Living Dynamic Symbols: [S2_Output] [S3_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S4_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S3_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S4_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => S4_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S4_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S4_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 5 + + Living Dynamic Symbols: [S3_Output] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S5_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S4_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S5_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S5_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S5_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S5_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S5_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S5_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 6 + + Living Dynamic Symbols: [S4_Output] [S5_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S6_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S5_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S6_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => S6_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S6_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S6_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S6_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S6_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 7 + + Living Dynamic Symbols: [S5_Output] [S6_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S7_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S6_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S7_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S7_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S7_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 8 + + Living Dynamic Symbols: [S6_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S8_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S8_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => S8_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S8_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S8_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S8_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S8_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 9 + + Living Dynamic Symbols: [S7_Output] [S8_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S9_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S8_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S9_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S9_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S9_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S9_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S9_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S9_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 10 + + Living Dynamic Symbols: [S8_Output] [S9_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S10_AveragePool_13x5, Operations: 11180 + I Buff In => S9_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S10_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11180 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 11352 + Kernel Memory : L3: 0, L2: 11361 + Kernel Total Memory: 11361, L3 moves: 0, L2 moves: 11361, Move overhead: 1.000000 + Kernel Operations : 11180 [KernelOper/GraphOper: 0.112755%], Move/Operation ratio: [L3: 0.000000, L2: 1.016190] + Successors: 11 + + Living Dynamic Symbols: [S9_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S11_Linear_12x172x1x1, Operations: 2064 + I Buff In => S10_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S11_Weights --L2-- Size: 2064, L3_Move: 0, L2_Move: 2064, TileOverhead: 1.000000, L2Buff: 0, Addr: 172 +CI Buff Bias => S11_Biases --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 2236 + O Buff Out => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2284 +CI Buff Scale => S11_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2296 +CI Buff ScaleN => S11_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2308 +CI Buff Infos => S11_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 2320 + Kernel Memory : L3: 0, L2: 2329 + Kernel Total Memory: 2329, L3 moves: 0, L2 moves: 2329, Move overhead: 1.000000 + Kernel Operations : 2064 [KernelOper/GraphOper: 0.020816%], Move/Operation ratio: [L3: 0.000000, L2: 1.128392] + Successors: 12 + + Living Dynamic Symbols: [S10_Output] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S12_SoftMax, Operations: 12 + I Buff In => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S12_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000121%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 56769 + Graph nodes min global memory: L3: 0, L2: 56772 + Graph sum of kernel arguments size: 408702, L3 moves: 0, L2 moves: 409672, Move overhead: 1.002373 + Graph total operations: 9915296 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 408702, Total L3_Move: 0, Total L2_Move: 409672, Tiling Overhead Average: 1.002373 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 54180 + Const 0 30628 + Total 0 56772 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + S0_Input_1 Externally allocated + S1_Infos INSTALL: HyperFlash[ 0: 13]@ 142808 LOAD: L2[ 0: 13]@ 142808 EXEC: L2[ 0: 13]@ 142808 , Size: 9 + S1_Weights INSTALL: HyperFlash[ 0: 13]@ 118336 LOAD: L2[ 0: 13]@ 118336 EXEC: L2[ 0: 13]@ 118336 , Size: 6880 + S1_Biases INSTALL: HyperFlash[ 0: 13]@ 133472 LOAD: L2[ 0: 13]@ 133472 EXEC: L2[ 0: 13]@ 133472 , Size: 688 + S1_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 139664 LOAD: L2[ 0: 13]@ 139664 EXEC: L2[ 0: 13]@ 139664 , Size: 172 + S1_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 139836 LOAD: L2[ 0: 13]@ 139836 EXEC: L2[ 0: 13]@ 139836 , Size: 172 + S2_Infos INSTALL: HyperFlash[ 0: 13]@ 142820 LOAD: L2[ 0: 13]@ 142820 EXEC: L2[ 0: 13]@ 142820 , Size: 9 + S2_Weights INSTALL: HyperFlash[ 0: 13]@ 127280 LOAD: L2[ 0: 13]@ 127280 EXEC: L2[ 0: 13]@ 127280 , Size: 1548 + S2_Biases INSTALL: HyperFlash[ 0: 13]@ 134160 LOAD: L2[ 0: 13]@ 134160 EXEC: L2[ 0: 13]@ 134160 , Size: 688 + S2_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140008 LOAD: L2[ 0: 13]@ 140008 EXEC: L2[ 0: 13]@ 140008 , Size: 172 + S2_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140180 LOAD: L2[ 0: 13]@ 140180 EXEC: L2[ 0: 13]@ 140180 , Size: 172 + S3_Infos INSTALL: HyperFlash[ 0: 13]@ 142832 LOAD: L2[ 0: 13]@ 142832 EXEC: L2[ 0: 13]@ 142832 , Size: 9 + S3_Weights INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 29584 + S3_Biases INSTALL: HyperFlash[ 0: 13]@ 134848 LOAD: L2[ 0: 13]@ 134848 EXEC: L2[ 0: 13]@ 134848 , Size: 688 + S3_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140352 LOAD: L2[ 0: 13]@ 140352 EXEC: L2[ 0: 13]@ 140352 , Size: 172 + S3_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140524 LOAD: L2[ 0: 13]@ 140524 EXEC: L2[ 0: 13]@ 140524 , Size: 172 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 142844 LOAD: L2[ 0: 13]@ 142844 EXEC: L2[ 0: 13]@ 142844 , Size: 9 + S4_Weights INSTALL: HyperFlash[ 0: 13]@ 128828 LOAD: L2[ 0: 13]@ 128828 EXEC: L2[ 0: 13]@ 128828 , Size: 1548 + S4_Biases INSTALL: HyperFlash[ 0: 13]@ 135536 LOAD: L2[ 0: 13]@ 135536 EXEC: L2[ 0: 13]@ 135536 , Size: 688 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140696 LOAD: L2[ 0: 13]@ 140696 EXEC: L2[ 0: 13]@ 140696 , Size: 172 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140868 LOAD: L2[ 0: 13]@ 140868 EXEC: L2[ 0: 13]@ 140868 , Size: 172 + S5_Infos INSTALL: HyperFlash[ 0: 13]@ 142856 LOAD: L2[ 0: 13]@ 142856 EXEC: L2[ 0: 13]@ 142856 , Size: 9 + S5_Weights INSTALL: HyperFlash[ 0: 13]@ 29584 LOAD: L2[ 0: 13]@ 29584 EXEC: L2[ 0: 13]@ 29584 , Size: 29584 + S5_Biases INSTALL: HyperFlash[ 0: 13]@ 136224 LOAD: L2[ 0: 13]@ 136224 EXEC: L2[ 0: 13]@ 136224 , Size: 688 + S5_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141040 LOAD: L2[ 0: 13]@ 141040 EXEC: L2[ 0: 13]@ 141040 , Size: 172 + S5_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141212 LOAD: L2[ 0: 13]@ 141212 EXEC: L2[ 0: 13]@ 141212 , Size: 172 + S6_Infos INSTALL: HyperFlash[ 0: 13]@ 142868 LOAD: L2[ 0: 13]@ 142868 EXEC: L2[ 0: 13]@ 142868 , Size: 9 + S6_Weights INSTALL: HyperFlash[ 0: 13]@ 130376 LOAD: L2[ 0: 13]@ 130376 EXEC: L2[ 0: 13]@ 130376 , Size: 1548 + S6_Biases INSTALL: HyperFlash[ 0: 13]@ 136912 LOAD: L2[ 0: 13]@ 136912 EXEC: L2[ 0: 13]@ 136912 , Size: 688 + S6_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141384 LOAD: L2[ 0: 13]@ 141384 EXEC: L2[ 0: 13]@ 141384 , Size: 172 + S6_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141556 LOAD: L2[ 0: 13]@ 141556 EXEC: L2[ 0: 13]@ 141556 , Size: 172 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 142880 LOAD: L2[ 0: 13]@ 142880 EXEC: L2[ 0: 13]@ 142880 , Size: 9 + S7_Weights INSTALL: HyperFlash[ 0: 13]@ 59168 LOAD: L2[ 0: 13]@ 59168 EXEC: L2[ 0: 13]@ 59168 , Size: 29584 + S7_Biases INSTALL: HyperFlash[ 0: 13]@ 137600 LOAD: L2[ 0: 13]@ 137600 EXEC: L2[ 0: 13]@ 137600 , Size: 688 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141728 LOAD: L2[ 0: 13]@ 141728 EXEC: L2[ 0: 13]@ 141728 , Size: 172 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141900 LOAD: L2[ 0: 13]@ 141900 EXEC: L2[ 0: 13]@ 141900 , Size: 172 + S8_Infos INSTALL: HyperFlash[ 0: 13]@ 142892 LOAD: L2[ 0: 13]@ 142892 EXEC: L2[ 0: 13]@ 142892 , Size: 9 + S8_Weights INSTALL: HyperFlash[ 0: 13]@ 131924 LOAD: L2[ 0: 13]@ 131924 EXEC: L2[ 0: 13]@ 131924 , Size: 1548 + S8_Biases INSTALL: HyperFlash[ 0: 13]@ 138288 LOAD: L2[ 0: 13]@ 138288 EXEC: L2[ 0: 13]@ 138288 , Size: 688 + S8_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142072 LOAD: L2[ 0: 13]@ 142072 EXEC: L2[ 0: 13]@ 142072 , Size: 172 + S8_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142244 LOAD: L2[ 0: 13]@ 142244 EXEC: L2[ 0: 13]@ 142244 , Size: 172 + S9_Infos INSTALL: HyperFlash[ 0: 13]@ 142904 LOAD: L2[ 0: 13]@ 142904 EXEC: L2[ 0: 13]@ 142904 , Size: 9 + S9_Weights INSTALL: HyperFlash[ 0: 13]@ 88752 LOAD: L2[ 0: 13]@ 88752 EXEC: L2[ 0: 13]@ 88752 , Size: 29584 + S9_Biases INSTALL: HyperFlash[ 0: 13]@ 138976 LOAD: L2[ 0: 13]@ 138976 EXEC: L2[ 0: 13]@ 138976 , Size: 688 + S9_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142416 LOAD: L2[ 0: 13]@ 142416 EXEC: L2[ 0: 13]@ 142416 , Size: 172 + S9_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142588 LOAD: L2[ 0: 13]@ 142588 EXEC: L2[ 0: 13]@ 142588 , Size: 172 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 142916 LOAD: L2[ 0: 13]@ 142916 EXEC: L2[ 0: 13]@ 142916 , Size: 9 + S11_Infos INSTALL: HyperFlash[ 0: 13]@ 142928 LOAD: L2[ 0: 13]@ 142928 EXEC: L2[ 0: 13]@ 142928 , Size: 9 + S11_Weights INSTALL: HyperFlash[ 0: 13]@ 125216 LOAD: L2[ 0: 13]@ 125216 EXEC: L2[ 0: 13]@ 125216 , Size: 2064 + S11_Biases INSTALL: HyperFlash[ 0: 13]@ 142760 LOAD: L2[ 0: 13]@ 142760 EXEC: L2[ 0: 13]@ 142760 , Size: 48 + S11_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142940 LOAD: L2[ 0: 13]@ 142940 EXEC: L2[ 0: 13]@ 142940 , Size: 12 + S11_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142952 LOAD: L2[ 0: 13]@ 142952 EXEC: L2[ 0: 13]@ 142952 , Size: 12 + S12_Infos INSTALL: HyperFlash[ 0: 13]@ 142964 LOAD: L2[ 0: 13]@ 142964 EXEC: L2[ 0: 13]@ 142964 , Size: 9 + Output_1 Externally allocated + S1_Output EXEC: L2[ 1: 2]@ 154156 , Size: 43000 + S2_Output EXEC: L2[ 2: 3]@ 142976 , Size: 11180 + S3_Output EXEC: L2[ 3: 4]@ 154156 , Size: 11180 + S4_Output EXEC: L2[ 4: 5]@ 142976 , Size: 11180 + S5_Output EXEC: L2[ 5: 6]@ 154156 , Size: 11180 + S6_Output EXEC: L2[ 6: 7]@ 142976 , Size: 11180 + S7_Output EXEC: L2[ 7: 8]@ 154156 , Size: 11180 + S8_Output EXEC: L2[ 8: 9]@ 165336 , Size: 11180 + S9_Output EXEC: L2[ 9: 10]@ 142976 , Size: 11180 + S10_Output EXEC: L2[ 10: 11]@ 154156 , Size: 172 + S11_Output EXEC: L2[ 11: 12]@ 142976 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S1_Conv2d_172x1x10x4_Relu +Generating Code For User Kernel: S2_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S3_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S4_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S5_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S6_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S7_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S8_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S9_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S10_AveragePool_13x5 +Generating Code For User Kernel: S11_Linear_12x172x1x1 +Generating Code For User Kernel: S12_SoftMax +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Weights.tensor: 6880 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Weights.tensor: 2064 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Biases.tensor: 48 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S12_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_m_quant_L3_Flash_Const.dat (size 142976) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 45484 +L2 Memory size (Bytes) : Given: 350000, Used: 197153 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 409672 Bytes +Sum of all Kernels arguments size : 408702 Bytes +Tiling Bandwith overhead : 1.002373 Move/KerArgSize +Sum of baseline bandwidth : 15711780 Bytes +Percentage of baseline BW for L2 : 2.60742 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 9915296 Operations +Total amount of flash coefficients : 142976 Bytes + +Basic kernels library : nntool_extra_kernels.h + : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_m_quant.h +Output Directory : BUILD_MODEL_8BIT_EMUL + +The following files have been generated: + KWS_ds_cnn_m_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_m_quantKernels.h Header file for the generated C code + KWS_ds_cnn_m_quant_L3_Flash_Const.dat Flash content for Graph constants +mkdir /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +python3 /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/GenLUT.py --fft_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/LUT.def --mfcc_bf_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCC_FB.def \ + --sample_rate 16000 --frame_size 640 --frame_step 320 --n_frame 49 \ + --n_fft 1024 --n_dct 40 --mfcc_bank_cnt 40 --fmin 20 --fmax 4000 --mfcc_bank_cnt 40 --preempfactor 0.0 \ + --use_tf_mfcc --save_params_header MFCC_params_MEDIUM.h --use_high_prec +(1024,) +Filter 0: Start: 2 Stop: 5 Base: 0 Items: 4 +Filter 1: Start: 4 Stop: 8 Base: 4 Items: 5 +Filter 2: Start: 6 Stop: 10 Base: 9 Items: 5 +Filter 3: Start: 9 Stop: 13 Base: 14 Items: 5 +Filter 4: Start: 11 Stop: 15 Base: 19 Items: 5 +Filter 5: Start: 14 Stop: 18 Base: 24 Items: 5 +Filter 6: Start: 16 Stop: 21 Base: 29 Items: 6 +Filter 7: Start: 19 Stop: 24 Base: 35 Items: 6 +Filter 8: Start: 22 Stop: 28 Base: 41 Items: 7 +Filter 9: Start: 25 Stop: 31 Base: 48 Items: 7 +Filter 10: Start: 29 Stop: 34 Base: 55 Items: 6 +Filter 11: Start: 32 Stop: 38 Base: 61 Items: 7 +Filter 12: Start: 35 Stop: 42 Base: 68 Items: 8 +Filter 13: Start: 39 Stop: 46 Base: 76 Items: 8 +Filter 14: Start: 43 Stop: 51 Base: 84 Items: 9 +Filter 15: Start: 47 Stop: 55 Base: 93 Items: 9 +Filter 16: Start: 52 Stop: 60 Base: 102 Items: 9 +Filter 17: Start: 56 Stop: 65 Base: 111 Items: 10 +Filter 18: Start: 61 Stop: 70 Base: 121 Items: 10 +Filter 19: Start: 66 Stop: 75 Base: 131 Items: 10 +Filter 20: Start: 71 Stop: 81 Base: 141 Items: 11 +Filter 21: Start: 76 Stop: 87 Base: 152 Items: 12 +Filter 22: Start: 82 Stop: 93 Base: 164 Items: 12 +Filter 23: Start: 88 Stop: 99 Base: 176 Items: 12 +Filter 24: Start: 94 Stop: 106 Base: 188 Items: 13 +Filter 25: Start: 100 Stop: 113 Base: 201 Items: 14 +Filter 26: Start: 107 Stop: 121 Base: 215 Items: 15 +Filter 27: Start: 114 Stop: 128 Base: 230 Items: 15 +Filter 28: Start: 122 Stop: 137 Base: 245 Items: 16 +Filter 29: Start: 129 Stop: 145 Base: 261 Items: 17 +Filter 30: Start: 138 Stop: 154 Base: 278 Items: 17 +Filter 31: Start: 146 Stop: 163 Base: 295 Items: 18 +Filter 32: Start: 155 Stop: 173 Base: 313 Items: 19 +Filter 33: Start: 164 Stop: 183 Base: 332 Items: 20 +Filter 34: Start: 174 Stop: 194 Base: 352 Items: 21 +Filter 35: Start: 184 Stop: 205 Base: 373 Items: 22 +Filter 36: Start: 195 Stop: 217 Base: 395 Items: 23 +Filter 37: Start: 206 Stop: 229 Base: 418 Items: 24 +Filter 38: Start: 218 Stop: 242 Base: 442 Items: 25 +Filter 39: Start: 230 Stop: 255 Base: 467 Items: 26 +gcc -g -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -I. -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Emulation MFCCmodel.c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccGenerator.c /home/marco-gwt/GWT/AutotilerV2/install/lib/libtile.a -lm -DMEDIUM +/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -f BUILD_MODEL_8BIT_EMUL --L1 48736 --L2 350000 --L3 6388608 +MFCC_COEF_DYN = 10 +FFT_BITS = 10 + +==== Process Tiling For User Kernel: MFCC ======================= + MFCC Partition[0] Size = 33086 (Min: 0, Max: 33814), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes + MFCC, TiledSpace: Tile0 Iteration Count: 1 + In : Ratio: 0.000000, Size: 2560, Total: 2560, Move: 62720 (Decl x 1.960000) L2 +* Out : Ratio: 0.000000, Size: 3920, Total: 6480, Move: 3920 (Decl x 1.000000) L2 +* InOut1 : Ratio: 6.400000, Size: 8192, Total: 14672, Move: 0 (Decl x 0.000000) L2 +* Out_fft : Ratio: 6.400000, Size: 8192, Total: 22864, Move: 0 (Decl x 0.000000) L2 +* shift_fft : Ratio: 3.200000, Size: 1024, Total: 23888, Move: 0 (Decl x 0.000000) L2 +* InOut2 : Ratio: 0.125000, Size: 160, Total: 24048, Move: 0 (Decl x 0.000000) L2 +* Shift : Ratio: 0.003125, Size: 4, Total: 24052, Move: 0 (Decl x 0.000000) L2 +* WinTable : Ratio: 3.200000, Size: 2048, Total: 26100, Move: 2048 (Decl x 1.000000) L2 +* Twiddles_fft : Ratio: 6.400000, Size: 4096, Total: 30196, Move: 4096 (Decl x 1.000000) L2 +* SwapTable_fft : Ratio: 3.200000, Size: 2048, Total: 32244, Move: 2048 (Decl x 1.000000) L2 +*MFCC_FilterBank : Ratio: 0.125000, Size: 320, Total: 32564, Move: 320 (Decl x 1.000000) L2 +* MFCC_Coeffs : Ratio: 1.543750, Size: 988, Total: 33552, Move: 988 (Decl x 1.000000) L2 +* shift_bf : Ratio: 0.125000, Size: 40, Total: 33592, Move: 0 (Decl x 0.000000) L2 +* DCT_Coeff : Ratio: 5.000000, Size: 3200, Total: 36792, Move: 3200 (Decl x 1.000000) L2 + MFCC - IterSpace: Tile0 - L1 Memory: 36792, L2Move: 79340, L3Move: 0, Tiling Overhead: 1.631839 + MFCC Iteration for Tiled Space: 1 + MFCC For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 36792, Reusable Memory: 11944, Used L2 Memory: 0 +================================================================================================= + + +Generating Code For User Kernel: MFCC + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 36792 +L2 Memory size (Bytes) : Given: 350000, Used: 0 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for all user kernels : 0 Bytes +L2 Memory bandwidth for all user kerneks : 79340 Bytes +Sum of all Kernels arguments size : 48620 Bytes +Tiling Bandwith overhead : 1.631839 Move/KerArgSize +Percentage of baseline BW for L2 : inf % +Percentage of baseline BW for L3 : -nan % +Sum of all Kernels operations : 0 Operations + +Basic kernels library : MfccBasicKernels.h +Output Directory : /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL + +The following files have been generated: + MFCCKernels.c Generated C code for the user kernels and the user kernels groups + MFCCKernels.h Header file for the generated C code +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DMEDIUM -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': False} +Pred/Tot: 94/ 100 Accuracy: 94.00% +Pred/Tot: 192/ 200 Accuracy: 96.00% +Pred/Tot: 284/ 300 Accuracy: 94.67% +Pred/Tot: 376/ 400 Accuracy: 94.00% +Pred/Tot: 470/ 500 Accuracy: 94.00% +Pred/Tot: 562/ 600 Accuracy: 93.67% +Pred/Tot: 656/ 700 Accuracy: 93.71% +Pred/Tot: 751/ 800 Accuracy: 93.88% +Pred/Tot: 846/ 900 Accuracy: 94.00% +Pred/Tot: 939/1000 Accuracy: 93.90% +Pred/Tot: 1032/1100 Accuracy: 93.82% +Pred/Tot: 1127/1200 Accuracy: 93.92% +Pred/Tot: 1211/1300 Accuracy: 93.15% +Pred/Tot: 1302/1400 Accuracy: 93.00% +Pred/Tot: 1391/1500 Accuracy: 92.73% +Pred/Tot: 1489/1600 Accuracy: 93.06% +Pred/Tot: 1586/1700 Accuracy: 93.29% +Pred/Tot: 1679/1800 Accuracy: 93.28% +Pred/Tot: 1774/1900 Accuracy: 93.37% +Pred/Tot: 1866/2000 Accuracy: 93.30% +Pred/Tot: 1960/2100 Accuracy: 93.33% +Pred/Tot: 2057/2200 Accuracy: 93.50% +Pred/Tot: 2151/2300 Accuracy: 93.52% +Pred/Tot: 2238/2400 Accuracy: 93.25% +Pred/Tot: 2328/2500 Accuracy: 93.12% +Pred/Tot: 2420/2600 Accuracy: 93.08% +Pred/Tot: 2514/2700 Accuracy: 93.11% +Pred/Tot: 2608/2800 Accuracy: 93.14% +Pred/Tot: 2700/2900 Accuracy: 93.10% +Pred/Tot: 2792/3000 Accuracy: 93.07% +Pred/Tot: 2888/3100 Accuracy: 93.16% +Pred/Tot: 2982/3200 Accuracy: 93.19% +Pred/Tot: 3079/3300 Accuracy: 93.30% +Pred/Tot: 3171/3400 Accuracy: 93.26% +Pred/Tot: 3264/3500 Accuracy: 93.26% +Pred/Tot: 3360/3600 Accuracy: 93.33% +Pred/Tot: 3447/3700 Accuracy: 93.16% +Pred/Tot: 3542/3800 Accuracy: 93.21% +Pred/Tot: 3633/3900 Accuracy: 93.15% +Pred/Tot: 3726/4000 Accuracy: 93.15% +Pred/Tot: 3820/4100 Accuracy: 93.17% +Pred/Tot: 3913/4200 Accuracy: 93.17% +Pred/Tot: 4004/4300 Accuracy: 93.12% +Pred/Tot: 4095/4400 Accuracy: 93.07% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4138/4444 Accuracy: 93.11% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 1 322 3 5 3 7 2 8 7 3 5 5] + [ 0 5 381 0 1 0 9 0 0 0 0 1] + [ 1 12 3 376 0 3 2 0 0 0 0 9] + [ 0 5 1 0 319 0 1 0 1 12 10 1] + [ 1 3 0 12 1 356 1 0 0 0 1 2] + [ 0 5 5 2 2 0 337 1 0 0 0 0] + [ 0 7 1 1 0 0 2 351 0 0 1 0] + [ 2 10 0 2 4 3 0 0 337 5 0 0] + [ 1 3 0 0 23 0 0 1 8 330 3 4] + [ 3 5 0 0 7 0 1 0 1 1 331 1] + [ 0 10 1 14 3 10 0 0 0 2 5 327]] +Pred/Tot: 95/ 100 Accuracy: 95.00% +Pred/Tot: 188/ 200 Accuracy: 94.00% +Pred/Tot: 280/ 300 Accuracy: 93.33% +Pred/Tot: 369/ 400 Accuracy: 92.25% +Pred/Tot: 460/ 500 Accuracy: 92.00% +Pred/Tot: 554/ 600 Accuracy: 92.33% +Pred/Tot: 648/ 700 Accuracy: 92.57% +Pred/Tot: 744/ 800 Accuracy: 93.00% +Pred/Tot: 840/ 900 Accuracy: 93.33% +Pred/Tot: 933/1000 Accuracy: 93.30% +Pred/Tot: 1024/1100 Accuracy: 93.09% +Pred/Tot: 1122/1200 Accuracy: 93.50% +Pred/Tot: 1214/1300 Accuracy: 93.38% +Pred/Tot: 1305/1400 Accuracy: 93.21% +Pred/Tot: 1398/1500 Accuracy: 93.20% +Pred/Tot: 1494/1600 Accuracy: 93.38% +Pred/Tot: 1587/1700 Accuracy: 93.35% +Pred/Tot: 1684/1800 Accuracy: 93.56% +Pred/Tot: 1771/1900 Accuracy: 93.21% +Pred/Tot: 1862/2000 Accuracy: 93.10% +Pred/Tot: 1952/2100 Accuracy: 92.95% +Pred/Tot: 2047/2200 Accuracy: 93.05% +Pred/Tot: 2141/2300 Accuracy: 93.09% +Pred/Tot: 2229/2400 Accuracy: 92.88% +Pred/Tot: 2321/2500 Accuracy: 92.84% +Pred/Tot: 2416/2600 Accuracy: 92.92% +Pred/Tot: 2513/2700 Accuracy: 93.07% +Pred/Tot: 2604/2800 Accuracy: 93.00% +Pred/Tot: 2693/2900 Accuracy: 92.86% +Pred/Tot: 2786/3000 Accuracy: 92.87% +Pred/Tot: 2879/3100 Accuracy: 92.87% +Pred/Tot: 2974/3200 Accuracy: 92.94% +Pred/Tot: 3072/3300 Accuracy: 93.09% +Pred/Tot: 3166/3400 Accuracy: 93.12% +Pred/Tot: 3258/3500 Accuracy: 93.09% +Pred/Tot: 3355/3600 Accuracy: 93.19% +Pred/Tot: 3443/3700 Accuracy: 93.05% +Pred/Tot: 3537/3800 Accuracy: 93.08% +Pred/Tot: 3629/3900 Accuracy: 93.05% +Pred/Tot: 3719/4000 Accuracy: 92.97% +Pred/Tot: 3811/4100 Accuracy: 92.95% +Pred/Tot: 3904/4200 Accuracy: 92.95% +Pred/Tot: 4000/4300 Accuracy: 93.02% +Pred/Tot: 4088/4400 Accuracy: 92.91% +Pred/Tot: 4178/4500 Accuracy: 92.84% +Pred/Tot: 4264/4600 Accuracy: 92.70% +Pred/Tot: 4357/4700 Accuracy: 92.70% +Pred/Tot: 4452/4800 Accuracy: 92.75% + +FINAL TESTING ACCURACY: +Pred/Tot: 4534/4889 Accuracy: 92.74% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 1 361 0 6 3 4 6 9 6 3 6 3] + [ 0 17 391 3 0 0 8 0 0 0 0 0] + [ 0 5 0 381 0 13 2 0 0 0 0 4] + [ 0 13 0 0 396 3 0 0 2 3 7 1] + [ 0 15 3 12 1 366 1 0 1 1 0 6] + [ 0 6 6 2 0 0 395 3 0 0 0 0] + [ 0 11 1 0 1 1 1 378 1 0 1 1] + [ 1 7 0 1 8 7 0 0 359 8 1 4] + [ 0 12 1 1 18 0 1 0 10 353 2 4] + [ 0 9 0 1 4 3 0 0 0 0 392 2] + [ 0 8 2 23 4 6 1 1 0 0 3 354]] diff --git a/accuracy_log/log_test_medium_hp_spectr_v2.txt b/accuracy_log/log_test_medium_hp_spectr_v2.txt new file mode 100644 index 0000000..5b47c3f --- /dev/null +++ b/accuracy_log/log_test_medium_hp_spectr_v2.txt @@ -0,0 +1,1842 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_m_quant.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 0 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=1 LARGE=0 WITH_MFCC=1 USE_POWER=0 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_SQ8_EMUL +cp model/KWS_ds_cnn_m_quant.tflite BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite +echo "GENERATING NNTOOL STATE FILE" +GENERATING NNTOOL STATE FILE +nntool -s model/nntool_script BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +open - opening graph file BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite load_quantization = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +unified_quantizer - forwards SOFTMAX_0_11 in: -32.63<(i8-0.00)*0.25492236<32.38 out: None stop [] fusion False +unified_quantizer - handler SoftmaxTanHMult selected for SoftMaxParameters(SOFTMAX_0_11) +unified_quantizer - forwards in edge 0 does not match was -32.63<(i8-0.00)*0.25492236<32.38 need -64.00<(i8-0.00)*0.50000000<63.50 forced +unified_quantizer - backwards FULLY_CONNECTED_0_10 in: -9.26<(i8-0.00)*0.07232232<9.18,chan<(i8-0.00)*chan W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1763000 + +==== Process Tiling For User Kernel: S4_Conv2d_172x1x10x4_Relu ======================= +S4_Conv2d_172x1x10x4_Relu Partition[0] Size = 194929 (Min: 200, Max: 275121), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=25 + In Dim: 58, TileOverlap: 8, Ratio: 2.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: Bias, was using 704 Bytes will require 688 Bytes buffer +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: Scale, was using 176 Bytes will require 172 Bytes buffer +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: ScaleN, was using 176 Bytes will require 172 Bytes buffer +S4_Conv2d_172x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 4 Parametric Space: [D1, M0=88] Parametric Space: [D0, M1=1] + In : Ratio: 2.000000, FixDim: 10, VarDim: 22 [ 49], Size: 440, Total: 440, Move: 1460 (Decl x 2.979592) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 1128, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 1300, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 1472, Move: 172 (Decl x 1.000000) L2 +@ Filter : Ratio: 0.000000, Size: 7040, Total: 8512, Move: 6880 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 12320, Total: 20832, Move: 43000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 24640, Total: 45472, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45484, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_172x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 45484, L2Move: 52381, L3Move: 0, Tiling Overhead: 1.018868 +S4_Conv2d_172x1x10x4_Relu Partial buffering on Arg: Filter, From: D0 To: D1. Current is (Par) 1 x [W:1, H:1] x 40 => Partial buffer size is 7040 Bytes +S4_Conv2d_172x1x10x4_Relu Found Parametric value for space D1 (Initial: 172, Div: 8) = 88 [88*1 + 84] and space D0 (Initial: 1, Div: 4) = 1 [1*1 + 0], Iteration for Tiled Space: 4 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: In, Size: 220, Base1: 0, Base2: 220 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Bias, Size: 688, Base1: 440, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Scale, Size: 172, Base1: 1128, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: ScaleN, Size: 172, Base1: 1300, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Filter, Size: 3520, Base1: 1472, Base2: 4992 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Out, Size: 6160, Base1: 8512, Base2: 14672 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: ConvOut, Size: 24640, Base1: 20832, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Infos, Size: 12, Base1: 45472, Base2: 0 +S4_Conv2d_172x1x10x4_Relu For Iter Space: 0 Iteration count: 4 (Last one is truncated), Given L1 Memory: 48736, Used L1 Memory: 45484, Reusable Memory: 3252, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S7_Conv2d_172x1x3x3_Relu ======================= +S7_Conv2d_172x1x3x3_Relu Partition[0] Size = 63665 (Min: 60, Max: 165257), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=13 + In Dim: 27, TileOverlap: 1, Ratio: 2.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 384 Bytes will require 688 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 96 Bytes will require 172 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 Bytes will require 172 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 864 Bytes will require 1548 Bytes buffer +S7_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 43000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 24688, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 24860, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 25032, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 26580, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 32820, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 45300, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45312, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45312, L2Move: 56769, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 48 [48*3 + 28], Iteration for Tiled Space: 1 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: In, Size: 12000, Base1: 0, Base2: 12000 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 24000, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 24688, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 24860, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 25032, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 3120, Base1: 26580, Base2: 29700 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 12480, Base1: 32820, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45300, Base2: 0 +S7_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45312, Reusable Memory: 3424, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S10_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S10_Conv2d_172x172x1x1_Relu ======================= +S10_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S10_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S10_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S10_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S10_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S10_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S10_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S10_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S13_Conv2d_172x1x3x3_Relu ======================= +S13_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S13_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S13_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S13_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S13_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S16_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S16_Conv2d_172x172x1x1_Relu ======================= +S16_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S16_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S16_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S16_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S16_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S16_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S16_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S16_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S19_Conv2d_172x1x3x3_Relu ======================= +S19_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S19_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S19_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S19_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S19_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S22_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S22_Conv2d_172x172x1x1_Relu ======================= +S22_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S22_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S22_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S22_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S22_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S22_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S22_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S22_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S25_Conv2d_172x1x3x3_Relu ======================= +S25_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S25_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S25_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S25_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S25_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S28_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S28_Conv2d_172x172x1x1_Relu ======================= +S28_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S28_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S28_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S28_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S28_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S28_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S28_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S28_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 11180 + +==== Process Tiling For User Kernel: S29_AveragePool_13x5 ======================= +S29_AveragePool_13x5 Partition[0] Size = 23063 (Min: 130, Max: 22731), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=1 + In Dim: 13, TileOverlap: 11, Ratio: 2.000000 + Out Dim: 1, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S29_AveragePool_13x5, Total Raw Memory: 11364 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S29_AveragePool_13x5, Arg: In, Size: 11180, Base1: 0, Base2: 0 +Ker: S29_AveragePool_13x5, Arg: Out, Size: 172, Base1: 11180, Base2: 0 +Ker: S29_AveragePool_13x5, Arg: Infos, Size: 12, Base1: 11352, Base2: 0 +S29_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 11364, Reusable Memory: 37372, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S32_Linear_12x172x1x1, Linear: InDim: 172, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S32_Linear_12x172x1x1 ======================= +S32_Linear_12x172x1x1 Partition[0] Size = 4491 (Min: 0, Max: 4575), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=1 + +Kernel: S32_Linear_12x172x1x1, Total Raw Memory: 2332 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S32_Linear_12x172x1x1, Arg: In, Size: 172, Base1: 0, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Filter, Size: 2064, Base1: 172, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Bias, Size: 48, Base1: 2236, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Out, Size: 12, Base1: 2284, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Scale, Size: 12, Base1: 2296, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: ScaleN, Size: 12, Base1: 2308, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Infos, Size: 12, Base1: 2320, Base2: 0 +S32_Linear_12x172x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 2332, Reusable Memory: 46404, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S33_SoftMax ======================= + S33_SoftMax Partition[0] Size = 51 (Min: 8, Max: 63), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=12 + In Dim: 12, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 12, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S33_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S33_SoftMax, Arg: In, Size: 12, Base1: 0, Base2: 0 +Ker: S33_SoftMax, Arg: Out, Size: 24, Base1: 12, Base2: 0 +Ker: S33_SoftMax, Arg: Infos, Size: 12, Base1: 36, Base2: 0 + S33_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S32_Output[ In] Adding Edge From S32_Linear_12x172x1x1 To S33_SoftMax New + Symbol: S29_Output[ In] Adding Edge From S29_AveragePool_13x5 To S32_Linear_12x172x1x1 New + Symbol: S28_Output[ In] Adding Edge From S28_Conv2d_172x172x1x1_Relu To S29_AveragePool_13x5 New + Symbol: S25_Output[ In] Adding Edge From S25_Conv2d_172x1x3x3_Relu To S28_Conv2d_172x172x1x1_Relu New + Symbol: S22_Output[ In] Adding Edge From S22_Conv2d_172x172x1x1_Relu To S25_Conv2d_172x1x3x3_Relu New + Symbol: S19_Output[ In] Adding Edge From S19_Conv2d_172x1x3x3_Relu To S22_Conv2d_172x172x1x1_Relu New + Symbol: S16_Output[ In] Adding Edge From S16_Conv2d_172x172x1x1_Relu To S19_Conv2d_172x1x3x3_Relu New + Symbol: S13_Output[ In] Adding Edge From S13_Conv2d_172x1x3x3_Relu To S16_Conv2d_172x172x1x1_Relu New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_172x172x1x1_Relu To S13_Conv2d_172x1x3x3_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_172x1x3x3_Relu To S10_Conv2d_172x172x1x1_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_172x1x10x4_Relu To S7_Conv2d_172x1x3x3_Relu New + Symbol: Output_1[ Out] Adding Edge From S33_SoftMax To __GraphExit__ New + Symbol: S33_Infos[ In] Adding Edge From __GraphEntry__ To S33_SoftMax New + Symbol: S32_Infos[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 New + Symbol: S32_Mul_shift[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: S32_Mul_scale[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: Dscnnfc1matmul_bias[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: Dscnnfc1weights_quantfakequant[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: S29_Infos[ In] Adding Edge From __GraphEntry__ To S29_AveragePool_13x5 New + Symbol: S28_Infos[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu New + Symbol: S28_Mul_shift[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: S28_Mul_scale[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: S25_Infos[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu New + Symbol: S25_Mul_shift[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: S25_Mul_scale[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: S22_Infos[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu New + Symbol: S22_Mul_shift[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: S22_Mul_scale[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: S19_Infos[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu New + Symbol: S19_Mul_shift[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: S19_Mul_scale[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: S16_Infos[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu New + Symbol: S16_Mul_shift[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: S16_Mul_scale[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu New + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu New + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu New + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Dscnnconv_1conv2d_fold_bias[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Dscnnconv_1weights_quantfakequ[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Input_1[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 54180 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 197153 => Alloc: OK + +[FULL] Remapping [54180 .. 197152] to [0 .. 142972] Align compensation: 3 +[PART] Remapping [0 .. 54179] to [142976 .. 197155] Align compensation: 0 +[PART] Remapping [197153 .. 349999] to [197156 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_m_quantCNN is sucessfull, L2: 197153 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => Input_1 + (null) => Dscnnconv_1weights_quantfakequ + (null) => Dscnnconv_1conv2d_fold_bias + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S4_Infos + (null) => Dscnnconv_ds_1dw_convweights_q + (null) => Dscnnconv_ds_1dw_convdepthwise + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S7_Infos + (null) => Dscnnconv_ds_1pw_convweights_q + (null) => Dscnnconv_ds_1pw_convconv2d_fo + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S10_Infos + (null) => Dscnnconv_ds_2dw_convweights_q + (null) => Dscnnconv_ds_2dw_convdepthwise + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S13_Infos + (null) => Dscnnconv_ds_2pw_convweights_q + (null) => Dscnnconv_ds_2pw_convconv2d_fo + (null) => S16_Mul_scale + (null) => S16_Mul_shift + (null) => S16_Infos + (null) => Dscnnconv_ds_3dw_convweights_q + (null) => Dscnnconv_ds_3dw_convdepthwise + (null) => S19_Mul_scale + (null) => S19_Mul_shift + (null) => S19_Infos + (null) => Dscnnconv_ds_3pw_convweights_q + (null) => Dscnnconv_ds_3pw_convconv2d_fo + (null) => S22_Mul_scale + (null) => S22_Mul_shift + (null) => S22_Infos + (null) => Dscnnconv_ds_4dw_convweights_q + (null) => Dscnnconv_ds_4dw_convdepthwise + (null) => S25_Mul_scale + (null) => S25_Mul_shift + (null) => S25_Infos + (null) => Dscnnconv_ds_4pw_convweights_q + (null) => Dscnnconv_ds_4pw_convconv2d_fo + (null) => S28_Mul_scale + (null) => S28_Mul_shift + (null) => S28_Infos + (null) => S29_Infos + (null) => Dscnnfc1weights_quantfakequant + (null) => Dscnnfc1matmul_bias + (null) => S32_Mul_scale + (null) => S32_Mul_shift + (null) => S32_Infos + (null) => S33_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 + + Living Dynamic Symbols: [Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S4_Conv2d_172x1x10x4_Relu, Operations: 1763000 + I In => Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 1460, TileOverhead: 2.979592, L2Buff: 0, Addr: 0 +CI PartBuff Filter => Dscnnconv_1weights_quantfakequ --L2-- Size: 6880, L3_Move: 0, L2_Move: 6880, TileOverhead: 1.000000, L2Buff: 0, Addr: 1472 +CI Buff Bias => Dscnnconv_1conv2d_fold_bias --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 440 + O Out => S4_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8512 +CI Buff Scale => S4_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1128 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1300 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45472 + Kernel Memory : L3: 0, L2: 51411 + Kernel Total Memory: 51411, L3 moves: 0, L2 moves: 52381, Move overhead: 1.018868 + Kernel Operations : 1763000 [KernelOper/GraphOper: 17.780609%], Move/Operation ratio: [L3: 0.000000, L2: 0.029711] + Successors: 2 + + Living Dynamic Symbols: [Input_1] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S7_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S4_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_1dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 25032 +CI Buff Bias => Dscnnconv_ds_1dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26580 +CI Buff Scale => S7_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24688 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24860 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45300 + Kernel Memory : L3: 0, L2: 56769 + Kernel Total Memory: 56769, L3 moves: 0, L2 moves: 56769, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.507773] + Successors: 3 + + Living Dynamic Symbols: [S4_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S10_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_1pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_1pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S10_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S10_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 4 + + Living Dynamic Symbols: [S7_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S13_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S10_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_2dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_2dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S13_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S13_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 5 + + Living Dynamic Symbols: [S10_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S16_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S13_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_2pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_2pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S16_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S16_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S16_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S16_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 6 + + Living Dynamic Symbols: [S13_Output] [S16_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S19_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S16_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_3dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_3dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S19_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S19_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S19_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S19_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 7 + + Living Dynamic Symbols: [S16_Output] [S19_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S22_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S19_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_3pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_3pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S22_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S22_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S22_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S22_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 8 + + Living Dynamic Symbols: [S19_Output] [S22_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S25_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S22_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_4dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_4dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S25_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S25_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S25_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S25_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 9 + + Living Dynamic Symbols: [S22_Output] [S25_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S28_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S25_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_4pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_4pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S28_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S28_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S28_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S28_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 10 + + Living Dynamic Symbols: [S25_Output] [S28_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S29_AveragePool_13x5, Operations: 11180 + I Buff In => S28_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S29_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11180 +CI Buff Infos => S29_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 11352 + Kernel Memory : L3: 0, L2: 11361 + Kernel Total Memory: 11361, L3 moves: 0, L2 moves: 11361, Move overhead: 1.000000 + Kernel Operations : 11180 [KernelOper/GraphOper: 0.112755%], Move/Operation ratio: [L3: 0.000000, L2: 1.016190] + Successors: 11 + + Living Dynamic Symbols: [S28_Output] [S29_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S32_Linear_12x172x1x1, Operations: 2064 + I Buff In => S29_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnfc1weights_quantfakequant --L2-- Size: 2064, L3_Move: 0, L2_Move: 2064, TileOverhead: 1.000000, L2Buff: 0, Addr: 172 +CI Buff Bias => Dscnnfc1matmul_bias --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 2236 + O Buff Out => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2284 +CI Buff Scale => S32_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2296 +CI Buff ScaleN => S32_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2308 +CI Buff Infos => S32_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 2320 + Kernel Memory : L3: 0, L2: 2329 + Kernel Total Memory: 2329, L3 moves: 0, L2 moves: 2329, Move overhead: 1.000000 + Kernel Operations : 2064 [KernelOper/GraphOper: 0.020816%], Move/Operation ratio: [L3: 0.000000, L2: 1.128392] + Successors: 12 + + Living Dynamic Symbols: [S29_Output] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S33_SoftMax, Operations: 12 + I Buff In => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S33_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000121%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 56769 + Graph nodes min global memory: L3: 0, L2: 56772 + Graph sum of kernel arguments size: 408702, L3 moves: 0, L2 moves: 499112, Move overhead: 1.221213 + Graph total operations: 9915296 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 408702, Total L3_Move: 0, Total L2_Move: 499112, Tiling Overhead Average: 1.221213 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 54180 + Const 0 30628 + Total 0 56772 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + Input_1 Externally allocated + Dscnnconv_1weights_quantfakequ INSTALL: HyperFlash[ 0: 13]@ 118336 LOAD: L2[ 0: 13]@ 118336 EXEC: L2[ 0: 13]@ 118336 , Size: 6880 + Dscnnconv_1conv2d_fold_bias INSTALL: HyperFlash[ 0: 13]@ 133472 LOAD: L2[ 0: 13]@ 133472 EXEC: L2[ 0: 13]@ 133472 , Size: 688 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 139664 LOAD: L2[ 0: 13]@ 139664 EXEC: L2[ 0: 13]@ 139664 , Size: 172 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 139836 LOAD: L2[ 0: 13]@ 139836 EXEC: L2[ 0: 13]@ 139836 , Size: 172 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 142808 LOAD: L2[ 0: 13]@ 142808 EXEC: L2[ 0: 13]@ 142808 , Size: 9 + Dscnnconv_ds_1dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 127280 LOAD: L2[ 0: 13]@ 127280 EXEC: L2[ 0: 13]@ 127280 , Size: 1548 + Dscnnconv_ds_1dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 134160 LOAD: L2[ 0: 13]@ 134160 EXEC: L2[ 0: 13]@ 134160 , Size: 688 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140008 LOAD: L2[ 0: 13]@ 140008 EXEC: L2[ 0: 13]@ 140008 , Size: 172 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140180 LOAD: L2[ 0: 13]@ 140180 EXEC: L2[ 0: 13]@ 140180 , Size: 172 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 142820 LOAD: L2[ 0: 13]@ 142820 EXEC: L2[ 0: 13]@ 142820 , Size: 9 + Dscnnconv_ds_1pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 29584 + Dscnnconv_ds_1pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 134848 LOAD: L2[ 0: 13]@ 134848 EXEC: L2[ 0: 13]@ 134848 , Size: 688 + S10_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140352 LOAD: L2[ 0: 13]@ 140352 EXEC: L2[ 0: 13]@ 140352 , Size: 172 + S10_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140524 LOAD: L2[ 0: 13]@ 140524 EXEC: L2[ 0: 13]@ 140524 , Size: 172 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 142832 LOAD: L2[ 0: 13]@ 142832 EXEC: L2[ 0: 13]@ 142832 , Size: 9 + Dscnnconv_ds_2dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 128828 LOAD: L2[ 0: 13]@ 128828 EXEC: L2[ 0: 13]@ 128828 , Size: 1548 + Dscnnconv_ds_2dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 135536 LOAD: L2[ 0: 13]@ 135536 EXEC: L2[ 0: 13]@ 135536 , Size: 688 + S13_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140696 LOAD: L2[ 0: 13]@ 140696 EXEC: L2[ 0: 13]@ 140696 , Size: 172 + S13_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140868 LOAD: L2[ 0: 13]@ 140868 EXEC: L2[ 0: 13]@ 140868 , Size: 172 + S13_Infos INSTALL: HyperFlash[ 0: 13]@ 142844 LOAD: L2[ 0: 13]@ 142844 EXEC: L2[ 0: 13]@ 142844 , Size: 9 + Dscnnconv_ds_2pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 29584 LOAD: L2[ 0: 13]@ 29584 EXEC: L2[ 0: 13]@ 29584 , Size: 29584 + Dscnnconv_ds_2pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 136224 LOAD: L2[ 0: 13]@ 136224 EXEC: L2[ 0: 13]@ 136224 , Size: 688 + S16_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141040 LOAD: L2[ 0: 13]@ 141040 EXEC: L2[ 0: 13]@ 141040 , Size: 172 + S16_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141212 LOAD: L2[ 0: 13]@ 141212 EXEC: L2[ 0: 13]@ 141212 , Size: 172 + S16_Infos INSTALL: HyperFlash[ 0: 13]@ 142856 LOAD: L2[ 0: 13]@ 142856 EXEC: L2[ 0: 13]@ 142856 , Size: 9 + Dscnnconv_ds_3dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 130376 LOAD: L2[ 0: 13]@ 130376 EXEC: L2[ 0: 13]@ 130376 , Size: 1548 + Dscnnconv_ds_3dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 136912 LOAD: L2[ 0: 13]@ 136912 EXEC: L2[ 0: 13]@ 136912 , Size: 688 + S19_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141384 LOAD: L2[ 0: 13]@ 141384 EXEC: L2[ 0: 13]@ 141384 , Size: 172 + S19_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141556 LOAD: L2[ 0: 13]@ 141556 EXEC: L2[ 0: 13]@ 141556 , Size: 172 + S19_Infos INSTALL: HyperFlash[ 0: 13]@ 142868 LOAD: L2[ 0: 13]@ 142868 EXEC: L2[ 0: 13]@ 142868 , Size: 9 + Dscnnconv_ds_3pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 59168 LOAD: L2[ 0: 13]@ 59168 EXEC: L2[ 0: 13]@ 59168 , Size: 29584 + Dscnnconv_ds_3pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 137600 LOAD: L2[ 0: 13]@ 137600 EXEC: L2[ 0: 13]@ 137600 , Size: 688 + S22_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141728 LOAD: L2[ 0: 13]@ 141728 EXEC: L2[ 0: 13]@ 141728 , Size: 172 + S22_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141900 LOAD: L2[ 0: 13]@ 141900 EXEC: L2[ 0: 13]@ 141900 , Size: 172 + S22_Infos INSTALL: HyperFlash[ 0: 13]@ 142880 LOAD: L2[ 0: 13]@ 142880 EXEC: L2[ 0: 13]@ 142880 , Size: 9 + Dscnnconv_ds_4dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 131924 LOAD: L2[ 0: 13]@ 131924 EXEC: L2[ 0: 13]@ 131924 , Size: 1548 + Dscnnconv_ds_4dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 138288 LOAD: L2[ 0: 13]@ 138288 EXEC: L2[ 0: 13]@ 138288 , Size: 688 + S25_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142072 LOAD: L2[ 0: 13]@ 142072 EXEC: L2[ 0: 13]@ 142072 , Size: 172 + S25_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142244 LOAD: L2[ 0: 13]@ 142244 EXEC: L2[ 0: 13]@ 142244 , Size: 172 + S25_Infos INSTALL: HyperFlash[ 0: 13]@ 142892 LOAD: L2[ 0: 13]@ 142892 EXEC: L2[ 0: 13]@ 142892 , Size: 9 + Dscnnconv_ds_4pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 88752 LOAD: L2[ 0: 13]@ 88752 EXEC: L2[ 0: 13]@ 88752 , Size: 29584 + Dscnnconv_ds_4pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 138976 LOAD: L2[ 0: 13]@ 138976 EXEC: L2[ 0: 13]@ 138976 , Size: 688 + S28_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142416 LOAD: L2[ 0: 13]@ 142416 EXEC: L2[ 0: 13]@ 142416 , Size: 172 + S28_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142588 LOAD: L2[ 0: 13]@ 142588 EXEC: L2[ 0: 13]@ 142588 , Size: 172 + S28_Infos INSTALL: HyperFlash[ 0: 13]@ 142904 LOAD: L2[ 0: 13]@ 142904 EXEC: L2[ 0: 13]@ 142904 , Size: 9 + S29_Infos INSTALL: HyperFlash[ 0: 13]@ 142916 LOAD: L2[ 0: 13]@ 142916 EXEC: L2[ 0: 13]@ 142916 , Size: 9 + Dscnnfc1weights_quantfakequant INSTALL: HyperFlash[ 0: 13]@ 125216 LOAD: L2[ 0: 13]@ 125216 EXEC: L2[ 0: 13]@ 125216 , Size: 2064 + Dscnnfc1matmul_bias INSTALL: HyperFlash[ 0: 13]@ 142760 LOAD: L2[ 0: 13]@ 142760 EXEC: L2[ 0: 13]@ 142760 , Size: 48 + S32_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142928 LOAD: L2[ 0: 13]@ 142928 EXEC: L2[ 0: 13]@ 142928 , Size: 12 + S32_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142940 LOAD: L2[ 0: 13]@ 142940 EXEC: L2[ 0: 13]@ 142940 , Size: 12 + S32_Infos INSTALL: HyperFlash[ 0: 13]@ 142952 LOAD: L2[ 0: 13]@ 142952 EXEC: L2[ 0: 13]@ 142952 , Size: 9 + S33_Infos INSTALL: HyperFlash[ 0: 13]@ 142964 LOAD: L2[ 0: 13]@ 142964 EXEC: L2[ 0: 13]@ 142964 , Size: 9 + Output_1 Externally allocated + S4_Output EXEC: L2[ 1: 2]@ 154156 , Size: 43000 + S7_Output EXEC: L2[ 2: 3]@ 142976 , Size: 11180 + S10_Output EXEC: L2[ 3: 4]@ 154156 , Size: 11180 + S13_Output EXEC: L2[ 4: 5]@ 142976 , Size: 11180 + S16_Output EXEC: L2[ 5: 6]@ 154156 , Size: 11180 + S19_Output EXEC: L2[ 6: 7]@ 142976 , Size: 11180 + S22_Output EXEC: L2[ 7: 8]@ 154156 , Size: 11180 + S25_Output EXEC: L2[ 8: 9]@ 165336 , Size: 11180 + S28_Output EXEC: L2[ 9: 10]@ 142976 , Size: 11180 + S29_Output EXEC: L2[ 10: 11]@ 154156 , Size: 172 + S32_Output EXEC: L2[ 11: 12]@ 142976 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S4_Conv2d_172x1x10x4_Relu +Generating Code For User Kernel: S7_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S10_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S13_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S16_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S19_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S22_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S25_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S28_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S29_AveragePool_13x5 +Generating Code For User Kernel: S32_Linear_12x172x1x1 +Generating Code For User Kernel: S33_SoftMax +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1weights_quantfakequ.tensor: 6880 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1conv2d_fold_bias.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S29_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1weights_quantfakequant.tensor: 2064 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1matmul_bias.tensor: 12 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S33_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_m_quant_L3_Flash_Const.dat (size 142976) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 45492 +L2 Memory size (Bytes) : Given: 350000, Used: 197153 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 499112 Bytes +Sum of all Kernels arguments size : 408702 Bytes +Tiling Bandwith overhead : 1.221213 Move/KerArgSize +Sum of baseline bandwidth : 15711780 Bytes +Percentage of baseline BW for L2 : 3.17667 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 9915296 Operations +Total amount of flash coefficients : 142976 Bytes + +Basic kernels library : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_m_quant.h +Output Directory : BUILD_MODEL_SQ8_EMUL + +The following files have been generated: + KWS_ds_cnn_m_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_m_quantKernels.h Header file for the generated C code + KWS_ds_cnn_m_quant_L3_Flash_Const.dat Flash content for Graph constants +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o +gcc -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DMEDIUM -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': False} +Pred/Tot: 92/ 100 Accuracy: 92.00% +Pred/Tot: 189/ 200 Accuracy: 94.50% +Pred/Tot: 281/ 300 Accuracy: 93.67% +Pred/Tot: 373/ 400 Accuracy: 93.25% +Pred/Tot: 469/ 500 Accuracy: 93.80% +Pred/Tot: 559/ 600 Accuracy: 93.17% +Pred/Tot: 652/ 700 Accuracy: 93.14% +Pred/Tot: 744/ 800 Accuracy: 93.00% +Pred/Tot: 837/ 900 Accuracy: 93.00% +Pred/Tot: 934/1000 Accuracy: 93.40% +Pred/Tot: 1029/1100 Accuracy: 93.55% +Pred/Tot: 1119/1200 Accuracy: 93.25% +Pred/Tot: 1206/1300 Accuracy: 92.77% +Pred/Tot: 1298/1400 Accuracy: 92.71% +Pred/Tot: 1386/1500 Accuracy: 92.40% +Pred/Tot: 1483/1600 Accuracy: 92.69% +Pred/Tot: 1575/1700 Accuracy: 92.65% +Pred/Tot: 1668/1800 Accuracy: 92.67% +Pred/Tot: 1762/1900 Accuracy: 92.74% +Pred/Tot: 1856/2000 Accuracy: 92.80% +Pred/Tot: 1948/2100 Accuracy: 92.76% +Pred/Tot: 2042/2200 Accuracy: 92.82% +Pred/Tot: 2132/2300 Accuracy: 92.70% +Pred/Tot: 2222/2400 Accuracy: 92.58% +Pred/Tot: 2310/2500 Accuracy: 92.40% +Pred/Tot: 2402/2600 Accuracy: 92.38% +Pred/Tot: 2496/2700 Accuracy: 92.44% +Pred/Tot: 2587/2800 Accuracy: 92.39% +Pred/Tot: 2679/2900 Accuracy: 92.38% +Pred/Tot: 2767/3000 Accuracy: 92.23% +Pred/Tot: 2862/3100 Accuracy: 92.32% +Pred/Tot: 2956/3200 Accuracy: 92.38% +Pred/Tot: 3051/3300 Accuracy: 92.45% +Pred/Tot: 3140/3400 Accuracy: 92.35% +Pred/Tot: 3230/3500 Accuracy: 92.29% +Pred/Tot: 3323/3600 Accuracy: 92.31% +Pred/Tot: 3414/3700 Accuracy: 92.27% +Pred/Tot: 3510/3800 Accuracy: 92.37% +Pred/Tot: 3604/3900 Accuracy: 92.41% +Pred/Tot: 3699/4000 Accuracy: 92.47% +Pred/Tot: 3793/4100 Accuracy: 92.51% +Pred/Tot: 3886/4200 Accuracy: 92.52% +Pred/Tot: 3982/4300 Accuracy: 92.60% +Pred/Tot: 4074/4400 Accuracy: 92.59% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4116/4444 Accuracy: 92.62% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 0 301 8 4 7 9 5 12 8 1 4 12] + [ 0 2 390 0 0 1 0 0 0 0 0 4] + [ 0 6 2 363 0 6 0 0 0 1 2 26] + [ 0 2 1 0 325 1 5 0 1 4 8 3] + [ 0 6 2 8 1 348 1 0 0 1 2 8] + [ 0 2 11 2 0 0 333 2 0 2 0 0] + [ 0 6 2 1 0 1 3 350 0 0 0 0] + [ 1 5 0 1 4 1 0 0 333 10 1 7] + [ 0 1 0 2 32 1 2 1 4 323 2 5] + [ 1 0 1 0 11 4 0 0 1 1 330 1] + [ 0 4 0 8 1 6 0 0 1 0 3 349]] +Pred/Tot: 92/ 100 Accuracy: 92.00% +Pred/Tot: 185/ 200 Accuracy: 92.50% +Pred/Tot: 276/ 300 Accuracy: 92.00% +Pred/Tot: 367/ 400 Accuracy: 91.75% +Pred/Tot: 457/ 500 Accuracy: 91.40% +Pred/Tot: 550/ 600 Accuracy: 91.67% +Pred/Tot: 645/ 700 Accuracy: 92.14% +Pred/Tot: 738/ 800 Accuracy: 92.25% +Pred/Tot: 832/ 900 Accuracy: 92.44% +Pred/Tot: 924/1000 Accuracy: 92.40% +Pred/Tot: 1015/1100 Accuracy: 92.27% +Pred/Tot: 1108/1200 Accuracy: 92.33% +Pred/Tot: 1197/1300 Accuracy: 92.08% +Pred/Tot: 1291/1400 Accuracy: 92.21% +Pred/Tot: 1383/1500 Accuracy: 92.20% +Pred/Tot: 1477/1600 Accuracy: 92.31% +Pred/Tot: 1572/1700 Accuracy: 92.47% +Pred/Tot: 1667/1800 Accuracy: 92.61% +Pred/Tot: 1758/1900 Accuracy: 92.53% +Pred/Tot: 1849/2000 Accuracy: 92.45% +Pred/Tot: 1940/2100 Accuracy: 92.38% +Pred/Tot: 2034/2200 Accuracy: 92.45% +Pred/Tot: 2127/2300 Accuracy: 92.48% +Pred/Tot: 2215/2400 Accuracy: 92.29% +Pred/Tot: 2308/2500 Accuracy: 92.32% +Pred/Tot: 2400/2600 Accuracy: 92.31% +Pred/Tot: 2493/2700 Accuracy: 92.33% +Pred/Tot: 2585/2800 Accuracy: 92.32% +Pred/Tot: 2672/2900 Accuracy: 92.14% +Pred/Tot: 2764/3000 Accuracy: 92.13% +Pred/Tot: 2853/3100 Accuracy: 92.03% +Pred/Tot: 2947/3200 Accuracy: 92.09% +Pred/Tot: 3045/3300 Accuracy: 92.27% +Pred/Tot: 3141/3400 Accuracy: 92.38% +Pred/Tot: 3234/3500 Accuracy: 92.40% +Pred/Tot: 3330/3600 Accuracy: 92.50% +Pred/Tot: 3423/3700 Accuracy: 92.51% +Pred/Tot: 3515/3800 Accuracy: 92.50% +Pred/Tot: 3610/3900 Accuracy: 92.56% +Pred/Tot: 3701/4000 Accuracy: 92.53% +Pred/Tot: 3794/4100 Accuracy: 92.54% +Pred/Tot: 3887/4200 Accuracy: 92.55% +Pred/Tot: 3984/4300 Accuracy: 92.65% +Pred/Tot: 4073/4400 Accuracy: 92.57% +Pred/Tot: 4168/4500 Accuracy: 92.62% +Pred/Tot: 4254/4600 Accuracy: 92.48% +Pred/Tot: 4344/4700 Accuracy: 92.43% +Pred/Tot: 4438/4800 Accuracy: 92.46% + +FINAL TESTING ACCURACY: +Pred/Tot: 4520/4889 Accuracy: 92.45% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 339 2 6 4 11 7 12 5 2 9 11] + [ 0 3 411 0 2 0 1 0 0 0 0 2] + [ 0 4 5 358 1 16 0 0 0 0 0 21] + [ 0 5 0 0 397 3 0 1 4 3 8 4] + [ 0 4 3 10 0 375 3 0 0 0 3 8] + [ 0 0 11 3 0 0 392 5 0 0 1 0] + [ 0 11 0 0 0 1 4 379 0 0 1 0] + [ 0 8 0 0 6 5 0 0 355 8 1 13] + [ 0 3 0 1 27 2 0 1 7 343 2 16] + [ 0 4 0 0 7 3 0 1 0 0 389 7] + [ 0 3 1 13 3 5 2 0 0 0 1 374]] diff --git a/accuracy_log/log_test_medium_power.txt b/accuracy_log/log_test_medium_power.txt new file mode 100644 index 0000000..6ef5e15 --- /dev/null +++ b/accuracy_log/log_test_medium_power.txt @@ -0,0 +1,1499 @@ +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_m_quant_power.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 1 --use_high_prec 0 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=1 LARGE=0 WITH_MFCC=1 USE_POWER=1 USE_HIGH_PREC=0 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +rm -f BUILD_MODEL_8BIT_EMUL/GenTile +rm -f -rf BUILD_MODEL_8BIT_EMUL +rm -f BUILD_MODEL_8BIT_EMUL/*.dat +rm -rf /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_8BIT_EMUL +cp model/KWS_ds_cnn_m_quant_power.tflite BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quant.tflite +echo "GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL" +GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL +sed -e "s|MODEL_SRC|KWS_ds_cnn_m_quantModel.c|g" -e "s|TENSORS_DIR|BUILD_MODEL_8BIT_EMUL/tensors|g" -e "s|MODEL_BUILD|BUILD_MODEL_8BIT_EMUL|g" -e "s|GRAPH_DUMP||g" -e "s|LARGE_OPT||g" \ + model/nntool_script_params > model/nntool_script +nntool -s model/nntool_script BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +load_quantization - was: False +now: True +open - opening graph file BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quant.tflite load_quantizaion = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +debug - was: False +now: True +adjust_order - adding transposes to correct tensor order for AT kernels +set_aliases - looking for aliased edges +eliminate_transposes - eliminating unnecessary transposes +eliminate_transposes - search for transposes +eliminate_transposes - ++ Starting up from CONV_2D_0_0[0] +eliminate_transposes - looking up at input_1[0] transpose [2, 0, 1] +eliminate_transposes - accepted input_1 - input without fixed order - transpose input [2, 0, 1] +eliminate_transposes - ++ Found results for CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_0[0] +eliminate_transposes - looking down at CONV_2D_0_0_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_1 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_2[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_2 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_2[0] +eliminate_transposes - looking down at CONV_2D_0_2_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_3 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_2[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_4[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_4 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_4[0] +eliminate_transposes - looking down at CONV_2D_0_4_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_5 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_4[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_6[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_6 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_6[0] +eliminate_transposes - looking down at CONV_2D_0_6_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_7 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_6[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_8[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_8 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_8[0] +eliminate_transposes - looking down at CONV_2D_0_8_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at AVERAGE_POOL_2D_0_9[0] transpose [1, 2, 0] +eliminate_transposes - accepted AVERAGE_POOL_2D_0_9 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_8[0] +eliminate_transposes - ++ Starting down from AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - looking down at FULLY_CONNECTED_0_10[0] transpose [1, 2, 0] +eliminate_transposes - accepted FULLY_CONNECTED_0_10 - linear layer reorder input +eliminate_transposes - ++ Found results for AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - eliminate transposes +eliminate_transposes_actions - Start Action (up): CONV_2D_0_0 +eliminate_transposes_actions - CONV_2D_0_0 delete transpose in[0] +eliminate_transposes_actions - CONV_2D_0_0 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - input_1 input dims with (2, 0, 1) +eliminate_transposes_actions - End Action (up): input_1 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_0 +eliminate_transposes_actions - CONV_2D_0_0 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_0 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_2 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_2 +eliminate_transposes_actions - CONV_2D_0_2 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_2 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_4 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_4 +eliminate_transposes_actions - CONV_2D_0_4 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_4 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_6 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_6 +eliminate_transposes_actions - CONV_2D_0_6 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_6 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_8 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_8 +eliminate_transposes_actions - CONV_2D_0_8 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_8 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose in[0] +eliminate_transposes_actions - End Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - Start Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose out[0] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - reorder linear layer FULLY_CONNECTED_0_10 in with shape 1x1x172 transposed (2, 0, 1) +eliminate_transposes_actions - End Action (down): FULLY_CONNECTED_0_10 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - search for transposes +eliminate_transposes - no transposes to eliminate found +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - no further transpose sequences found +set_aliases - looking for aliased edges +nngraph - adjusted order +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start remove_relus +matcher - fusions - start remove_noops +matcher - fusions - start fuse_external_bias_sq8 +matcher - fusions - start fuse_pad +matcher - fusions - start match_duplicate_operations +matcher - fusions - start gather_to_split +matcher - fusions - start slice_to_split +matcher - fusions - start unused_concats +matcher - fusions - start find_missing_quantization +matcher - fusions - start rnn_reverse +matcher - fusions - start rnn_unpack +matcher - fusions - start match_far_hsigmoid +matcher - fusions - start match_close_hsigmoid +matcher - fusions - start expand_transposes +matcher - fusions - start move_pooling_scale8 +matcher - fusions - start move_activations_scale8 +matcher - fusions - start fuse_gap_convs +matcher - fusions - start match_conv_active_pool +matcher - fusions - start match_conv_pool_active +matcher - fusions - start match_conv_active +match_gap_conv - fusing nodes CONV_2D_0_0,CONV_2D_0_0_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_1,DEPTHWISE_CONV_2D_0_1_activation +match_gap_conv - fusing nodes CONV_2D_0_2,CONV_2D_0_2_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_3,DEPTHWISE_CONV_2D_0_3_activation +match_gap_conv - fusing nodes CONV_2D_0_4,CONV_2D_0_4_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_5,DEPTHWISE_CONV_2D_0_5_activation +match_gap_conv - fusing nodes CONV_2D_0_6,CONV_2D_0_6_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_7,DEPTHWISE_CONV_2D_0_7_activation +match_gap_conv - fusing nodes CONV_2D_0_8,CONV_2D_0_8_activation +matcher - fusions - match_conv_active modified graph +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start match_conv_pool +matcher - fusions - start fuse_gap_linear +matcher - fusions - start fuse_op_activation_scale8 +matcher - fusions - start propagate_softmax_sym_qrec +equalize_sym_mult_concats - propagating scale up from node SOFTMAX_0_11 to node FULLY_CONNECTED_0_10 +matcher - fusions - start equalize_sm_concats +matcher - fusions - start filter_bigger_than_input +matcher - fusions - start insert_copies +matcher - fusions - start propagate_up_rnn_in_qs +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| Step | Step name | Operation | Input Dims | Output Dims | Inputs | Active | Params | Ops | Params | Hints | +| | | | (cxhxw) | (cxhxw) | | size | size | | | | ++======+=============================+=========================+============+=============+========+========+========+=========+============================+======================+ +| 0 | input_1 | input | 1x49x10 | 1x49x10 | | 490 | 0 | | I 1x49x10 FIXED_ORDER=0 | in: hxwxc out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 1 | CONV_2D_0_0_fusion | conv_fusion_conv_active | 1x49x10 | 172x25x10 | 0 | 43490 | 7052 | 1.72M | F 172x1x10x4 S 2x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 4x5x1x2 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 2 | DEPTHWISE_CONV_2D_0_1_fusio | conv_fusion_conv_active | 172x25x10 | 172x13x5 | 1 | 54180 | 181 | 100.62K | F 172x1x3x3 S 2x2 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x0x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 3 | CONV_2D_0_2_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 2 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 4 | DEPTHWISE_CONV_2D_0_3_fusio | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 3 | 22360 | 181 | 100.62K | F 172x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 5 | CONV_2D_0_4_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 4 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 6 | DEPTHWISE_CONV_2D_0_5_fusio | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 5 | 22360 | 181 | 100.62K | F 172x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 7 | CONV_2D_0_6_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 6 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 8 | DEPTHWISE_CONV_2D_0_7_fusio | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 7 | 22360 | 181 | 100.62K | F 172x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 9 | CONV_2D_0_8_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 8 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 10 | AVERAGE_POOL_2D_0_9 | average_pool | 172x13x5 | 172x1x1 | 9 | 11352 | 0 | 11.35K | T average F 13x5 S 2x2 P | in: cxhxw out: cxhxw | +| | | | | | | | | | 0x0x0x0 zero | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 11 | FULLY_CONNECTED_0_10 | linear | 172x1x1 | 12 | 10 | 184 | 2076 | 2.06K | F 12x172x1x1 | in: cx0x1 out: c | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 12 | SOFTMAX_0_11 | softmax | 12 | 12 | 11 | 24 | 0 | 24 | Beta 0.0 Axis None | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 13 | output_1 | output | 12 | 12 | 12 | 12 | 0 | | O 12 FIXED_ORDER=0 | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | 54180 | 128876 | 9.83M | | | +| | Max active/Total params | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | | 183056 | 9.83M | | | +| | Max mem usage | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| Step | Name | In | Out | Weights | Bias | Mulbias | Calc | Acc | ++======+==================+==================+=================+=================+==========+==========+=======+=======+ +| 0 | input_1 | -247.97>chan | Q32.0 | Q32.0 | +| | | 27124<246.03 | 54438<16.33 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 1 | CONV_2D_0_0_acti | -16.45>chan | Q32.0 | Q32.0 | +| | D_0_1 | 4438<16.33 | 91819<19.55 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 2 | DEPTHWISE_CONV_2 | -19.70>chan | Q32.0 | Q32.0 | +| | | 1819<19.55 | 14758<16.53 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 3 | CONV_2D_0_2_acti | -16.66>chan | Q32.0 | Q32.0 | +| | D_0_3 | 4758<16.53 | 06785<14.87 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 4 | DEPTHWISE_CONV_2 | -14.98>chan | Q32.0 | Q32.0 | +| | | 6785<14.87 | 44783<10.60 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 5 | CONV_2D_0_4_acti | -10.68>chan | Q32.0 | Q32.0 | +| | D_0_5 | 4783<10.60 | 07906<11.82 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 6 | DEPTHWISE_CONV_2 | -11.91>chan | Q32.0 | Q32.0 | +| | | 7906<11.82 | 0190<9.61 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 7 | CONV_2D_0_6_acti | -9.69>chan | Q32.0 | Q32.0 | +| | D_0_7 | 190<9.61 | 31889<11.34 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 8 | DEPTHWISE_CONV_2 | -11.43>chan | Q32.0 | Q32.0 | +| | | 1889<11.34 | 4916<9.16 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 9 | CONV_2D_0_8_acti | -9.24>chan | Q32.0 | Q32.0 | +| | 0_10 | 916<9.16 | 00000<35.45 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 12 | SOFTMAX_0_11 | -35.73 W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1763000 + +==== Process Tiling For User Kernel: S1_Conv2d_172x1x10x4_Relu ======================= +S1_Conv2d_172x1x10x4_Relu Partition[0] Size = 194945 (Min: 200, Max: 275249), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S1_Conv2d_172x1x10x4_Relu Full buffering on Arg: Bias, was using 704 bytes will require 688 bytes buffer +S1_Conv2d_172x1x10x4_Relu Full buffering on Arg: Scale, was using 176 bytes will require 172 bytes buffer +S1_Conv2d_172x1x10x4_Relu Full buffering on Arg: ScaleN, was using 176 bytes will require 172 bytes buffer +S1_Conv2d_172x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 4 Parametric Space: [D1, M0=88] Parametric Space: [D0, M1=1] + In : Ratio: 2.000000, FixDim: 10, VarDim: 22 [ 49], Size: 440, Total: 440, Move: 1460 (Decl x 2.979592) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 1128, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 1300, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 1472, Move: 172 (Decl x 1.000000) L2 +@ Filter : Ratio: 0.000000, Size: 6880, Total: 8352, Move: 6880 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 12320, Total: 20672, Move: 43000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 24640, Total: 45312, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45324, Move: 9 (Decl x 1.000000) L2 +S1_Conv2d_172x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 45324, L2Move: 52381, L3Move: 0, Tiling Overhead: 1.018868 +S1_Conv2d_172x1x10x4_Relu Partial buffering on Arg: Filter, From: D0 To: D1. Current is (Par) 1 x [W:1, H:1] x 40 => Partial buffer size is 7040 bytes +S1_Conv2d_172x1x10x4_Relu Found Parametric value for space D1 (Initial: 172, Div: 8) = 88 [88*1 + 84] and space D0 (Initial: 1, Div: 4) = 1 [1*1 + 0], Iteration for Tiled Space: 4 +S1_Conv2d_172x1x10x4_Relu For Iter Space: 0 Iteration count: 4 (Last one is truncated), Given L1 Memory: 48736, Used L1 Memory: 45484, Reusable Memory: 3252, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S2_Conv2d_172x1x3x3_Relu ======================= +S2_Conv2d_172x1x3x3_Relu Partition[0] Size = 63681 (Min: 60, Max: 165385), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 384 bytes will require 688 bytes buffer +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 96 bytes will require 172 bytes buffer +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 bytes will require 172 bytes buffer +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 864 bytes will require 1548 bytes buffer +S2_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 43000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 24688, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 24860, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 25032, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 26580, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 32820, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 45300, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45312, Move: 9 (Decl x 1.000000) L2 +S2_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45312, L2Move: 56769, L3Move: 0, Tiling Overhead: 1.000000 +S2_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 48 [48*3 + 28], Iteration for Tiled Space: 1 +S2_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45312, Reusable Memory: 3424, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S3_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S3_Conv2d_172x172x1x1_Relu ======================= +S3_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S3_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S3_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S3_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S3_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S3_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S3_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S3_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S4_Conv2d_172x1x3x3_Relu ======================= +S4_Conv2d_172x1x3x3_Relu Partition[0] Size = 56801 (Min: 30, Max: 98305), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 688 bytes buffer +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 172 bytes buffer +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 172 bytes buffer +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 1548 bytes buffer +S4_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S4_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +S4_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S5_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S5_Conv2d_172x172x1x1_Relu ======================= +S5_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S5_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S5_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S5_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S5_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S5_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S5_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S5_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S6_Conv2d_172x1x3x3_Relu ======================= +S6_Conv2d_172x1x3x3_Relu Partition[0] Size = 56801 (Min: 30, Max: 98305), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 688 bytes buffer +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 172 bytes buffer +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 172 bytes buffer +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 1548 bytes buffer +S6_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S6_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S6_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +S6_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S7_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S7_Conv2d_172x172x1x1_Relu ======================= +S7_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S7_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S7_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S7_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S7_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S7_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S7_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S8_Conv2d_172x1x3x3_Relu ======================= +S8_Conv2d_172x1x3x3_Relu Partition[0] Size = 56801 (Min: 30, Max: 98305), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 688 bytes buffer +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 172 bytes buffer +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 172 bytes buffer +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 1548 bytes buffer +S8_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S8_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S8_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +S8_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S9_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S9_Conv2d_172x172x1x1_Relu ======================= +S9_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S9_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S9_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S9_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S9_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S9_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S9_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S9_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 11180 + +==== Process Tiling For User Kernel: S10_AveragePool_13x5 ======================= +S10_AveragePool_13x5 Partition[0] Size = 23069 (Min: 130, Max: 22749), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S10_AveragePool_13x5, Total Raw Memory: 11364 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S10_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 11364, Reusable Memory: 37372, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S11_Linear_12x172x1x1, Linear: InDim: 172, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S11_Linear_12x172x1x1 ======================= +S11_Linear_12x172x1x1 Partition[0] Size = 4505 (Min: 0, Max: 4673), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S11_Linear_12x172x1x1, Total Raw Memory: 2332 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S11_Linear_12x172x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 2332, Reusable Memory: 46404, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S12_SoftMax ======================= + S12_SoftMax Partition[0] Size = 57 (Min: 8, Max: 81), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S12_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. + S12_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S11_Output[ In] Adding Edge From S11_Linear_12x172x1x1 To S12_SoftMax New + Symbol: S10_Output[ In] Adding Edge From S10_AveragePool_13x5 To S11_Linear_12x172x1x1 New + Symbol: S9_Output[ In] Adding Edge From S9_Conv2d_172x172x1x1_Relu To S10_AveragePool_13x5 New + Symbol: S8_Output[ In] Adding Edge From S8_Conv2d_172x1x3x3_Relu To S9_Conv2d_172x172x1x1_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_172x172x1x1_Relu To S8_Conv2d_172x1x3x3_Relu New + Symbol: S6_Output[ In] Adding Edge From S6_Conv2d_172x1x3x3_Relu To S7_Conv2d_172x172x1x1_Relu New + Symbol: S5_Output[ In] Adding Edge From S5_Conv2d_172x172x1x1_Relu To S6_Conv2d_172x1x3x3_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_172x1x3x3_Relu To S5_Conv2d_172x172x1x1_Relu New + Symbol: S3_Output[ In] Adding Edge From S3_Conv2d_172x172x1x1_Relu To S4_Conv2d_172x1x3x3_Relu New + Symbol: S2_Output[ In] Adding Edge From S2_Conv2d_172x1x3x3_Relu To S3_Conv2d_172x172x1x1_Relu New + Symbol: S1_Output[ In] Adding Edge From S1_Conv2d_172x1x10x4_Relu To S2_Conv2d_172x1x3x3_Relu New + Symbol: Output_1[Undef] Adding Edge From S12_SoftMax To __GraphExit__ New + Symbol: S12_Infos[ In] Adding Edge From __GraphEntry__ To S12_SoftMax New + Symbol: S11_Mul_shift[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 New + Symbol: S11_Mul_scale[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S11_Biases[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S11_Weights[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S11_Infos[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_AveragePool_13x5 New + Symbol: S9_Mul_shift[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu New + Symbol: S9_Mul_scale[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S9_Biases[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S9_Weights[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S9_Infos[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S8_Mul_shift[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu New + Symbol: S8_Mul_scale[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S8_Biases[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S8_Weights[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S8_Infos[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu New + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Biases[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Weights[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S6_Mul_shift[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu New + Symbol: S6_Mul_scale[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S6_Biases[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S6_Weights[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S6_Infos[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S5_Mul_shift[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu New + Symbol: S5_Mul_scale[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S5_Biases[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S5_Weights[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S5_Infos[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu New + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Biases[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Weights[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S3_Mul_shift[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu New + Symbol: S3_Mul_scale[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S3_Biases[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S3_Weights[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S3_Infos[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S2_Mul_shift[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu New + Symbol: S2_Mul_scale[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S2_Biases[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S2_Weights[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S2_Infos[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S1_Mul_shift[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu New + Symbol: S1_Mul_scale[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S1_Biases[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S1_Weights[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S1_Infos[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S0_Input_1[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 54180 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 197153 => Alloc: OK + +[FULL] Remapping [54180 .. 197152] to [0 .. 142972] Align compensation: 3 +[PART] Remapping [0 .. 54179] to [142976 .. 197155] Align compensation: 0 +[PART] Remapping [197153 .. 349999] to [197156 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_m_quantCNN is sucessfull, L2: 197153 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => S0_Input_1 + (null) => S1_Infos + (null) => S1_Weights + (null) => S1_Biases + (null) => S1_Mul_scale + (null) => S1_Mul_shift + (null) => S2_Infos + (null) => S2_Weights + (null) => S2_Biases + (null) => S2_Mul_scale + (null) => S2_Mul_shift + (null) => S3_Infos + (null) => S3_Weights + (null) => S3_Biases + (null) => S3_Mul_scale + (null) => S3_Mul_shift + (null) => S4_Infos + (null) => S4_Weights + (null) => S4_Biases + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S5_Infos + (null) => S5_Weights + (null) => S5_Biases + (null) => S5_Mul_scale + (null) => S5_Mul_shift + (null) => S6_Infos + (null) => S6_Weights + (null) => S6_Biases + (null) => S6_Mul_scale + (null) => S6_Mul_shift + (null) => S7_Infos + (null) => S7_Weights + (null) => S7_Biases + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S8_Infos + (null) => S8_Weights + (null) => S8_Biases + (null) => S8_Mul_scale + (null) => S8_Mul_shift + (null) => S9_Infos + (null) => S9_Weights + (null) => S9_Biases + (null) => S9_Mul_scale + (null) => S9_Mul_shift + (null) => S10_Infos + (null) => S11_Infos + (null) => S11_Weights + (null) => S11_Biases + (null) => S11_Mul_scale + (null) => S11_Mul_shift + (null) => S12_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 + + Living Dynamic Symbols: [S0_Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S1_Conv2d_172x1x10x4_Relu, Operations: 1763000 + I In => S0_Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 1460, TileOverhead: 2.979592, L2Buff: 0, Addr: 0 +CI PartBuff Filter => S1_Weights --L2-- Size: 6880, L3_Move: 0, L2_Move: 6880, TileOverhead: 1.000000, L2Buff: 0, Addr: 1472 +CI Buff Bias => S1_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 440 + O Out => S1_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8512 +CI Buff Scale => S1_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1128 +CI Buff ScaleN => S1_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1300 +CI Buff Infos => S1_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45472 + Kernel Memory : L3: 0, L2: 51411 + Kernel Total Memory: 51411, L3 moves: 0, L2 moves: 52381, Move overhead: 1.018868 + Kernel Operations : 1763000 [KernelOper/GraphOper: 17.780609%], Move/Operation ratio: [L3: 0.000000, L2: 0.029711] + Successors: 2 + + Living Dynamic Symbols: [S0_Input_1] [S1_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S2_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S1_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S2_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 25032 +CI Buff Bias => S2_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S2_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26580 +CI Buff Scale => S2_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24688 +CI Buff ScaleN => S2_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24860 +CI Buff Infos => S2_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45300 + Kernel Memory : L3: 0, L2: 56769 + Kernel Total Memory: 56769, L3 moves: 0, L2 moves: 56769, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.507773] + Successors: 3 + + Living Dynamic Symbols: [S1_Output] [S2_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S3_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S2_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S3_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S3_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S3_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S3_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S3_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S3_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 4 + + Living Dynamic Symbols: [S2_Output] [S3_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S4_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S3_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S4_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => S4_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S4_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S4_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 5 + + Living Dynamic Symbols: [S3_Output] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S5_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S4_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S5_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S5_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S5_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S5_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S5_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S5_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 6 + + Living Dynamic Symbols: [S4_Output] [S5_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S6_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S5_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S6_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => S6_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S6_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S6_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S6_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S6_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 7 + + Living Dynamic Symbols: [S5_Output] [S6_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S7_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S6_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S7_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S7_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S7_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 8 + + Living Dynamic Symbols: [S6_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S8_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S8_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => S8_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S8_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S8_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S8_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S8_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 9 + + Living Dynamic Symbols: [S7_Output] [S8_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S9_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S8_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S9_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S9_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S9_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S9_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S9_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S9_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 10 + + Living Dynamic Symbols: [S8_Output] [S9_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S10_AveragePool_13x5, Operations: 11180 + I Buff In => S9_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S10_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11180 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 11352 + Kernel Memory : L3: 0, L2: 11361 + Kernel Total Memory: 11361, L3 moves: 0, L2 moves: 11361, Move overhead: 1.000000 + Kernel Operations : 11180 [KernelOper/GraphOper: 0.112755%], Move/Operation ratio: [L3: 0.000000, L2: 1.016190] + Successors: 11 + + Living Dynamic Symbols: [S9_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S11_Linear_12x172x1x1, Operations: 2064 + I Buff In => S10_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S11_Weights --L2-- Size: 2064, L3_Move: 0, L2_Move: 2064, TileOverhead: 1.000000, L2Buff: 0, Addr: 172 +CI Buff Bias => S11_Biases --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 2236 + O Buff Out => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2284 +CI Buff Scale => S11_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2296 +CI Buff ScaleN => S11_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2308 +CI Buff Infos => S11_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 2320 + Kernel Memory : L3: 0, L2: 2329 + Kernel Total Memory: 2329, L3 moves: 0, L2 moves: 2329, Move overhead: 1.000000 + Kernel Operations : 2064 [KernelOper/GraphOper: 0.020816%], Move/Operation ratio: [L3: 0.000000, L2: 1.128392] + Successors: 12 + + Living Dynamic Symbols: [S10_Output] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S12_SoftMax, Operations: 12 + I Buff In => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S12_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000121%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 56769 + Graph nodes min global memory: L3: 0, L2: 56772 + Graph sum of kernel arguments size: 408702, L3 moves: 0, L2 moves: 409672, Move overhead: 1.002373 + Graph total operations: 9915296 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 408702, Total L3_Move: 0, Total L2_Move: 409672, Tiling Overhead Average: 1.002373 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 54180 + Const 0 30628 + Total 0 56772 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + S0_Input_1 Externally allocated + S1_Infos INSTALL: HyperFlash[ 0: 13]@ 142808 LOAD: L2[ 0: 13]@ 142808 EXEC: L2[ 0: 13]@ 142808 , Size: 9 + S1_Weights INSTALL: HyperFlash[ 0: 13]@ 118336 LOAD: L2[ 0: 13]@ 118336 EXEC: L2[ 0: 13]@ 118336 , Size: 6880 + S1_Biases INSTALL: HyperFlash[ 0: 13]@ 133472 LOAD: L2[ 0: 13]@ 133472 EXEC: L2[ 0: 13]@ 133472 , Size: 688 + S1_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 139664 LOAD: L2[ 0: 13]@ 139664 EXEC: L2[ 0: 13]@ 139664 , Size: 172 + S1_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 139836 LOAD: L2[ 0: 13]@ 139836 EXEC: L2[ 0: 13]@ 139836 , Size: 172 + S2_Infos INSTALL: HyperFlash[ 0: 13]@ 142820 LOAD: L2[ 0: 13]@ 142820 EXEC: L2[ 0: 13]@ 142820 , Size: 9 + S2_Weights INSTALL: HyperFlash[ 0: 13]@ 127280 LOAD: L2[ 0: 13]@ 127280 EXEC: L2[ 0: 13]@ 127280 , Size: 1548 + S2_Biases INSTALL: HyperFlash[ 0: 13]@ 134160 LOAD: L2[ 0: 13]@ 134160 EXEC: L2[ 0: 13]@ 134160 , Size: 688 + S2_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140008 LOAD: L2[ 0: 13]@ 140008 EXEC: L2[ 0: 13]@ 140008 , Size: 172 + S2_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140180 LOAD: L2[ 0: 13]@ 140180 EXEC: L2[ 0: 13]@ 140180 , Size: 172 + S3_Infos INSTALL: HyperFlash[ 0: 13]@ 142832 LOAD: L2[ 0: 13]@ 142832 EXEC: L2[ 0: 13]@ 142832 , Size: 9 + S3_Weights INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 29584 + S3_Biases INSTALL: HyperFlash[ 0: 13]@ 134848 LOAD: L2[ 0: 13]@ 134848 EXEC: L2[ 0: 13]@ 134848 , Size: 688 + S3_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140352 LOAD: L2[ 0: 13]@ 140352 EXEC: L2[ 0: 13]@ 140352 , Size: 172 + S3_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140524 LOAD: L2[ 0: 13]@ 140524 EXEC: L2[ 0: 13]@ 140524 , Size: 172 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 142844 LOAD: L2[ 0: 13]@ 142844 EXEC: L2[ 0: 13]@ 142844 , Size: 9 + S4_Weights INSTALL: HyperFlash[ 0: 13]@ 128828 LOAD: L2[ 0: 13]@ 128828 EXEC: L2[ 0: 13]@ 128828 , Size: 1548 + S4_Biases INSTALL: HyperFlash[ 0: 13]@ 135536 LOAD: L2[ 0: 13]@ 135536 EXEC: L2[ 0: 13]@ 135536 , Size: 688 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140696 LOAD: L2[ 0: 13]@ 140696 EXEC: L2[ 0: 13]@ 140696 , Size: 172 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140868 LOAD: L2[ 0: 13]@ 140868 EXEC: L2[ 0: 13]@ 140868 , Size: 172 + S5_Infos INSTALL: HyperFlash[ 0: 13]@ 142856 LOAD: L2[ 0: 13]@ 142856 EXEC: L2[ 0: 13]@ 142856 , Size: 9 + S5_Weights INSTALL: HyperFlash[ 0: 13]@ 29584 LOAD: L2[ 0: 13]@ 29584 EXEC: L2[ 0: 13]@ 29584 , Size: 29584 + S5_Biases INSTALL: HyperFlash[ 0: 13]@ 136224 LOAD: L2[ 0: 13]@ 136224 EXEC: L2[ 0: 13]@ 136224 , Size: 688 + S5_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141040 LOAD: L2[ 0: 13]@ 141040 EXEC: L2[ 0: 13]@ 141040 , Size: 172 + S5_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141212 LOAD: L2[ 0: 13]@ 141212 EXEC: L2[ 0: 13]@ 141212 , Size: 172 + S6_Infos INSTALL: HyperFlash[ 0: 13]@ 142868 LOAD: L2[ 0: 13]@ 142868 EXEC: L2[ 0: 13]@ 142868 , Size: 9 + S6_Weights INSTALL: HyperFlash[ 0: 13]@ 130376 LOAD: L2[ 0: 13]@ 130376 EXEC: L2[ 0: 13]@ 130376 , Size: 1548 + S6_Biases INSTALL: HyperFlash[ 0: 13]@ 136912 LOAD: L2[ 0: 13]@ 136912 EXEC: L2[ 0: 13]@ 136912 , Size: 688 + S6_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141384 LOAD: L2[ 0: 13]@ 141384 EXEC: L2[ 0: 13]@ 141384 , Size: 172 + S6_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141556 LOAD: L2[ 0: 13]@ 141556 EXEC: L2[ 0: 13]@ 141556 , Size: 172 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 142880 LOAD: L2[ 0: 13]@ 142880 EXEC: L2[ 0: 13]@ 142880 , Size: 9 + S7_Weights INSTALL: HyperFlash[ 0: 13]@ 59168 LOAD: L2[ 0: 13]@ 59168 EXEC: L2[ 0: 13]@ 59168 , Size: 29584 + S7_Biases INSTALL: HyperFlash[ 0: 13]@ 137600 LOAD: L2[ 0: 13]@ 137600 EXEC: L2[ 0: 13]@ 137600 , Size: 688 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141728 LOAD: L2[ 0: 13]@ 141728 EXEC: L2[ 0: 13]@ 141728 , Size: 172 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141900 LOAD: L2[ 0: 13]@ 141900 EXEC: L2[ 0: 13]@ 141900 , Size: 172 + S8_Infos INSTALL: HyperFlash[ 0: 13]@ 142892 LOAD: L2[ 0: 13]@ 142892 EXEC: L2[ 0: 13]@ 142892 , Size: 9 + S8_Weights INSTALL: HyperFlash[ 0: 13]@ 131924 LOAD: L2[ 0: 13]@ 131924 EXEC: L2[ 0: 13]@ 131924 , Size: 1548 + S8_Biases INSTALL: HyperFlash[ 0: 13]@ 138288 LOAD: L2[ 0: 13]@ 138288 EXEC: L2[ 0: 13]@ 138288 , Size: 688 + S8_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142072 LOAD: L2[ 0: 13]@ 142072 EXEC: L2[ 0: 13]@ 142072 , Size: 172 + S8_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142244 LOAD: L2[ 0: 13]@ 142244 EXEC: L2[ 0: 13]@ 142244 , Size: 172 + S9_Infos INSTALL: HyperFlash[ 0: 13]@ 142904 LOAD: L2[ 0: 13]@ 142904 EXEC: L2[ 0: 13]@ 142904 , Size: 9 + S9_Weights INSTALL: HyperFlash[ 0: 13]@ 88752 LOAD: L2[ 0: 13]@ 88752 EXEC: L2[ 0: 13]@ 88752 , Size: 29584 + S9_Biases INSTALL: HyperFlash[ 0: 13]@ 138976 LOAD: L2[ 0: 13]@ 138976 EXEC: L2[ 0: 13]@ 138976 , Size: 688 + S9_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142416 LOAD: L2[ 0: 13]@ 142416 EXEC: L2[ 0: 13]@ 142416 , Size: 172 + S9_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142588 LOAD: L2[ 0: 13]@ 142588 EXEC: L2[ 0: 13]@ 142588 , Size: 172 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 142916 LOAD: L2[ 0: 13]@ 142916 EXEC: L2[ 0: 13]@ 142916 , Size: 9 + S11_Infos INSTALL: HyperFlash[ 0: 13]@ 142928 LOAD: L2[ 0: 13]@ 142928 EXEC: L2[ 0: 13]@ 142928 , Size: 9 + S11_Weights INSTALL: HyperFlash[ 0: 13]@ 125216 LOAD: L2[ 0: 13]@ 125216 EXEC: L2[ 0: 13]@ 125216 , Size: 2064 + S11_Biases INSTALL: HyperFlash[ 0: 13]@ 142760 LOAD: L2[ 0: 13]@ 142760 EXEC: L2[ 0: 13]@ 142760 , Size: 48 + S11_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142940 LOAD: L2[ 0: 13]@ 142940 EXEC: L2[ 0: 13]@ 142940 , Size: 12 + S11_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142952 LOAD: L2[ 0: 13]@ 142952 EXEC: L2[ 0: 13]@ 142952 , Size: 12 + S12_Infos INSTALL: HyperFlash[ 0: 13]@ 142964 LOAD: L2[ 0: 13]@ 142964 EXEC: L2[ 0: 13]@ 142964 , Size: 9 + Output_1 Externally allocated + S1_Output EXEC: L2[ 1: 2]@ 154156 , Size: 43000 + S2_Output EXEC: L2[ 2: 3]@ 142976 , Size: 11180 + S3_Output EXEC: L2[ 3: 4]@ 154156 , Size: 11180 + S4_Output EXEC: L2[ 4: 5]@ 142976 , Size: 11180 + S5_Output EXEC: L2[ 5: 6]@ 154156 , Size: 11180 + S6_Output EXEC: L2[ 6: 7]@ 142976 , Size: 11180 + S7_Output EXEC: L2[ 7: 8]@ 154156 , Size: 11180 + S8_Output EXEC: L2[ 8: 9]@ 165336 , Size: 11180 + S9_Output EXEC: L2[ 9: 10]@ 142976 , Size: 11180 + S10_Output EXEC: L2[ 10: 11]@ 154156 , Size: 172 + S11_Output EXEC: L2[ 11: 12]@ 142976 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S1_Conv2d_172x1x10x4_Relu +Generating Code For User Kernel: S2_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S3_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S4_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S5_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S6_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S7_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S8_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S9_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S10_AveragePool_13x5 +Generating Code For User Kernel: S11_Linear_12x172x1x1 +Generating Code For User Kernel: S12_SoftMax +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Weights.tensor: 6880 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Weights.tensor: 2064 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Biases.tensor: 48 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S12_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_m_quant_L3_Flash_Const.dat (size 142976) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 45484 +L2 Memory size (Bytes) : Given: 350000, Used: 197153 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 409672 Bytes +Sum of all Kernels arguments size : 408702 Bytes +Tiling Bandwith overhead : 1.002373 Move/KerArgSize +Sum of baseline bandwidth : 15711780 Bytes +Percentage of baseline BW for L2 : 2.60742 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 9915296 Operations +Total amount of flash coefficients : 142976 Bytes + +Basic kernels library : nntool_extra_kernels.h + : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_m_quant.h +Output Directory : BUILD_MODEL_8BIT_EMUL + +The following files have been generated: + KWS_ds_cnn_m_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_m_quantKernels.h Header file for the generated C code + KWS_ds_cnn_m_quant_L3_Flash_Const.dat Flash content for Graph constants +mkdir /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +python3 /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/GenLUT.py --fft_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/LUT.def --mfcc_bf_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCC_FB.def \ + --sample_rate 16000 --frame_size 640 --frame_step 320 --n_frame 49 \ + --n_fft 1024 --n_dct 40 --mfcc_bank_cnt 40 --fmin 20 --fmax 4000 --mfcc_bank_cnt 40 --preempfactor 0.0 \ + --use_tf_mfcc --save_params_header MFCC_params_MEDIUM.h --use_power +(1024,) +Filter 0: Start: 2 Stop: 5 Base: 0 Items: 4 +Filter 1: Start: 4 Stop: 8 Base: 4 Items: 5 +Filter 2: Start: 6 Stop: 10 Base: 9 Items: 5 +Filter 3: Start: 9 Stop: 13 Base: 14 Items: 5 +Filter 4: Start: 11 Stop: 15 Base: 19 Items: 5 +Filter 5: Start: 14 Stop: 18 Base: 24 Items: 5 +Filter 6: Start: 16 Stop: 21 Base: 29 Items: 6 +Filter 7: Start: 19 Stop: 24 Base: 35 Items: 6 +Filter 8: Start: 22 Stop: 28 Base: 41 Items: 7 +Filter 9: Start: 25 Stop: 31 Base: 48 Items: 7 +Filter 10: Start: 29 Stop: 34 Base: 55 Items: 6 +Filter 11: Start: 32 Stop: 38 Base: 61 Items: 7 +Filter 12: Start: 35 Stop: 42 Base: 68 Items: 8 +Filter 13: Start: 39 Stop: 46 Base: 76 Items: 8 +Filter 14: Start: 43 Stop: 51 Base: 84 Items: 9 +Filter 15: Start: 47 Stop: 55 Base: 93 Items: 9 +Filter 16: Start: 52 Stop: 60 Base: 102 Items: 9 +Filter 17: Start: 56 Stop: 65 Base: 111 Items: 10 +Filter 18: Start: 61 Stop: 70 Base: 121 Items: 10 +Filter 19: Start: 66 Stop: 75 Base: 131 Items: 10 +Filter 20: Start: 71 Stop: 81 Base: 141 Items: 11 +Filter 21: Start: 76 Stop: 87 Base: 152 Items: 12 +Filter 22: Start: 82 Stop: 93 Base: 164 Items: 12 +Filter 23: Start: 88 Stop: 99 Base: 176 Items: 12 +Filter 24: Start: 94 Stop: 106 Base: 188 Items: 13 +Filter 25: Start: 100 Stop: 113 Base: 201 Items: 14 +Filter 26: Start: 107 Stop: 121 Base: 215 Items: 15 +Filter 27: Start: 114 Stop: 128 Base: 230 Items: 15 +Filter 28: Start: 122 Stop: 137 Base: 245 Items: 16 +Filter 29: Start: 129 Stop: 145 Base: 261 Items: 17 +Filter 30: Start: 138 Stop: 154 Base: 278 Items: 17 +Filter 31: Start: 146 Stop: 163 Base: 295 Items: 18 +Filter 32: Start: 155 Stop: 173 Base: 313 Items: 19 +Filter 33: Start: 164 Stop: 183 Base: 332 Items: 20 +Filter 34: Start: 174 Stop: 194 Base: 352 Items: 21 +Filter 35: Start: 184 Stop: 205 Base: 373 Items: 22 +Filter 36: Start: 195 Stop: 217 Base: 395 Items: 23 +Filter 37: Start: 206 Stop: 229 Base: 418 Items: 24 +Filter 38: Start: 218 Stop: 242 Base: 442 Items: 25 +Filter 39: Start: 230 Stop: 255 Base: 467 Items: 26 +gcc -g -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -I. -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Emulation MFCCmodel.c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccGenerator.c /home/marco-gwt/GWT/AutotilerV2/install/lib/libtile.a -lm -DMEDIUM +/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -f BUILD_MODEL_8BIT_EMUL --L1 48736 --L2 350000 --L3 6388608 +MFCC_COEF_DYN = 10 +FFT_BITS = 10 + +==== Process Tiling For User Kernel: MFCC ======================= + MFCC Partition[0] Size = 24894 (Min: 0, Max: 25622), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes + MFCC, TiledSpace: Tile0 Iteration Count: 1 + In : Ratio: 0.000000, Size: 2560, Total: 2560, Move: 62720 (Decl x 1.960000) L2 +* Out : Ratio: 0.000000, Size: 3920, Total: 6480, Move: 3920 (Decl x 1.000000) L2 +* InOut1 : Ratio: 3.200000, Size: 4096, Total: 10576, Move: 0 (Decl x 0.000000) L2 +* Out_fft : Ratio: 3.200000, Size: 4096, Total: 14672, Move: 0 (Decl x 0.000000) L2 +* shift_fft : Ratio: 3.200000, Size: 1024, Total: 15696, Move: 0 (Decl x 0.000000) L2 +* InOut2 : Ratio: 0.125000, Size: 160, Total: 15856, Move: 0 (Decl x 0.000000) L2 +* Shift : Ratio: 0.003125, Size: 4, Total: 15860, Move: 0 (Decl x 0.000000) L2 +* WinTable : Ratio: 3.200000, Size: 2048, Total: 17908, Move: 2048 (Decl x 1.000000) L2 +* Twiddles_fft : Ratio: 6.400000, Size: 4096, Total: 22004, Move: 4096 (Decl x 1.000000) L2 +* SwapTable_fft : Ratio: 3.200000, Size: 2048, Total: 24052, Move: 2048 (Decl x 1.000000) L2 +*MFCC_FilterBank : Ratio: 0.125000, Size: 320, Total: 24372, Move: 320 (Decl x 1.000000) L2 +* MFCC_Coeffs : Ratio: 1.543750, Size: 988, Total: 25360, Move: 988 (Decl x 1.000000) L2 +* shift_bf : Ratio: 0.125000, Size: 40, Total: 25400, Move: 0 (Decl x 0.000000) L2 +* DCT_Coeff : Ratio: 5.000000, Size: 3200, Total: 28600, Move: 3200 (Decl x 1.000000) L2 + MFCC - IterSpace: Tile0 - L1 Memory: 28600, L2Move: 79340, L3Move: 0, Tiling Overhead: 1.631839 + MFCC Iteration for Tiled Space: 1 + MFCC For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 28600, Reusable Memory: 20136, Used L2 Memory: 0 +================================================================================================= + + +Generating Code For User Kernel: MFCC + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 28600 +L2 Memory size (Bytes) : Given: 350000, Used: 0 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for all user kernels : 0 Bytes +L2 Memory bandwidth for all user kerneks : 79340 Bytes +Sum of all Kernels arguments size : 48620 Bytes +Tiling Bandwith overhead : 1.631839 Move/KerArgSize +Percentage of baseline BW for L2 : inf % +Percentage of baseline BW for L3 : -nan % +Sum of all Kernels operations : 0 Operations + +Basic kernels library : MfccBasicKernels.h +Output Directory : /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL + +The following files have been generated: + MFCCKernels.c Generated C code for the user kernels and the user kernels groups + MFCCKernels.h Header file for the generated C code +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': True} +Pred/Tot: 97/ 100 Accuracy: 97.00% +Pred/Tot: 196/ 200 Accuracy: 98.00% +Pred/Tot: 288/ 300 Accuracy: 96.00% +Pred/Tot: 380/ 400 Accuracy: 95.00% +Pred/Tot: 474/ 500 Accuracy: 94.80% +Pred/Tot: 568/ 600 Accuracy: 94.67% +Pred/Tot: 663/ 700 Accuracy: 94.71% +Pred/Tot: 755/ 800 Accuracy: 94.38% +Pred/Tot: 849/ 900 Accuracy: 94.33% +Pred/Tot: 945/1000 Accuracy: 94.50% +Pred/Tot: 1038/1100 Accuracy: 94.36% +Pred/Tot: 1126/1200 Accuracy: 93.83% +Pred/Tot: 1214/1300 Accuracy: 93.38% +Pred/Tot: 1305/1400 Accuracy: 93.21% +Pred/Tot: 1395/1500 Accuracy: 93.00% +Pred/Tot: 1493/1600 Accuracy: 93.31% +Pred/Tot: 1587/1700 Accuracy: 93.35% +Pred/Tot: 1683/1800 Accuracy: 93.50% +Pred/Tot: 1778/1900 Accuracy: 93.58% +Pred/Tot: 1876/2000 Accuracy: 93.80% +Pred/Tot: 1968/2100 Accuracy: 93.71% +Pred/Tot: 2064/2200 Accuracy: 93.82% +Pred/Tot: 2160/2300 Accuracy: 93.91% +Pred/Tot: 2252/2400 Accuracy: 93.83% +Pred/Tot: 2344/2500 Accuracy: 93.76% +Pred/Tot: 2436/2600 Accuracy: 93.69% +Pred/Tot: 2532/2700 Accuracy: 93.78% +Pred/Tot: 2625/2800 Accuracy: 93.75% +Pred/Tot: 2716/2900 Accuracy: 93.66% +Pred/Tot: 2808/3000 Accuracy: 93.60% +Pred/Tot: 2903/3100 Accuracy: 93.65% +Pred/Tot: 2998/3200 Accuracy: 93.69% +Pred/Tot: 3094/3300 Accuracy: 93.76% +Pred/Tot: 3183/3400 Accuracy: 93.62% +Pred/Tot: 3276/3500 Accuracy: 93.60% +Pred/Tot: 3371/3600 Accuracy: 93.64% +Pred/Tot: 3460/3700 Accuracy: 93.51% +Pred/Tot: 3557/3800 Accuracy: 93.61% +Pred/Tot: 3651/3900 Accuracy: 93.62% +Pred/Tot: 3744/4000 Accuracy: 93.60% +Pred/Tot: 3839/4100 Accuracy: 93.63% +Pred/Tot: 3933/4200 Accuracy: 93.64% +Pred/Tot: 4028/4300 Accuracy: 93.67% +Pred/Tot: 4120/4400 Accuracy: 93.64% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4162/4444 Accuracy: 93.65% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 1 332 2 5 2 6 0 5 9 2 2 5] + [ 0 7 385 0 0 0 2 1 0 0 0 2] + [ 0 11 1 373 0 9 2 0 0 0 2 8] + [ 0 4 1 0 325 3 2 0 1 6 7 1] + [ 0 1 0 8 0 359 0 0 0 0 3 6] + [ 0 4 9 1 0 0 334 4 0 0 0 0] + [ 0 11 1 1 0 1 1 346 0 0 1 1] + [ 1 7 0 2 4 1 0 1 342 5 0 0] + [ 0 4 0 1 22 1 3 0 4 334 2 2] + [ 1 2 0 0 6 2 0 0 0 2 336 1] + [ 0 8 2 20 0 12 0 0 1 2 2 325]] +Pred/Tot: 94/ 100 Accuracy: 94.00% +Pred/Tot: 188/ 200 Accuracy: 94.00% +Pred/Tot: 280/ 300 Accuracy: 93.33% +Pred/Tot: 372/ 400 Accuracy: 93.00% +Pred/Tot: 464/ 500 Accuracy: 92.80% +Pred/Tot: 559/ 600 Accuracy: 93.17% +Pred/Tot: 656/ 700 Accuracy: 93.71% +Pred/Tot: 750/ 800 Accuracy: 93.75% +Pred/Tot: 844/ 900 Accuracy: 93.78% +Pred/Tot: 939/1000 Accuracy: 93.90% +Pred/Tot: 1029/1100 Accuracy: 93.55% +Pred/Tot: 1126/1200 Accuracy: 93.83% +Pred/Tot: 1220/1300 Accuracy: 93.85% +Pred/Tot: 1313/1400 Accuracy: 93.79% +Pred/Tot: 1406/1500 Accuracy: 93.73% +Pred/Tot: 1502/1600 Accuracy: 93.88% +Pred/Tot: 1596/1700 Accuracy: 93.88% +Pred/Tot: 1693/1800 Accuracy: 94.06% +Pred/Tot: 1781/1900 Accuracy: 93.74% +Pred/Tot: 1877/2000 Accuracy: 93.85% +Pred/Tot: 1971/2100 Accuracy: 93.86% +Pred/Tot: 2066/2200 Accuracy: 93.91% +Pred/Tot: 2159/2300 Accuracy: 93.87% +Pred/Tot: 2252/2400 Accuracy: 93.83% +Pred/Tot: 2345/2500 Accuracy: 93.80% +Pred/Tot: 2437/2600 Accuracy: 93.73% +Pred/Tot: 2530/2700 Accuracy: 93.70% +Pred/Tot: 2626/2800 Accuracy: 93.79% +Pred/Tot: 2720/2900 Accuracy: 93.79% +Pred/Tot: 2815/3000 Accuracy: 93.83% +Pred/Tot: 2909/3100 Accuracy: 93.84% +Pred/Tot: 3003/3200 Accuracy: 93.84% +Pred/Tot: 3098/3300 Accuracy: 93.88% +Pred/Tot: 3190/3400 Accuracy: 93.82% +Pred/Tot: 3286/3500 Accuracy: 93.89% +Pred/Tot: 3383/3600 Accuracy: 93.97% +Pred/Tot: 3476/3700 Accuracy: 93.95% +Pred/Tot: 3572/3800 Accuracy: 94.00% +Pred/Tot: 3667/3900 Accuracy: 94.03% +Pred/Tot: 3759/4000 Accuracy: 93.97% +Pred/Tot: 3850/4100 Accuracy: 93.90% +Pred/Tot: 3947/4200 Accuracy: 93.98% +Pred/Tot: 4046/4300 Accuracy: 94.09% +Pred/Tot: 4141/4400 Accuracy: 94.11% +Pred/Tot: 4233/4500 Accuracy: 94.07% +Pred/Tot: 4325/4600 Accuracy: 94.02% +Pred/Tot: 4419/4700 Accuracy: 94.02% +Pred/Tot: 4513/4800 Accuracy: 94.02% + +FINAL TESTING ACCURACY: +Pred/Tot: 4595/4889 Accuracy: 93.99% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 376 1 4 2 5 2 3 7 2 3 3] + [ 0 7 407 3 0 0 1 0 0 0 1 0] + [ 0 6 1 381 1 8 1 1 0 0 0 6] + [ 0 7 0 0 399 2 0 0 3 7 6 1] + [ 0 7 2 10 0 379 0 0 0 0 2 6] + [ 0 9 7 1 0 0 393 1 0 0 1 0] + [ 0 18 0 0 0 3 6 366 2 0 1 0] + [ 0 10 0 0 2 5 0 0 362 12 1 4] + [ 0 3 0 1 12 1 0 0 9 370 3 3] + [ 0 5 0 0 1 4 0 0 0 1 399 1] + [ 0 4 1 31 1 3 0 2 2 1 2 355]] diff --git a/accuracy_log/log_test_medium_power_v2.txt b/accuracy_log/log_test_medium_power_v2.txt new file mode 100644 index 0000000..a61de1c --- /dev/null +++ b/accuracy_log/log_test_medium_power_v2.txt @@ -0,0 +1,2090 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_m_quant_power.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 1 +WARNING:tensorflow:From utils/test_accuracy_emul.py:311: The name tf.app.run is deprecated. Please use tf.compat.v1.app.run instead. + +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from main_emulation.c:32: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_BasicKernels_SQ8.h:3, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.h:5, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.c:1: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:17: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WriteImageToFile’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:437:18: note: in expansion of macro ‘__OPEN_WRITE’ + 437 | void *File = __OPEN_WRITE(fs, ImageName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:454:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 454 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:460:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 460 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:473:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 473 | ret+=__WRITE(File,OutBuffer +(CHUNK_SIZE*i), CHUNK_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:476:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 476 | ret+=__WRITE(File,OutBuffer+(CHUNK_SIZE*steps) , ((W*H*PixelSize) % CHUNK_SIZE)*sizeof(unsigned char)); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:479:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 479 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:57: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result] + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:9: note: in expansion of macro ‘__WRITE’ + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:7: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c: In function ‘AT_TensorGetNextPage’: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:79:63: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 79 | AT_HYPERFLASH_FS_CL_COPY((AT_HYPERFLASH_FS_T *) L3_Device, (AT_HYPERFLASH_FS_EXT_ADDR_TYPE) (Addr+Offset), (AT_HYPERFLASH_FS_INT_ADDR_TYPE) L2_BufferAddr, Size, 0, L3_Event); + | ^ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:225:36: note: in definition of macro ‘AT_HYPERFLASH_FS_CL_COPY’ + 225 | __at_hyperflash_fs_copy(*(file), ext, loc, size, dir) + | ^~~ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c: In function ‘WriteWavToFileNew’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:236:18: note: in expansion of macro ‘__OPEN_WRITE’ + 236 | void *File = __OPEN_WRITE(fs, FileName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:328:20: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 328 | ret += __WRITE(File, header_buffer, WAV_HEADER_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:335:21: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 335 | ret += __WRITE(File, data, Size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:338:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 338 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=1 LARGE=0 WITH_MFCC=1 USE_POWER=1 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_SQ8_EMUL +cp model/KWS_ds_cnn_m_quant_power.tflite BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite +echo "GENERATING NNTOOL STATE FILE" +GENERATING NNTOOL STATE FILE +nntool -s model/nntool_script BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +open - opening graph file BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite load_quantization = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +forwards SOFTMAX_0_11 in: -35.73<(i8-0.00)*0.27912489<35.45 out: None stop [] fusion False +handler SoftmaxTanHMult selected for SoftMaxParameters(SOFTMAX_0_11) +forwards handler SOFTMAX_0_11 returned in: -64.00<(i8-0.00)*0.50000000<63.50 forced out: -1.00<(i16-0.00)*0.00003052<1.00 fusion False +forwards in edge 0 does not match was -35.73<(i8-0.00)*0.27912489<35.45 need -64.00<(i8-0.00)*0.50000000<63.50 forced +go backwackwards to F 12x1x1x172 B 1 +backwards FULLY_CONNECTED_0_10 in: -9.24<(i8-0.00)*0.07214916<9.16,chan<(i8-0.00)*chan W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1763000 + +==== Process Tiling For User Kernel: S4_Conv2d_172x1x10x4_Relu ======================= +S4_Conv2d_172x1x10x4_Relu Partition[0] Size = 194929 (Min: 200, Max: 275121), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=25 + In Dim: 58, TileOverlap: 8, Ratio: 2.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: Bias, was using 704 Bytes will require 688 Bytes buffer +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: Scale, was using 176 Bytes will require 172 Bytes buffer +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: ScaleN, was using 176 Bytes will require 172 Bytes buffer +S4_Conv2d_172x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 4 Parametric Space: [D1, M0=88] Parametric Space: [D0, M1=1] + In : Ratio: 2.000000, FixDim: 10, VarDim: 22 [ 49], Size: 440, Total: 440, Move: 1460 (Decl x 2.979592) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 1128, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 1300, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 1472, Move: 172 (Decl x 1.000000) L2 +@ Filter : Ratio: 0.000000, Size: 7040, Total: 8512, Move: 6880 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 12320, Total: 20832, Move: 43000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 24640, Total: 45472, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45484, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_172x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 45484, L2Move: 52381, L3Move: 0, Tiling Overhead: 1.018868 +S4_Conv2d_172x1x10x4_Relu Partial buffering on Arg: Filter, From: D0 To: D1. Current is (Par) 1 x [W:1, H:1] x 40 => Partial buffer size is 7040 Bytes +S4_Conv2d_172x1x10x4_Relu Found Parametric value for space D1 (Initial: 172, Div: 8) = 88 [88*1 + 84] and space D0 (Initial: 1, Div: 4) = 1 [1*1 + 0], Iteration for Tiled Space: 4 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: In, Size: 220, Base1: 0, Base2: 220 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Bias, Size: 688, Base1: 440, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Scale, Size: 172, Base1: 1128, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: ScaleN, Size: 172, Base1: 1300, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Filter, Size: 3520, Base1: 1472, Base2: 4992 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Out, Size: 6160, Base1: 8512, Base2: 14672 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: ConvOut, Size: 24640, Base1: 20832, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Infos, Size: 12, Base1: 45472, Base2: 0 +S4_Conv2d_172x1x10x4_Relu For Iter Space: 0 Iteration count: 4 (Last one is truncated), Given L1 Memory: 48736, Used L1 Memory: 45484, Reusable Memory: 3252, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S7_Conv2d_172x1x3x3_Relu ======================= +S7_Conv2d_172x1x3x3_Relu Partition[0] Size = 63665 (Min: 60, Max: 165257), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=13 + In Dim: 27, TileOverlap: 1, Ratio: 2.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 384 Bytes will require 688 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 96 Bytes will require 172 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 Bytes will require 172 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 864 Bytes will require 1548 Bytes buffer +S7_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 43000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 24688, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 24860, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 25032, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 26580, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 32820, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 45300, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45312, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45312, L2Move: 56769, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 48 [48*3 + 28], Iteration for Tiled Space: 1 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: In, Size: 12000, Base1: 0, Base2: 12000 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 24000, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 24688, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 24860, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 25032, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 3120, Base1: 26580, Base2: 29700 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 12480, Base1: 32820, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45300, Base2: 0 +S7_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45312, Reusable Memory: 3424, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S10_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S10_Conv2d_172x172x1x1_Relu ======================= +S10_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S10_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S10_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S10_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S10_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S10_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S10_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S10_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S13_Conv2d_172x1x3x3_Relu ======================= +S13_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S13_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S13_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S13_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S13_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S16_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S16_Conv2d_172x172x1x1_Relu ======================= +S16_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S16_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S16_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S16_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S16_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S16_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S16_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S16_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S19_Conv2d_172x1x3x3_Relu ======================= +S19_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S19_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S19_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S19_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S19_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S22_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S22_Conv2d_172x172x1x1_Relu ======================= +S22_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S22_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S22_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S22_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S22_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S22_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S22_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S22_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S25_Conv2d_172x1x3x3_Relu ======================= +S25_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S25_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S25_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S25_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S25_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S28_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S28_Conv2d_172x172x1x1_Relu ======================= +S28_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S28_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S28_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S28_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S28_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S28_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S28_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S28_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 11180 + +==== Process Tiling For User Kernel: S29_AveragePool_13x5 ======================= +S29_AveragePool_13x5 Partition[0] Size = 23063 (Min: 130, Max: 22731), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=1 + In Dim: 13, TileOverlap: 11, Ratio: 2.000000 + Out Dim: 1, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S29_AveragePool_13x5, Total Raw Memory: 11364 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S29_AveragePool_13x5, Arg: In, Size: 11180, Base1: 0, Base2: 0 +Ker: S29_AveragePool_13x5, Arg: Out, Size: 172, Base1: 11180, Base2: 0 +Ker: S29_AveragePool_13x5, Arg: Infos, Size: 12, Base1: 11352, Base2: 0 +S29_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 11364, Reusable Memory: 37372, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S32_Linear_12x172x1x1, Linear: InDim: 172, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S32_Linear_12x172x1x1 ======================= +S32_Linear_12x172x1x1 Partition[0] Size = 4491 (Min: 0, Max: 4575), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=1 + +Kernel: S32_Linear_12x172x1x1, Total Raw Memory: 2332 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S32_Linear_12x172x1x1, Arg: In, Size: 172, Base1: 0, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Filter, Size: 2064, Base1: 172, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Bias, Size: 48, Base1: 2236, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Out, Size: 12, Base1: 2284, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Scale, Size: 12, Base1: 2296, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: ScaleN, Size: 12, Base1: 2308, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Infos, Size: 12, Base1: 2320, Base2: 0 +S32_Linear_12x172x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 2332, Reusable Memory: 46404, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S33_SoftMax ======================= + S33_SoftMax Partition[0] Size = 51 (Min: 8, Max: 63), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=12 + In Dim: 12, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 12, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S33_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S33_SoftMax, Arg: In, Size: 12, Base1: 0, Base2: 0 +Ker: S33_SoftMax, Arg: Out, Size: 24, Base1: 12, Base2: 0 +Ker: S33_SoftMax, Arg: Infos, Size: 12, Base1: 36, Base2: 0 + S33_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S32_Output[ In] Adding Edge From S32_Linear_12x172x1x1 To S33_SoftMax New + Symbol: S29_Output[ In] Adding Edge From S29_AveragePool_13x5 To S32_Linear_12x172x1x1 New + Symbol: S28_Output[ In] Adding Edge From S28_Conv2d_172x172x1x1_Relu To S29_AveragePool_13x5 New + Symbol: S25_Output[ In] Adding Edge From S25_Conv2d_172x1x3x3_Relu To S28_Conv2d_172x172x1x1_Relu New + Symbol: S22_Output[ In] Adding Edge From S22_Conv2d_172x172x1x1_Relu To S25_Conv2d_172x1x3x3_Relu New + Symbol: S19_Output[ In] Adding Edge From S19_Conv2d_172x1x3x3_Relu To S22_Conv2d_172x172x1x1_Relu New + Symbol: S16_Output[ In] Adding Edge From S16_Conv2d_172x172x1x1_Relu To S19_Conv2d_172x1x3x3_Relu New + Symbol: S13_Output[ In] Adding Edge From S13_Conv2d_172x1x3x3_Relu To S16_Conv2d_172x172x1x1_Relu New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_172x172x1x1_Relu To S13_Conv2d_172x1x3x3_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_172x1x3x3_Relu To S10_Conv2d_172x172x1x1_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_172x1x10x4_Relu To S7_Conv2d_172x1x3x3_Relu New + Symbol: Output_1[ Out] Adding Edge From S33_SoftMax To __GraphExit__ New + Symbol: S33_Infos[ In] Adding Edge From __GraphEntry__ To S33_SoftMax New + Symbol: S32_Infos[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 New + Symbol: S32_Mul_shift[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: S32_Mul_scale[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: Dscnnfc1matmul_bias[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: Dscnnfc1weights_quantfakequant[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: S29_Infos[ In] Adding Edge From __GraphEntry__ To S29_AveragePool_13x5 New + Symbol: S28_Infos[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu New + Symbol: S28_Mul_shift[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: S28_Mul_scale[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: S25_Infos[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu New + Symbol: S25_Mul_shift[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: S25_Mul_scale[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: S22_Infos[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu New + Symbol: S22_Mul_shift[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: S22_Mul_scale[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: S19_Infos[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu New + Symbol: S19_Mul_shift[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: S19_Mul_scale[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: S16_Infos[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu New + Symbol: S16_Mul_shift[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: S16_Mul_scale[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu New + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu New + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu New + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Dscnnconv_1conv2d_fold_bias[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Dscnnconv_1weights_quantfakequ[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Input_1[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 54180 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 197153 => Alloc: OK + +[FULL] Remapping [54180 .. 197152] to [0 .. 142972] Align compensation: 3 +[PART] Remapping [0 .. 54179] to [142976 .. 197155] Align compensation: 0 +[PART] Remapping [197153 .. 349999] to [197156 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_m_quantCNN is sucessfull, L2: 197153 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => Input_1 + (null) => Dscnnconv_1weights_quantfakequ + (null) => Dscnnconv_1conv2d_fold_bias + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S4_Infos + (null) => Dscnnconv_ds_1dw_convweights_q + (null) => Dscnnconv_ds_1dw_convdepthwise + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S7_Infos + (null) => Dscnnconv_ds_1pw_convweights_q + (null) => Dscnnconv_ds_1pw_convconv2d_fo + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S10_Infos + (null) => Dscnnconv_ds_2dw_convweights_q + (null) => Dscnnconv_ds_2dw_convdepthwise + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S13_Infos + (null) => Dscnnconv_ds_2pw_convweights_q + (null) => Dscnnconv_ds_2pw_convconv2d_fo + (null) => S16_Mul_scale + (null) => S16_Mul_shift + (null) => S16_Infos + (null) => Dscnnconv_ds_3dw_convweights_q + (null) => Dscnnconv_ds_3dw_convdepthwise + (null) => S19_Mul_scale + (null) => S19_Mul_shift + (null) => S19_Infos + (null) => Dscnnconv_ds_3pw_convweights_q + (null) => Dscnnconv_ds_3pw_convconv2d_fo + (null) => S22_Mul_scale + (null) => S22_Mul_shift + (null) => S22_Infos + (null) => Dscnnconv_ds_4dw_convweights_q + (null) => Dscnnconv_ds_4dw_convdepthwise + (null) => S25_Mul_scale + (null) => S25_Mul_shift + (null) => S25_Infos + (null) => Dscnnconv_ds_4pw_convweights_q + (null) => Dscnnconv_ds_4pw_convconv2d_fo + (null) => S28_Mul_scale + (null) => S28_Mul_shift + (null) => S28_Infos + (null) => S29_Infos + (null) => Dscnnfc1weights_quantfakequant + (null) => Dscnnfc1matmul_bias + (null) => S32_Mul_scale + (null) => S32_Mul_shift + (null) => S32_Infos + (null) => S33_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 + + Living Dynamic Symbols: [Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S4_Conv2d_172x1x10x4_Relu, Operations: 1763000 + I In => Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 1460, TileOverhead: 2.979592, L2Buff: 0, Addr: 0 +CI PartBuff Filter => Dscnnconv_1weights_quantfakequ --L2-- Size: 6880, L3_Move: 0, L2_Move: 6880, TileOverhead: 1.000000, L2Buff: 0, Addr: 1472 +CI Buff Bias => Dscnnconv_1conv2d_fold_bias --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 440 + O Out => S4_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8512 +CI Buff Scale => S4_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1128 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1300 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45472 + Kernel Memory : L3: 0, L2: 51411 + Kernel Total Memory: 51411, L3 moves: 0, L2 moves: 52381, Move overhead: 1.018868 + Kernel Operations : 1763000 [KernelOper/GraphOper: 17.780609%], Move/Operation ratio: [L3: 0.000000, L2: 0.029711] + Successors: 2 + + Living Dynamic Symbols: [Input_1] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S7_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S4_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_1dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 25032 +CI Buff Bias => Dscnnconv_ds_1dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26580 +CI Buff Scale => S7_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24688 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24860 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45300 + Kernel Memory : L3: 0, L2: 56769 + Kernel Total Memory: 56769, L3 moves: 0, L2 moves: 56769, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.507773] + Successors: 3 + + Living Dynamic Symbols: [S4_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S10_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_1pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_1pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S10_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S10_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 4 + + Living Dynamic Symbols: [S7_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S13_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S10_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_2dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_2dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S13_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S13_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 5 + + Living Dynamic Symbols: [S10_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S16_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S13_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_2pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_2pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S16_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S16_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S16_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S16_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 6 + + Living Dynamic Symbols: [S13_Output] [S16_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S19_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S16_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_3dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_3dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S19_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S19_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S19_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S19_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 7 + + Living Dynamic Symbols: [S16_Output] [S19_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S22_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S19_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_3pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_3pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S22_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S22_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S22_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S22_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 8 + + Living Dynamic Symbols: [S19_Output] [S22_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S25_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S22_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_4dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_4dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S25_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S25_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S25_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S25_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 9 + + Living Dynamic Symbols: [S22_Output] [S25_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S28_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S25_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_4pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_4pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S28_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S28_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S28_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S28_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 10 + + Living Dynamic Symbols: [S25_Output] [S28_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S29_AveragePool_13x5, Operations: 11180 + I Buff In => S28_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S29_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11180 +CI Buff Infos => S29_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 11352 + Kernel Memory : L3: 0, L2: 11361 + Kernel Total Memory: 11361, L3 moves: 0, L2 moves: 11361, Move overhead: 1.000000 + Kernel Operations : 11180 [KernelOper/GraphOper: 0.112755%], Move/Operation ratio: [L3: 0.000000, L2: 1.016190] + Successors: 11 + + Living Dynamic Symbols: [S28_Output] [S29_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S32_Linear_12x172x1x1, Operations: 2064 + I Buff In => S29_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnfc1weights_quantfakequant --L2-- Size: 2064, L3_Move: 0, L2_Move: 2064, TileOverhead: 1.000000, L2Buff: 0, Addr: 172 +CI Buff Bias => Dscnnfc1matmul_bias --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 2236 + O Buff Out => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2284 +CI Buff Scale => S32_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2296 +CI Buff ScaleN => S32_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2308 +CI Buff Infos => S32_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 2320 + Kernel Memory : L3: 0, L2: 2329 + Kernel Total Memory: 2329, L3 moves: 0, L2 moves: 2329, Move overhead: 1.000000 + Kernel Operations : 2064 [KernelOper/GraphOper: 0.020816%], Move/Operation ratio: [L3: 0.000000, L2: 1.128392] + Successors: 12 + + Living Dynamic Symbols: [S29_Output] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S33_SoftMax, Operations: 12 + I Buff In => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S33_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000121%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 56769 + Graph nodes min global memory: L3: 0, L2: 56772 + Graph sum of kernel arguments size: 408702, L3 moves: 0, L2 moves: 499112, Move overhead: 1.221213 + Graph total operations: 9915296 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 408702, Total L3_Move: 0, Total L2_Move: 499112, Tiling Overhead Average: 1.221213 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 54180 + Const 0 30628 + Total 0 56772 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + Input_1 Externally allocated + Dscnnconv_1weights_quantfakequ INSTALL: HyperFlash[ 0: 13]@ 118336 LOAD: L2[ 0: 13]@ 118336 EXEC: L2[ 0: 13]@ 118336 , Size: 6880 + Dscnnconv_1conv2d_fold_bias INSTALL: HyperFlash[ 0: 13]@ 133472 LOAD: L2[ 0: 13]@ 133472 EXEC: L2[ 0: 13]@ 133472 , Size: 688 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 139664 LOAD: L2[ 0: 13]@ 139664 EXEC: L2[ 0: 13]@ 139664 , Size: 172 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 139836 LOAD: L2[ 0: 13]@ 139836 EXEC: L2[ 0: 13]@ 139836 , Size: 172 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 142808 LOAD: L2[ 0: 13]@ 142808 EXEC: L2[ 0: 13]@ 142808 , Size: 9 + Dscnnconv_ds_1dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 127280 LOAD: L2[ 0: 13]@ 127280 EXEC: L2[ 0: 13]@ 127280 , Size: 1548 + Dscnnconv_ds_1dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 134160 LOAD: L2[ 0: 13]@ 134160 EXEC: L2[ 0: 13]@ 134160 , Size: 688 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140008 LOAD: L2[ 0: 13]@ 140008 EXEC: L2[ 0: 13]@ 140008 , Size: 172 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140180 LOAD: L2[ 0: 13]@ 140180 EXEC: L2[ 0: 13]@ 140180 , Size: 172 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 142820 LOAD: L2[ 0: 13]@ 142820 EXEC: L2[ 0: 13]@ 142820 , Size: 9 + Dscnnconv_ds_1pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 29584 + Dscnnconv_ds_1pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 134848 LOAD: L2[ 0: 13]@ 134848 EXEC: L2[ 0: 13]@ 134848 , Size: 688 + S10_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140352 LOAD: L2[ 0: 13]@ 140352 EXEC: L2[ 0: 13]@ 140352 , Size: 172 + S10_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140524 LOAD: L2[ 0: 13]@ 140524 EXEC: L2[ 0: 13]@ 140524 , Size: 172 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 142832 LOAD: L2[ 0: 13]@ 142832 EXEC: L2[ 0: 13]@ 142832 , Size: 9 + Dscnnconv_ds_2dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 128828 LOAD: L2[ 0: 13]@ 128828 EXEC: L2[ 0: 13]@ 128828 , Size: 1548 + Dscnnconv_ds_2dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 135536 LOAD: L2[ 0: 13]@ 135536 EXEC: L2[ 0: 13]@ 135536 , Size: 688 + S13_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140696 LOAD: L2[ 0: 13]@ 140696 EXEC: L2[ 0: 13]@ 140696 , Size: 172 + S13_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140868 LOAD: L2[ 0: 13]@ 140868 EXEC: L2[ 0: 13]@ 140868 , Size: 172 + S13_Infos INSTALL: HyperFlash[ 0: 13]@ 142844 LOAD: L2[ 0: 13]@ 142844 EXEC: L2[ 0: 13]@ 142844 , Size: 9 + Dscnnconv_ds_2pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 29584 LOAD: L2[ 0: 13]@ 29584 EXEC: L2[ 0: 13]@ 29584 , Size: 29584 + Dscnnconv_ds_2pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 136224 LOAD: L2[ 0: 13]@ 136224 EXEC: L2[ 0: 13]@ 136224 , Size: 688 + S16_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141040 LOAD: L2[ 0: 13]@ 141040 EXEC: L2[ 0: 13]@ 141040 , Size: 172 + S16_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141212 LOAD: L2[ 0: 13]@ 141212 EXEC: L2[ 0: 13]@ 141212 , Size: 172 + S16_Infos INSTALL: HyperFlash[ 0: 13]@ 142856 LOAD: L2[ 0: 13]@ 142856 EXEC: L2[ 0: 13]@ 142856 , Size: 9 + Dscnnconv_ds_3dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 130376 LOAD: L2[ 0: 13]@ 130376 EXEC: L2[ 0: 13]@ 130376 , Size: 1548 + Dscnnconv_ds_3dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 136912 LOAD: L2[ 0: 13]@ 136912 EXEC: L2[ 0: 13]@ 136912 , Size: 688 + S19_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141384 LOAD: L2[ 0: 13]@ 141384 EXEC: L2[ 0: 13]@ 141384 , Size: 172 + S19_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141556 LOAD: L2[ 0: 13]@ 141556 EXEC: L2[ 0: 13]@ 141556 , Size: 172 + S19_Infos INSTALL: HyperFlash[ 0: 13]@ 142868 LOAD: L2[ 0: 13]@ 142868 EXEC: L2[ 0: 13]@ 142868 , Size: 9 + Dscnnconv_ds_3pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 59168 LOAD: L2[ 0: 13]@ 59168 EXEC: L2[ 0: 13]@ 59168 , Size: 29584 + Dscnnconv_ds_3pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 137600 LOAD: L2[ 0: 13]@ 137600 EXEC: L2[ 0: 13]@ 137600 , Size: 688 + S22_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141728 LOAD: L2[ 0: 13]@ 141728 EXEC: L2[ 0: 13]@ 141728 , Size: 172 + S22_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141900 LOAD: L2[ 0: 13]@ 141900 EXEC: L2[ 0: 13]@ 141900 , Size: 172 + S22_Infos INSTALL: HyperFlash[ 0: 13]@ 142880 LOAD: L2[ 0: 13]@ 142880 EXEC: L2[ 0: 13]@ 142880 , Size: 9 + Dscnnconv_ds_4dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 131924 LOAD: L2[ 0: 13]@ 131924 EXEC: L2[ 0: 13]@ 131924 , Size: 1548 + Dscnnconv_ds_4dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 138288 LOAD: L2[ 0: 13]@ 138288 EXEC: L2[ 0: 13]@ 138288 , Size: 688 + S25_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142072 LOAD: L2[ 0: 13]@ 142072 EXEC: L2[ 0: 13]@ 142072 , Size: 172 + S25_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142244 LOAD: L2[ 0: 13]@ 142244 EXEC: L2[ 0: 13]@ 142244 , Size: 172 + S25_Infos INSTALL: HyperFlash[ 0: 13]@ 142892 LOAD: L2[ 0: 13]@ 142892 EXEC: L2[ 0: 13]@ 142892 , Size: 9 + Dscnnconv_ds_4pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 88752 LOAD: L2[ 0: 13]@ 88752 EXEC: L2[ 0: 13]@ 88752 , Size: 29584 + Dscnnconv_ds_4pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 138976 LOAD: L2[ 0: 13]@ 138976 EXEC: L2[ 0: 13]@ 138976 , Size: 688 + S28_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142416 LOAD: L2[ 0: 13]@ 142416 EXEC: L2[ 0: 13]@ 142416 , Size: 172 + S28_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142588 LOAD: L2[ 0: 13]@ 142588 EXEC: L2[ 0: 13]@ 142588 , Size: 172 + S28_Infos INSTALL: HyperFlash[ 0: 13]@ 142904 LOAD: L2[ 0: 13]@ 142904 EXEC: L2[ 0: 13]@ 142904 , Size: 9 + S29_Infos INSTALL: HyperFlash[ 0: 13]@ 142916 LOAD: L2[ 0: 13]@ 142916 EXEC: L2[ 0: 13]@ 142916 , Size: 9 + Dscnnfc1weights_quantfakequant INSTALL: HyperFlash[ 0: 13]@ 125216 LOAD: L2[ 0: 13]@ 125216 EXEC: L2[ 0: 13]@ 125216 , Size: 2064 + Dscnnfc1matmul_bias INSTALL: HyperFlash[ 0: 13]@ 142760 LOAD: L2[ 0: 13]@ 142760 EXEC: L2[ 0: 13]@ 142760 , Size: 48 + S32_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142928 LOAD: L2[ 0: 13]@ 142928 EXEC: L2[ 0: 13]@ 142928 , Size: 12 + S32_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142940 LOAD: L2[ 0: 13]@ 142940 EXEC: L2[ 0: 13]@ 142940 , Size: 12 + S32_Infos INSTALL: HyperFlash[ 0: 13]@ 142952 LOAD: L2[ 0: 13]@ 142952 EXEC: L2[ 0: 13]@ 142952 , Size: 9 + S33_Infos INSTALL: HyperFlash[ 0: 13]@ 142964 LOAD: L2[ 0: 13]@ 142964 EXEC: L2[ 0: 13]@ 142964 , Size: 9 + Output_1 Externally allocated + S4_Output EXEC: L2[ 1: 2]@ 154156 , Size: 43000 + S7_Output EXEC: L2[ 2: 3]@ 142976 , Size: 11180 + S10_Output EXEC: L2[ 3: 4]@ 154156 , Size: 11180 + S13_Output EXEC: L2[ 4: 5]@ 142976 , Size: 11180 + S16_Output EXEC: L2[ 5: 6]@ 154156 , Size: 11180 + S19_Output EXEC: L2[ 6: 7]@ 142976 , Size: 11180 + S22_Output EXEC: L2[ 7: 8]@ 154156 , Size: 11180 + S25_Output EXEC: L2[ 8: 9]@ 165336 , Size: 11180 + S28_Output EXEC: L2[ 9: 10]@ 142976 , Size: 11180 + S29_Output EXEC: L2[ 10: 11]@ 154156 , Size: 172 + S32_Output EXEC: L2[ 11: 12]@ 142976 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S4_Conv2d_172x1x10x4_Relu +Generating Code For User Kernel: S7_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S10_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S13_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S16_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S19_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S22_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S25_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S28_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S29_AveragePool_13x5 +Generating Code For User Kernel: S32_Linear_12x172x1x1 +Generating Code For User Kernel: S33_SoftMax +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1weights_quantfakequ.tensor: 6880 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1conv2d_fold_bias.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S29_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1weights_quantfakequant.tensor: 2064 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1matmul_bias.tensor: 12 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S33_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_m_quant_L3_Flash_Const.dat (size 142976) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 45492 +L2 Memory size (Bytes) : Given: 350000, Used: 197153 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 499112 Bytes +Sum of all Kernels arguments size : 408702 Bytes +Tiling Bandwith overhead : 1.221213 Move/KerArgSize +Sum of baseline bandwidth : 15711780 Bytes +Percentage of baseline BW for L2 : 3.17667 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 9915296 Operations +Total amount of flash coefficients : 142976 Bytes + +Basic kernels library : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_m_quant.h +Output Directory : BUILD_MODEL_SQ8_EMUL + +The following files have been generated: + KWS_ds_cnn_m_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_m_quantKernels.h Header file for the generated C code + KWS_ds_cnn_m_quant_L3_Flash_Const.dat Flash content for Graph constants +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o -lm +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +W0608 13:46:59.851809 140185943275328 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +W0608 13:46:59.852335 140185943275328 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +W0608 13:46:59.852656 140185943275328 module_wrapper.py:139] From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +2021-06-08 13:46:59.853907: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA +2021-06-08 13:46:59.862975: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2096060000 Hz +2021-06-08 13:46:59.863663: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x562d4bdebce0 initialized for platform Host (this does not guarantee that XLA will be used). Devices: +2021-06-08 13:46:59.863717: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2021-06-08 13:46:59.865911: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/marco-gwt/GWT/gap_sdk/install/workstation/lib +2021-06-08 13:46:59.865988: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: UNKNOWN ERROR (303) +2021-06-08 13:46:59.866022: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ubuntu): /proc/driver/nvidia/version does not exist +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +W0608 13:48:39.440713 140185943275328 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +W0608 13:48:39.442981 140185943275328 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +W0608 13:48:39.781874 140185943275328 module_wrapper.py:139] From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +INFO:tensorflow:Validation set size:4445 +I0608 13:48:39.782142 140185943275328 test_accuracy_emul.py:111] Validation set size:4445 +INFO:tensorflow:Test set size:4890 +I0608 13:50:44.848578 140185943275328 test_accuracy_emul.py:157] Test set size:4890 +rm: cannot remove 'test.pgm': No such file or directory +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': True} +Pred/Tot: 97/ 100 Accuracy: 97.00% +Pred/Tot: 197/ 200 Accuracy: 98.50% +Pred/Tot: 290/ 300 Accuracy: 96.67% +Pred/Tot: 385/ 400 Accuracy: 96.25% +Pred/Tot: 480/ 500 Accuracy: 96.00% +Pred/Tot: 573/ 600 Accuracy: 95.50% +Pred/Tot: 669/ 700 Accuracy: 95.57% +Pred/Tot: 761/ 800 Accuracy: 95.12% +Pred/Tot: 858/ 900 Accuracy: 95.33% +Pred/Tot: 955/1000 Accuracy: 95.50% +Pred/Tot: 1049/1100 Accuracy: 95.36% +Pred/Tot: 1142/1200 Accuracy: 95.17% +Pred/Tot: 1228/1300 Accuracy: 94.46% +Pred/Tot: 1320/1400 Accuracy: 94.29% +Pred/Tot: 1411/1500 Accuracy: 94.07% +Pred/Tot: 1509/1600 Accuracy: 94.31% +Pred/Tot: 1603/1700 Accuracy: 94.29% +Pred/Tot: 1700/1800 Accuracy: 94.44% +Pred/Tot: 1797/1900 Accuracy: 94.58% +Pred/Tot: 1894/2000 Accuracy: 94.70% +Pred/Tot: 1990/2100 Accuracy: 94.76% +Pred/Tot: 2085/2200 Accuracy: 94.77% +Pred/Tot: 2181/2300 Accuracy: 94.83% +Pred/Tot: 2273/2400 Accuracy: 94.71% +Pred/Tot: 2364/2500 Accuracy: 94.56% +Pred/Tot: 2457/2600 Accuracy: 94.50% +Pred/Tot: 2551/2700 Accuracy: 94.48% +Pred/Tot: 2646/2800 Accuracy: 94.50% +Pred/Tot: 2740/2900 Accuracy: 94.48% +Pred/Tot: 2830/3000 Accuracy: 94.33% +Pred/Tot: 2927/3100 Accuracy: 94.42% +Pred/Tot: 3024/3200 Accuracy: 94.50% +Pred/Tot: 3120/3300 Accuracy: 94.55% +Pred/Tot: 3210/3400 Accuracy: 94.41% +Pred/Tot: 3302/3500 Accuracy: 94.34% +Pred/Tot: 3399/3600 Accuracy: 94.42% +Pred/Tot: 3490/3700 Accuracy: 94.32% +Pred/Tot: 3588/3800 Accuracy: 94.42% +Pred/Tot: 3682/3900 Accuracy: 94.41% +Pred/Tot: 3776/4000 Accuracy: 94.40% +Pred/Tot: 3872/4100 Accuracy: 94.44% +Pred/Tot: 3966/4200 Accuracy: 94.43% +Pred/Tot: 4061/4300 Accuracy: 94.44% +Pred/Tot: 4153/4400 Accuracy: 94.39% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4196/4444 Accuracy: 94.42% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 1 333 1 3 4 5 0 7 8 2 3 4] + [ 0 4 388 0 0 2 1 0 0 0 0 2] + [ 0 10 1 380 0 7 1 0 0 0 2 5] + [ 0 2 1 0 327 2 4 0 2 6 6 0] + [ 0 1 0 7 0 360 0 0 0 0 4 5] + [ 0 3 6 1 1 2 336 2 0 0 0 1] + [ 0 9 1 1 0 1 1 349 0 0 0 1] + [ 0 6 0 1 5 1 0 1 343 4 1 1] + [ 0 4 0 0 23 1 3 0 4 335 2 1] + [ 1 2 0 0 10 1 0 0 0 1 334 1] + [ 0 7 0 14 0 6 0 0 1 2 2 340]] +Pred/Tot: 95/ 100 Accuracy: 95.00% +Pred/Tot: 190/ 200 Accuracy: 95.00% +Pred/Tot: 281/ 300 Accuracy: 93.67% +Pred/Tot: 375/ 400 Accuracy: 93.75% +Pred/Tot: 468/ 500 Accuracy: 93.60% +Pred/Tot: 562/ 600 Accuracy: 93.67% +Pred/Tot: 657/ 700 Accuracy: 93.86% +Pred/Tot: 751/ 800 Accuracy: 93.88% +Pred/Tot: 846/ 900 Accuracy: 94.00% +Pred/Tot: 943/1000 Accuracy: 94.30% +Pred/Tot: 1034/1100 Accuracy: 94.00% +Pred/Tot: 1130/1200 Accuracy: 94.17% +Pred/Tot: 1222/1300 Accuracy: 94.00% +Pred/Tot: 1316/1400 Accuracy: 94.00% +Pred/Tot: 1409/1500 Accuracy: 93.93% +Pred/Tot: 1504/1600 Accuracy: 94.00% +Pred/Tot: 1600/1700 Accuracy: 94.12% +Pred/Tot: 1698/1800 Accuracy: 94.33% +Pred/Tot: 1787/1900 Accuracy: 94.05% +Pred/Tot: 1880/2000 Accuracy: 94.00% +Pred/Tot: 1973/2100 Accuracy: 93.95% +Pred/Tot: 2067/2200 Accuracy: 93.95% +Pred/Tot: 2159/2300 Accuracy: 93.87% +Pred/Tot: 2251/2400 Accuracy: 93.79% +Pred/Tot: 2345/2500 Accuracy: 93.80% +Pred/Tot: 2438/2600 Accuracy: 93.77% +Pred/Tot: 2533/2700 Accuracy: 93.81% +Pred/Tot: 2629/2800 Accuracy: 93.89% +Pred/Tot: 2723/2900 Accuracy: 93.90% +Pred/Tot: 2818/3000 Accuracy: 93.93% +Pred/Tot: 2914/3100 Accuracy: 94.00% +Pred/Tot: 3010/3200 Accuracy: 94.06% +Pred/Tot: 3103/3300 Accuracy: 94.03% +Pred/Tot: 3197/3400 Accuracy: 94.03% +Pred/Tot: 3293/3500 Accuracy: 94.09% +Pred/Tot: 3391/3600 Accuracy: 94.19% +Pred/Tot: 3484/3700 Accuracy: 94.16% +Pred/Tot: 3580/3800 Accuracy: 94.21% +Pred/Tot: 3673/3900 Accuracy: 94.18% +Pred/Tot: 3766/4000 Accuracy: 94.15% +Pred/Tot: 3859/4100 Accuracy: 94.12% +Pred/Tot: 3955/4200 Accuracy: 94.17% +Pred/Tot: 4051/4300 Accuracy: 94.21% +Pred/Tot: 4148/4400 Accuracy: 94.27% +Pred/Tot: 4241/4500 Accuracy: 94.24% +Pred/Tot: 4329/4600 Accuracy: 94.11% +Pred/Tot: 4423/4700 Accuracy: 94.11% +Pred/Tot: 4516/4800 Accuracy: 94.08% + +FINAL TESTING ACCURACY: +Pred/Tot: 4601/4889 Accuracy: 94.11% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 382 0 3 3 5 3 3 4 2 2 1] + [ 0 7 406 4 0 1 1 0 0 0 0 0] + [ 0 5 0 389 2 3 1 0 0 1 0 4] + [ 0 6 0 0 400 3 0 0 4 7 4 1] + [ 0 5 1 14 0 373 1 0 2 0 1 9] + [ 0 7 6 1 0 0 397 1 0 0 0 0] + [ 0 14 0 0 0 3 6 370 1 0 1 1] + [ 0 7 0 0 3 9 0 0 363 12 1 1] + [ 0 9 0 1 13 1 0 0 5 368 2 3] + [ 0 5 1 0 6 5 0 0 1 2 387 4] + [ 0 9 0 26 1 2 0 2 1 1 2 358]] diff --git a/accuracy_log/log_test_medium_power_v2_norm9.txt b/accuracy_log/log_test_medium_power_v2_norm9.txt new file mode 100644 index 0000000..d9ed416 --- /dev/null +++ b/accuracy_log/log_test_medium_power_v2_norm9.txt @@ -0,0 +1,1850 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_m_quant_power.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 1 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=1 LARGE=0 WITH_MFCC=1 USE_POWER=1 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_SQ8_EMUL +cp model/KWS_ds_cnn_m_quant_power.tflite BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite +echo "GENERATING NNTOOL STATE FILE" +GENERATING NNTOOL STATE FILE +nntool -s model/nntool_script BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +open - opening graph file BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite load_quantization = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +forwards SOFTMAX_0_11 in: -35.73<(i8-0.00)*0.27912489<35.45 out: None stop [] fusion False +handler SoftmaxTanHMult selected for SoftMaxParameters(SOFTMAX_0_11) +forwards handler SOFTMAX_0_11 returned in: -64.00<(i8-0.00)*0.50000000<63.50 forced out: -1.00<(i16-0.00)*0.00003052<1.00 fusion False +forwards in edge 0 does not match was -35.73<(i8-0.00)*0.27912489<35.45 need -64.00<(i8-0.00)*0.50000000<63.50 forced +go backwackwards to F 12x1x1x172 B 1 +backwards FULLY_CONNECTED_0_10 in: -9.24<(i8-0.00)*0.07214916<9.16,chan<(i8-0.00)*chan W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1763000 + +==== Process Tiling For User Kernel: S4_Conv2d_172x1x10x4_Relu ======================= +S4_Conv2d_172x1x10x4_Relu Partition[0] Size = 194929 (Min: 200, Max: 275121), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=25 + In Dim: 58, TileOverlap: 8, Ratio: 2.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: Bias, was using 704 Bytes will require 688 Bytes buffer +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: Scale, was using 176 Bytes will require 172 Bytes buffer +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: ScaleN, was using 176 Bytes will require 172 Bytes buffer +S4_Conv2d_172x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 4 Parametric Space: [D1, M0=88] Parametric Space: [D0, M1=1] + In : Ratio: 2.000000, FixDim: 10, VarDim: 22 [ 49], Size: 440, Total: 440, Move: 1460 (Decl x 2.979592) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 1128, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 1300, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 1472, Move: 172 (Decl x 1.000000) L2 +@ Filter : Ratio: 0.000000, Size: 7040, Total: 8512, Move: 6880 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 12320, Total: 20832, Move: 43000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 24640, Total: 45472, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45484, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_172x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 45484, L2Move: 52381, L3Move: 0, Tiling Overhead: 1.018868 +S4_Conv2d_172x1x10x4_Relu Partial buffering on Arg: Filter, From: D0 To: D1. Current is (Par) 1 x [W:1, H:1] x 40 => Partial buffer size is 7040 Bytes +S4_Conv2d_172x1x10x4_Relu Found Parametric value for space D1 (Initial: 172, Div: 8) = 88 [88*1 + 84] and space D0 (Initial: 1, Div: 4) = 1 [1*1 + 0], Iteration for Tiled Space: 4 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: In, Size: 220, Base1: 0, Base2: 220 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Bias, Size: 688, Base1: 440, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Scale, Size: 172, Base1: 1128, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: ScaleN, Size: 172, Base1: 1300, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Filter, Size: 3520, Base1: 1472, Base2: 4992 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Out, Size: 6160, Base1: 8512, Base2: 14672 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: ConvOut, Size: 24640, Base1: 20832, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Infos, Size: 12, Base1: 45472, Base2: 0 +S4_Conv2d_172x1x10x4_Relu For Iter Space: 0 Iteration count: 4 (Last one is truncated), Given L1 Memory: 48736, Used L1 Memory: 45484, Reusable Memory: 3252, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S7_Conv2d_172x1x3x3_Relu ======================= +S7_Conv2d_172x1x3x3_Relu Partition[0] Size = 63665 (Min: 60, Max: 165257), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=13 + In Dim: 27, TileOverlap: 1, Ratio: 2.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 384 Bytes will require 688 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 96 Bytes will require 172 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 Bytes will require 172 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 864 Bytes will require 1548 Bytes buffer +S7_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 43000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 24688, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 24860, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 25032, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 26580, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 32820, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 45300, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45312, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45312, L2Move: 56769, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 48 [48*3 + 28], Iteration for Tiled Space: 1 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: In, Size: 12000, Base1: 0, Base2: 12000 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 24000, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 24688, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 24860, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 25032, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 3120, Base1: 26580, Base2: 29700 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 12480, Base1: 32820, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45300, Base2: 0 +S7_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45312, Reusable Memory: 3424, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S10_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S10_Conv2d_172x172x1x1_Relu ======================= +S10_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S10_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S10_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S10_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S10_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S10_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S10_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S10_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S13_Conv2d_172x1x3x3_Relu ======================= +S13_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S13_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S13_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S13_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S13_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S16_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S16_Conv2d_172x172x1x1_Relu ======================= +S16_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S16_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S16_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S16_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S16_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S16_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S16_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S16_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S19_Conv2d_172x1x3x3_Relu ======================= +S19_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S19_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S19_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S19_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S19_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S22_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S22_Conv2d_172x172x1x1_Relu ======================= +S22_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S22_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S22_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S22_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S22_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S22_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S22_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S22_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S25_Conv2d_172x1x3x3_Relu ======================= +S25_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S25_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S25_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S25_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S25_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S28_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S28_Conv2d_172x172x1x1_Relu ======================= +S28_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S28_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S28_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S28_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S28_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S28_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S28_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S28_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 11180 + +==== Process Tiling For User Kernel: S29_AveragePool_13x5 ======================= +S29_AveragePool_13x5 Partition[0] Size = 23063 (Min: 130, Max: 22731), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=1 + In Dim: 13, TileOverlap: 11, Ratio: 2.000000 + Out Dim: 1, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S29_AveragePool_13x5, Total Raw Memory: 11364 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S29_AveragePool_13x5, Arg: In, Size: 11180, Base1: 0, Base2: 0 +Ker: S29_AveragePool_13x5, Arg: Out, Size: 172, Base1: 11180, Base2: 0 +Ker: S29_AveragePool_13x5, Arg: Infos, Size: 12, Base1: 11352, Base2: 0 +S29_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 11364, Reusable Memory: 37372, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S32_Linear_12x172x1x1, Linear: InDim: 172, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S32_Linear_12x172x1x1 ======================= +S32_Linear_12x172x1x1 Partition[0] Size = 4491 (Min: 0, Max: 4575), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=1 + +Kernel: S32_Linear_12x172x1x1, Total Raw Memory: 2332 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S32_Linear_12x172x1x1, Arg: In, Size: 172, Base1: 0, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Filter, Size: 2064, Base1: 172, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Bias, Size: 48, Base1: 2236, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Out, Size: 12, Base1: 2284, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Scale, Size: 12, Base1: 2296, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: ScaleN, Size: 12, Base1: 2308, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Infos, Size: 12, Base1: 2320, Base2: 0 +S32_Linear_12x172x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 2332, Reusable Memory: 46404, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S33_SoftMax ======================= + S33_SoftMax Partition[0] Size = 51 (Min: 8, Max: 63), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=12 + In Dim: 12, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 12, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S33_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S33_SoftMax, Arg: In, Size: 12, Base1: 0, Base2: 0 +Ker: S33_SoftMax, Arg: Out, Size: 24, Base1: 12, Base2: 0 +Ker: S33_SoftMax, Arg: Infos, Size: 12, Base1: 36, Base2: 0 + S33_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S32_Output[ In] Adding Edge From S32_Linear_12x172x1x1 To S33_SoftMax New + Symbol: S29_Output[ In] Adding Edge From S29_AveragePool_13x5 To S32_Linear_12x172x1x1 New + Symbol: S28_Output[ In] Adding Edge From S28_Conv2d_172x172x1x1_Relu To S29_AveragePool_13x5 New + Symbol: S25_Output[ In] Adding Edge From S25_Conv2d_172x1x3x3_Relu To S28_Conv2d_172x172x1x1_Relu New + Symbol: S22_Output[ In] Adding Edge From S22_Conv2d_172x172x1x1_Relu To S25_Conv2d_172x1x3x3_Relu New + Symbol: S19_Output[ In] Adding Edge From S19_Conv2d_172x1x3x3_Relu To S22_Conv2d_172x172x1x1_Relu New + Symbol: S16_Output[ In] Adding Edge From S16_Conv2d_172x172x1x1_Relu To S19_Conv2d_172x1x3x3_Relu New + Symbol: S13_Output[ In] Adding Edge From S13_Conv2d_172x1x3x3_Relu To S16_Conv2d_172x172x1x1_Relu New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_172x172x1x1_Relu To S13_Conv2d_172x1x3x3_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_172x1x3x3_Relu To S10_Conv2d_172x172x1x1_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_172x1x10x4_Relu To S7_Conv2d_172x1x3x3_Relu New + Symbol: Output_1[ Out] Adding Edge From S33_SoftMax To __GraphExit__ New + Symbol: S33_Infos[ In] Adding Edge From __GraphEntry__ To S33_SoftMax New + Symbol: S32_Infos[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 New + Symbol: S32_Mul_shift[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: S32_Mul_scale[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: Dscnnfc1matmul_bias[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: Dscnnfc1weights_quantfakequant[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: S29_Infos[ In] Adding Edge From __GraphEntry__ To S29_AveragePool_13x5 New + Symbol: S28_Infos[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu New + Symbol: S28_Mul_shift[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: S28_Mul_scale[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: S25_Infos[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu New + Symbol: S25_Mul_shift[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: S25_Mul_scale[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: S22_Infos[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu New + Symbol: S22_Mul_shift[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: S22_Mul_scale[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: S19_Infos[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu New + Symbol: S19_Mul_shift[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: S19_Mul_scale[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: S16_Infos[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu New + Symbol: S16_Mul_shift[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: S16_Mul_scale[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu New + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu New + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu New + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Dscnnconv_1conv2d_fold_bias[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Dscnnconv_1weights_quantfakequ[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Input_1[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 54180 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 197153 => Alloc: OK + +[FULL] Remapping [54180 .. 197152] to [0 .. 142972] Align compensation: 3 +[PART] Remapping [0 .. 54179] to [142976 .. 197155] Align compensation: 0 +[PART] Remapping [197153 .. 349999] to [197156 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_m_quantCNN is sucessfull, L2: 197153 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => Input_1 + (null) => Dscnnconv_1weights_quantfakequ + (null) => Dscnnconv_1conv2d_fold_bias + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S4_Infos + (null) => Dscnnconv_ds_1dw_convweights_q + (null) => Dscnnconv_ds_1dw_convdepthwise + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S7_Infos + (null) => Dscnnconv_ds_1pw_convweights_q + (null) => Dscnnconv_ds_1pw_convconv2d_fo + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S10_Infos + (null) => Dscnnconv_ds_2dw_convweights_q + (null) => Dscnnconv_ds_2dw_convdepthwise + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S13_Infos + (null) => Dscnnconv_ds_2pw_convweights_q + (null) => Dscnnconv_ds_2pw_convconv2d_fo + (null) => S16_Mul_scale + (null) => S16_Mul_shift + (null) => S16_Infos + (null) => Dscnnconv_ds_3dw_convweights_q + (null) => Dscnnconv_ds_3dw_convdepthwise + (null) => S19_Mul_scale + (null) => S19_Mul_shift + (null) => S19_Infos + (null) => Dscnnconv_ds_3pw_convweights_q + (null) => Dscnnconv_ds_3pw_convconv2d_fo + (null) => S22_Mul_scale + (null) => S22_Mul_shift + (null) => S22_Infos + (null) => Dscnnconv_ds_4dw_convweights_q + (null) => Dscnnconv_ds_4dw_convdepthwise + (null) => S25_Mul_scale + (null) => S25_Mul_shift + (null) => S25_Infos + (null) => Dscnnconv_ds_4pw_convweights_q + (null) => Dscnnconv_ds_4pw_convconv2d_fo + (null) => S28_Mul_scale + (null) => S28_Mul_shift + (null) => S28_Infos + (null) => S29_Infos + (null) => Dscnnfc1weights_quantfakequant + (null) => Dscnnfc1matmul_bias + (null) => S32_Mul_scale + (null) => S32_Mul_shift + (null) => S32_Infos + (null) => S33_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 + + Living Dynamic Symbols: [Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S4_Conv2d_172x1x10x4_Relu, Operations: 1763000 + I In => Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 1460, TileOverhead: 2.979592, L2Buff: 0, Addr: 0 +CI PartBuff Filter => Dscnnconv_1weights_quantfakequ --L2-- Size: 6880, L3_Move: 0, L2_Move: 6880, TileOverhead: 1.000000, L2Buff: 0, Addr: 1472 +CI Buff Bias => Dscnnconv_1conv2d_fold_bias --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 440 + O Out => S4_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8512 +CI Buff Scale => S4_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1128 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1300 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45472 + Kernel Memory : L3: 0, L2: 51411 + Kernel Total Memory: 51411, L3 moves: 0, L2 moves: 52381, Move overhead: 1.018868 + Kernel Operations : 1763000 [KernelOper/GraphOper: 17.780609%], Move/Operation ratio: [L3: 0.000000, L2: 0.029711] + Successors: 2 + + Living Dynamic Symbols: [Input_1] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S7_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S4_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_1dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 25032 +CI Buff Bias => Dscnnconv_ds_1dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26580 +CI Buff Scale => S7_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24688 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24860 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45300 + Kernel Memory : L3: 0, L2: 56769 + Kernel Total Memory: 56769, L3 moves: 0, L2 moves: 56769, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.507773] + Successors: 3 + + Living Dynamic Symbols: [S4_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S10_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_1pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_1pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S10_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S10_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 4 + + Living Dynamic Symbols: [S7_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S13_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S10_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_2dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_2dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S13_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S13_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 5 + + Living Dynamic Symbols: [S10_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S16_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S13_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_2pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_2pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S16_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S16_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S16_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S16_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 6 + + Living Dynamic Symbols: [S13_Output] [S16_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S19_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S16_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_3dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_3dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S19_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S19_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S19_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S19_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 7 + + Living Dynamic Symbols: [S16_Output] [S19_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S22_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S19_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_3pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_3pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S22_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S22_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S22_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S22_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 8 + + Living Dynamic Symbols: [S19_Output] [S22_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S25_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S22_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_4dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_4dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S25_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S25_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S25_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S25_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 9 + + Living Dynamic Symbols: [S22_Output] [S25_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S28_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S25_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_4pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_4pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S28_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S28_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S28_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S28_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 10 + + Living Dynamic Symbols: [S25_Output] [S28_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S29_AveragePool_13x5, Operations: 11180 + I Buff In => S28_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S29_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11180 +CI Buff Infos => S29_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 11352 + Kernel Memory : L3: 0, L2: 11361 + Kernel Total Memory: 11361, L3 moves: 0, L2 moves: 11361, Move overhead: 1.000000 + Kernel Operations : 11180 [KernelOper/GraphOper: 0.112755%], Move/Operation ratio: [L3: 0.000000, L2: 1.016190] + Successors: 11 + + Living Dynamic Symbols: [S28_Output] [S29_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S32_Linear_12x172x1x1, Operations: 2064 + I Buff In => S29_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnfc1weights_quantfakequant --L2-- Size: 2064, L3_Move: 0, L2_Move: 2064, TileOverhead: 1.000000, L2Buff: 0, Addr: 172 +CI Buff Bias => Dscnnfc1matmul_bias --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 2236 + O Buff Out => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2284 +CI Buff Scale => S32_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2296 +CI Buff ScaleN => S32_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2308 +CI Buff Infos => S32_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 2320 + Kernel Memory : L3: 0, L2: 2329 + Kernel Total Memory: 2329, L3 moves: 0, L2 moves: 2329, Move overhead: 1.000000 + Kernel Operations : 2064 [KernelOper/GraphOper: 0.020816%], Move/Operation ratio: [L3: 0.000000, L2: 1.128392] + Successors: 12 + + Living Dynamic Symbols: [S29_Output] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S33_SoftMax, Operations: 12 + I Buff In => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S33_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000121%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 56769 + Graph nodes min global memory: L3: 0, L2: 56772 + Graph sum of kernel arguments size: 408702, L3 moves: 0, L2 moves: 499112, Move overhead: 1.221213 + Graph total operations: 9915296 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 408702, Total L3_Move: 0, Total L2_Move: 499112, Tiling Overhead Average: 1.221213 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 54180 + Const 0 30628 + Total 0 56772 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + Input_1 Externally allocated + Dscnnconv_1weights_quantfakequ INSTALL: HyperFlash[ 0: 13]@ 118336 LOAD: L2[ 0: 13]@ 118336 EXEC: L2[ 0: 13]@ 118336 , Size: 6880 + Dscnnconv_1conv2d_fold_bias INSTALL: HyperFlash[ 0: 13]@ 133472 LOAD: L2[ 0: 13]@ 133472 EXEC: L2[ 0: 13]@ 133472 , Size: 688 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 139664 LOAD: L2[ 0: 13]@ 139664 EXEC: L2[ 0: 13]@ 139664 , Size: 172 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 139836 LOAD: L2[ 0: 13]@ 139836 EXEC: L2[ 0: 13]@ 139836 , Size: 172 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 142808 LOAD: L2[ 0: 13]@ 142808 EXEC: L2[ 0: 13]@ 142808 , Size: 9 + Dscnnconv_ds_1dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 127280 LOAD: L2[ 0: 13]@ 127280 EXEC: L2[ 0: 13]@ 127280 , Size: 1548 + Dscnnconv_ds_1dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 134160 LOAD: L2[ 0: 13]@ 134160 EXEC: L2[ 0: 13]@ 134160 , Size: 688 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140008 LOAD: L2[ 0: 13]@ 140008 EXEC: L2[ 0: 13]@ 140008 , Size: 172 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140180 LOAD: L2[ 0: 13]@ 140180 EXEC: L2[ 0: 13]@ 140180 , Size: 172 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 142820 LOAD: L2[ 0: 13]@ 142820 EXEC: L2[ 0: 13]@ 142820 , Size: 9 + Dscnnconv_ds_1pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 29584 + Dscnnconv_ds_1pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 134848 LOAD: L2[ 0: 13]@ 134848 EXEC: L2[ 0: 13]@ 134848 , Size: 688 + S10_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140352 LOAD: L2[ 0: 13]@ 140352 EXEC: L2[ 0: 13]@ 140352 , Size: 172 + S10_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140524 LOAD: L2[ 0: 13]@ 140524 EXEC: L2[ 0: 13]@ 140524 , Size: 172 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 142832 LOAD: L2[ 0: 13]@ 142832 EXEC: L2[ 0: 13]@ 142832 , Size: 9 + Dscnnconv_ds_2dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 128828 LOAD: L2[ 0: 13]@ 128828 EXEC: L2[ 0: 13]@ 128828 , Size: 1548 + Dscnnconv_ds_2dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 135536 LOAD: L2[ 0: 13]@ 135536 EXEC: L2[ 0: 13]@ 135536 , Size: 688 + S13_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140696 LOAD: L2[ 0: 13]@ 140696 EXEC: L2[ 0: 13]@ 140696 , Size: 172 + S13_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140868 LOAD: L2[ 0: 13]@ 140868 EXEC: L2[ 0: 13]@ 140868 , Size: 172 + S13_Infos INSTALL: HyperFlash[ 0: 13]@ 142844 LOAD: L2[ 0: 13]@ 142844 EXEC: L2[ 0: 13]@ 142844 , Size: 9 + Dscnnconv_ds_2pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 29584 LOAD: L2[ 0: 13]@ 29584 EXEC: L2[ 0: 13]@ 29584 , Size: 29584 + Dscnnconv_ds_2pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 136224 LOAD: L2[ 0: 13]@ 136224 EXEC: L2[ 0: 13]@ 136224 , Size: 688 + S16_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141040 LOAD: L2[ 0: 13]@ 141040 EXEC: L2[ 0: 13]@ 141040 , Size: 172 + S16_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141212 LOAD: L2[ 0: 13]@ 141212 EXEC: L2[ 0: 13]@ 141212 , Size: 172 + S16_Infos INSTALL: HyperFlash[ 0: 13]@ 142856 LOAD: L2[ 0: 13]@ 142856 EXEC: L2[ 0: 13]@ 142856 , Size: 9 + Dscnnconv_ds_3dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 130376 LOAD: L2[ 0: 13]@ 130376 EXEC: L2[ 0: 13]@ 130376 , Size: 1548 + Dscnnconv_ds_3dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 136912 LOAD: L2[ 0: 13]@ 136912 EXEC: L2[ 0: 13]@ 136912 , Size: 688 + S19_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141384 LOAD: L2[ 0: 13]@ 141384 EXEC: L2[ 0: 13]@ 141384 , Size: 172 + S19_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141556 LOAD: L2[ 0: 13]@ 141556 EXEC: L2[ 0: 13]@ 141556 , Size: 172 + S19_Infos INSTALL: HyperFlash[ 0: 13]@ 142868 LOAD: L2[ 0: 13]@ 142868 EXEC: L2[ 0: 13]@ 142868 , Size: 9 + Dscnnconv_ds_3pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 59168 LOAD: L2[ 0: 13]@ 59168 EXEC: L2[ 0: 13]@ 59168 , Size: 29584 + Dscnnconv_ds_3pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 137600 LOAD: L2[ 0: 13]@ 137600 EXEC: L2[ 0: 13]@ 137600 , Size: 688 + S22_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141728 LOAD: L2[ 0: 13]@ 141728 EXEC: L2[ 0: 13]@ 141728 , Size: 172 + S22_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141900 LOAD: L2[ 0: 13]@ 141900 EXEC: L2[ 0: 13]@ 141900 , Size: 172 + S22_Infos INSTALL: HyperFlash[ 0: 13]@ 142880 LOAD: L2[ 0: 13]@ 142880 EXEC: L2[ 0: 13]@ 142880 , Size: 9 + Dscnnconv_ds_4dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 131924 LOAD: L2[ 0: 13]@ 131924 EXEC: L2[ 0: 13]@ 131924 , Size: 1548 + Dscnnconv_ds_4dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 138288 LOAD: L2[ 0: 13]@ 138288 EXEC: L2[ 0: 13]@ 138288 , Size: 688 + S25_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142072 LOAD: L2[ 0: 13]@ 142072 EXEC: L2[ 0: 13]@ 142072 , Size: 172 + S25_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142244 LOAD: L2[ 0: 13]@ 142244 EXEC: L2[ 0: 13]@ 142244 , Size: 172 + S25_Infos INSTALL: HyperFlash[ 0: 13]@ 142892 LOAD: L2[ 0: 13]@ 142892 EXEC: L2[ 0: 13]@ 142892 , Size: 9 + Dscnnconv_ds_4pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 88752 LOAD: L2[ 0: 13]@ 88752 EXEC: L2[ 0: 13]@ 88752 , Size: 29584 + Dscnnconv_ds_4pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 138976 LOAD: L2[ 0: 13]@ 138976 EXEC: L2[ 0: 13]@ 138976 , Size: 688 + S28_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142416 LOAD: L2[ 0: 13]@ 142416 EXEC: L2[ 0: 13]@ 142416 , Size: 172 + S28_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142588 LOAD: L2[ 0: 13]@ 142588 EXEC: L2[ 0: 13]@ 142588 , Size: 172 + S28_Infos INSTALL: HyperFlash[ 0: 13]@ 142904 LOAD: L2[ 0: 13]@ 142904 EXEC: L2[ 0: 13]@ 142904 , Size: 9 + S29_Infos INSTALL: HyperFlash[ 0: 13]@ 142916 LOAD: L2[ 0: 13]@ 142916 EXEC: L2[ 0: 13]@ 142916 , Size: 9 + Dscnnfc1weights_quantfakequant INSTALL: HyperFlash[ 0: 13]@ 125216 LOAD: L2[ 0: 13]@ 125216 EXEC: L2[ 0: 13]@ 125216 , Size: 2064 + Dscnnfc1matmul_bias INSTALL: HyperFlash[ 0: 13]@ 142760 LOAD: L2[ 0: 13]@ 142760 EXEC: L2[ 0: 13]@ 142760 , Size: 48 + S32_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142928 LOAD: L2[ 0: 13]@ 142928 EXEC: L2[ 0: 13]@ 142928 , Size: 12 + S32_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142940 LOAD: L2[ 0: 13]@ 142940 EXEC: L2[ 0: 13]@ 142940 , Size: 12 + S32_Infos INSTALL: HyperFlash[ 0: 13]@ 142952 LOAD: L2[ 0: 13]@ 142952 EXEC: L2[ 0: 13]@ 142952 , Size: 9 + S33_Infos INSTALL: HyperFlash[ 0: 13]@ 142964 LOAD: L2[ 0: 13]@ 142964 EXEC: L2[ 0: 13]@ 142964 , Size: 9 + Output_1 Externally allocated + S4_Output EXEC: L2[ 1: 2]@ 154156 , Size: 43000 + S7_Output EXEC: L2[ 2: 3]@ 142976 , Size: 11180 + S10_Output EXEC: L2[ 3: 4]@ 154156 , Size: 11180 + S13_Output EXEC: L2[ 4: 5]@ 142976 , Size: 11180 + S16_Output EXEC: L2[ 5: 6]@ 154156 , Size: 11180 + S19_Output EXEC: L2[ 6: 7]@ 142976 , Size: 11180 + S22_Output EXEC: L2[ 7: 8]@ 154156 , Size: 11180 + S25_Output EXEC: L2[ 8: 9]@ 165336 , Size: 11180 + S28_Output EXEC: L2[ 9: 10]@ 142976 , Size: 11180 + S29_Output EXEC: L2[ 10: 11]@ 154156 , Size: 172 + S32_Output EXEC: L2[ 11: 12]@ 142976 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S4_Conv2d_172x1x10x4_Relu +Generating Code For User Kernel: S7_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S10_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S13_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S16_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S19_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S22_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S25_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S28_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S29_AveragePool_13x5 +Generating Code For User Kernel: S32_Linear_12x172x1x1 +Generating Code For User Kernel: S33_SoftMax +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1weights_quantfakequ.tensor: 6880 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1conv2d_fold_bias.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S29_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1weights_quantfakequant.tensor: 2064 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1matmul_bias.tensor: 12 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S33_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_m_quant_L3_Flash_Const.dat (size 142976) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 45492 +L2 Memory size (Bytes) : Given: 350000, Used: 197153 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 499112 Bytes +Sum of all Kernels arguments size : 408702 Bytes +Tiling Bandwith overhead : 1.221213 Move/KerArgSize +Sum of baseline bandwidth : 15711780 Bytes +Percentage of baseline BW for L2 : 3.17667 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 9915296 Operations +Total amount of flash coefficients : 142976 Bytes + +Basic kernels library : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_m_quant.h +Output Directory : BUILD_MODEL_SQ8_EMUL + +The following files have been generated: + KWS_ds_cnn_m_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_m_quantKernels.h Header file for the generated C code + KWS_ds_cnn_m_quant_L3_Flash_Const.dat Flash content for Graph constants +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': True} +Pred/Tot: 97/ 100 Accuracy: 97.00% +Pred/Tot: 197/ 200 Accuracy: 98.50% +Pred/Tot: 290/ 300 Accuracy: 96.67% +Pred/Tot: 385/ 400 Accuracy: 96.25% +Pred/Tot: 480/ 500 Accuracy: 96.00% +Pred/Tot: 573/ 600 Accuracy: 95.50% +Pred/Tot: 669/ 700 Accuracy: 95.57% +Pred/Tot: 761/ 800 Accuracy: 95.12% +Pred/Tot: 858/ 900 Accuracy: 95.33% +Pred/Tot: 955/1000 Accuracy: 95.50% +Pred/Tot: 1049/1100 Accuracy: 95.36% +Pred/Tot: 1142/1200 Accuracy: 95.17% +Pred/Tot: 1228/1300 Accuracy: 94.46% +Pred/Tot: 1320/1400 Accuracy: 94.29% +Pred/Tot: 1411/1500 Accuracy: 94.07% +Pred/Tot: 1509/1600 Accuracy: 94.31% +Pred/Tot: 1603/1700 Accuracy: 94.29% +Pred/Tot: 1700/1800 Accuracy: 94.44% +Pred/Tot: 1797/1900 Accuracy: 94.58% +Pred/Tot: 1894/2000 Accuracy: 94.70% +Pred/Tot: 1990/2100 Accuracy: 94.76% +Pred/Tot: 2085/2200 Accuracy: 94.77% +Pred/Tot: 2181/2300 Accuracy: 94.83% +Pred/Tot: 2273/2400 Accuracy: 94.71% +Pred/Tot: 2364/2500 Accuracy: 94.56% +Pred/Tot: 2457/2600 Accuracy: 94.50% +Pred/Tot: 2551/2700 Accuracy: 94.48% +Pred/Tot: 2646/2800 Accuracy: 94.50% +Pred/Tot: 2740/2900 Accuracy: 94.48% +Pred/Tot: 2830/3000 Accuracy: 94.33% +Pred/Tot: 2927/3100 Accuracy: 94.42% +Pred/Tot: 3024/3200 Accuracy: 94.50% +Pred/Tot: 3120/3300 Accuracy: 94.55% +Pred/Tot: 3210/3400 Accuracy: 94.41% +Pred/Tot: 3302/3500 Accuracy: 94.34% +Pred/Tot: 3399/3600 Accuracy: 94.42% +Pred/Tot: 3490/3700 Accuracy: 94.32% +Pred/Tot: 3588/3800 Accuracy: 94.42% +Pred/Tot: 3682/3900 Accuracy: 94.41% +Pred/Tot: 3776/4000 Accuracy: 94.40% +Pred/Tot: 3872/4100 Accuracy: 94.44% +Pred/Tot: 3966/4200 Accuracy: 94.43% +Pred/Tot: 4061/4300 Accuracy: 94.44% +Pred/Tot: 4153/4400 Accuracy: 94.39% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4196/4444 Accuracy: 94.42% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 1 333 1 3 4 5 0 7 8 2 3 4] + [ 0 4 388 0 0 2 1 0 0 0 0 2] + [ 0 10 1 380 0 7 1 0 0 0 2 5] + [ 0 2 1 0 327 2 4 0 2 6 6 0] + [ 0 1 0 7 0 360 0 0 0 0 4 5] + [ 0 3 6 1 1 2 336 2 0 0 0 1] + [ 0 9 1 1 0 1 1 349 0 0 0 1] + [ 0 6 0 1 5 1 0 1 343 4 1 1] + [ 0 4 0 0 23 1 3 0 4 335 2 1] + [ 1 2 0 0 10 1 0 0 0 1 334 1] + [ 0 7 0 14 0 6 0 0 1 2 2 340]] +Pred/Tot: 95/ 100 Accuracy: 95.00% +Pred/Tot: 190/ 200 Accuracy: 95.00% +Pred/Tot: 281/ 300 Accuracy: 93.67% +Pred/Tot: 375/ 400 Accuracy: 93.75% +Pred/Tot: 468/ 500 Accuracy: 93.60% +Pred/Tot: 562/ 600 Accuracy: 93.67% +Pred/Tot: 657/ 700 Accuracy: 93.86% +Pred/Tot: 751/ 800 Accuracy: 93.88% +Pred/Tot: 846/ 900 Accuracy: 94.00% +Pred/Tot: 943/1000 Accuracy: 94.30% +Pred/Tot: 1034/1100 Accuracy: 94.00% +Pred/Tot: 1130/1200 Accuracy: 94.17% +Pred/Tot: 1222/1300 Accuracy: 94.00% +Pred/Tot: 1316/1400 Accuracy: 94.00% +Pred/Tot: 1409/1500 Accuracy: 93.93% +Pred/Tot: 1504/1600 Accuracy: 94.00% +Pred/Tot: 1600/1700 Accuracy: 94.12% +Pred/Tot: 1698/1800 Accuracy: 94.33% +Pred/Tot: 1787/1900 Accuracy: 94.05% +Pred/Tot: 1880/2000 Accuracy: 94.00% +Pred/Tot: 1973/2100 Accuracy: 93.95% +Pred/Tot: 2067/2200 Accuracy: 93.95% +Pred/Tot: 2159/2300 Accuracy: 93.87% +Pred/Tot: 2251/2400 Accuracy: 93.79% +Pred/Tot: 2345/2500 Accuracy: 93.80% +Pred/Tot: 2438/2600 Accuracy: 93.77% +Pred/Tot: 2533/2700 Accuracy: 93.81% +Pred/Tot: 2629/2800 Accuracy: 93.89% +Pred/Tot: 2723/2900 Accuracy: 93.90% +Pred/Tot: 2818/3000 Accuracy: 93.93% +Pred/Tot: 2914/3100 Accuracy: 94.00% +Pred/Tot: 3010/3200 Accuracy: 94.06% +Pred/Tot: 3103/3300 Accuracy: 94.03% +Pred/Tot: 3197/3400 Accuracy: 94.03% +Pred/Tot: 3293/3500 Accuracy: 94.09% +Pred/Tot: 3391/3600 Accuracy: 94.19% +Pred/Tot: 3484/3700 Accuracy: 94.16% +Pred/Tot: 3580/3800 Accuracy: 94.21% +Pred/Tot: 3673/3900 Accuracy: 94.18% +Pred/Tot: 3766/4000 Accuracy: 94.15% +Pred/Tot: 3859/4100 Accuracy: 94.12% +Pred/Tot: 3955/4200 Accuracy: 94.17% +Pred/Tot: 4051/4300 Accuracy: 94.21% +Pred/Tot: 4148/4400 Accuracy: 94.27% +Pred/Tot: 4241/4500 Accuracy: 94.24% +Pred/Tot: 4329/4600 Accuracy: 94.11% +Pred/Tot: 4423/4700 Accuracy: 94.11% +Pred/Tot: 4516/4800 Accuracy: 94.08% + +FINAL TESTING ACCURACY: +Pred/Tot: 4601/4889 Accuracy: 94.11% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 382 0 3 3 5 3 3 4 2 2 1] + [ 0 7 406 4 0 1 1 0 0 0 0 0] + [ 0 5 0 389 2 3 1 0 0 1 0 4] + [ 0 6 0 0 400 3 0 0 4 7 4 1] + [ 0 5 1 14 0 373 1 0 2 0 1 9] + [ 0 7 6 1 0 0 397 1 0 0 0 0] + [ 0 14 0 0 0 3 6 370 1 0 1 1] + [ 0 7 0 0 3 9 0 0 363 12 1 1] + [ 0 9 0 1 13 1 0 0 5 368 2 3] + [ 0 5 1 0 6 5 0 0 1 2 387 4] + [ 0 9 0 26 1 2 0 2 1 1 2 358]] diff --git a/accuracy_log/log_test_medium_spectr.txt b/accuracy_log/log_test_medium_spectr.txt new file mode 100644 index 0000000..1d68b6e --- /dev/null +++ b/accuracy_log/log_test_medium_spectr.txt @@ -0,0 +1,1499 @@ +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_m_quant.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 0 --use_high_prec 0 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=1 LARGE=0 WITH_MFCC=1 USE_POWER=0 USE_HIGH_PREC=0 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +rm -f BUILD_MODEL_8BIT_EMUL/GenTile +rm -f -rf BUILD_MODEL_8BIT_EMUL +rm -f BUILD_MODEL_8BIT_EMUL/*.dat +rm -rf /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_8BIT_EMUL +cp model/KWS_ds_cnn_m_quant.tflite BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quant.tflite +echo "GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL" +GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL +sed -e "s|MODEL_SRC|KWS_ds_cnn_m_quantModel.c|g" -e "s|TENSORS_DIR|BUILD_MODEL_8BIT_EMUL/tensors|g" -e "s|MODEL_BUILD|BUILD_MODEL_8BIT_EMUL|g" -e "s|GRAPH_DUMP||g" -e "s|LARGE_OPT||g" \ + model/nntool_script_params > model/nntool_script +nntool -s model/nntool_script BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +load_quantization - was: False +now: True +open - opening graph file BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quant.tflite load_quantizaion = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +debug - was: False +now: True +adjust_order - adding transposes to correct tensor order for AT kernels +set_aliases - looking for aliased edges +eliminate_transposes - eliminating unnecessary transposes +eliminate_transposes - search for transposes +eliminate_transposes - ++ Starting up from CONV_2D_0_0[0] +eliminate_transposes - looking up at input_1[0] transpose [2, 0, 1] +eliminate_transposes - accepted input_1 - input without fixed order - transpose input [2, 0, 1] +eliminate_transposes - ++ Found results for CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_0[0] +eliminate_transposes - looking down at CONV_2D_0_0_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_1 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_2[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_2 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_2[0] +eliminate_transposes - looking down at CONV_2D_0_2_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_3 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_2[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_4[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_4 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_4[0] +eliminate_transposes - looking down at CONV_2D_0_4_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_5 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_4[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_6[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_6 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_6[0] +eliminate_transposes - looking down at CONV_2D_0_6_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_7 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_6[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_8[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_8 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_8[0] +eliminate_transposes - looking down at CONV_2D_0_8_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at AVERAGE_POOL_2D_0_9[0] transpose [1, 2, 0] +eliminate_transposes - accepted AVERAGE_POOL_2D_0_9 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_8[0] +eliminate_transposes - ++ Starting down from AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - looking down at FULLY_CONNECTED_0_10[0] transpose [1, 2, 0] +eliminate_transposes - accepted FULLY_CONNECTED_0_10 - linear layer reorder input +eliminate_transposes - ++ Found results for AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - eliminate transposes +eliminate_transposes_actions - Start Action (up): CONV_2D_0_0 +eliminate_transposes_actions - CONV_2D_0_0 delete transpose in[0] +eliminate_transposes_actions - CONV_2D_0_0 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - input_1 input dims with (2, 0, 1) +eliminate_transposes_actions - End Action (up): input_1 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_0 +eliminate_transposes_actions - CONV_2D_0_0 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_0 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_2 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_2 +eliminate_transposes_actions - CONV_2D_0_2 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_2 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_4 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_4 +eliminate_transposes_actions - CONV_2D_0_4 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_4 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_6 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_6 +eliminate_transposes_actions - CONV_2D_0_6 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_6 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_8 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_8 +eliminate_transposes_actions - CONV_2D_0_8 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_8 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose in[0] +eliminate_transposes_actions - End Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - Start Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose out[0] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - reorder linear layer FULLY_CONNECTED_0_10 in with shape 1x1x172 transposed (2, 0, 1) +eliminate_transposes_actions - End Action (down): FULLY_CONNECTED_0_10 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - search for transposes +eliminate_transposes - no transposes to eliminate found +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - no further transpose sequences found +set_aliases - looking for aliased edges +nngraph - adjusted order +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start remove_relus +matcher - fusions - start remove_noops +matcher - fusions - start fuse_external_bias_sq8 +matcher - fusions - start fuse_pad +matcher - fusions - start match_duplicate_operations +matcher - fusions - start gather_to_split +matcher - fusions - start slice_to_split +matcher - fusions - start unused_concats +matcher - fusions - start find_missing_quantization +matcher - fusions - start rnn_reverse +matcher - fusions - start rnn_unpack +matcher - fusions - start match_far_hsigmoid +matcher - fusions - start match_close_hsigmoid +matcher - fusions - start expand_transposes +matcher - fusions - start move_pooling_scale8 +matcher - fusions - start move_activations_scale8 +matcher - fusions - start fuse_gap_convs +matcher - fusions - start match_conv_active_pool +matcher - fusions - start match_conv_pool_active +matcher - fusions - start match_conv_active +match_gap_conv - fusing nodes CONV_2D_0_0,CONV_2D_0_0_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_1,DEPTHWISE_CONV_2D_0_1_activation +match_gap_conv - fusing nodes CONV_2D_0_2,CONV_2D_0_2_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_3,DEPTHWISE_CONV_2D_0_3_activation +match_gap_conv - fusing nodes CONV_2D_0_4,CONV_2D_0_4_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_5,DEPTHWISE_CONV_2D_0_5_activation +match_gap_conv - fusing nodes CONV_2D_0_6,CONV_2D_0_6_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_7,DEPTHWISE_CONV_2D_0_7_activation +match_gap_conv - fusing nodes CONV_2D_0_8,CONV_2D_0_8_activation +matcher - fusions - match_conv_active modified graph +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start match_conv_pool +matcher - fusions - start fuse_gap_linear +matcher - fusions - start fuse_op_activation_scale8 +matcher - fusions - start propagate_softmax_sym_qrec +equalize_sym_mult_concats - propagating scale up from node SOFTMAX_0_11 to node FULLY_CONNECTED_0_10 +matcher - fusions - start equalize_sm_concats +matcher - fusions - start filter_bigger_than_input +matcher - fusions - start insert_copies +matcher - fusions - start propagate_up_rnn_in_qs +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| Step | Step name | Operation | Input Dims | Output Dims | Inputs | Active | Params | Ops | Params | Hints | +| | | | (cxhxw) | (cxhxw) | | size | size | | | | ++======+=============================+=========================+============+=============+========+========+========+=========+============================+======================+ +| 0 | input_1 | input | 1x49x10 | 1x49x10 | | 490 | 0 | | I 1x49x10 FIXED_ORDER=0 | in: hxwxc out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 1 | CONV_2D_0_0_fusion | conv_fusion_conv_active | 1x49x10 | 172x25x10 | 0 | 43490 | 7052 | 1.72M | F 172x1x10x4 S 2x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 4x5x1x2 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 2 | DEPTHWISE_CONV_2D_0_1_fusio | conv_fusion_conv_active | 172x25x10 | 172x13x5 | 1 | 54180 | 181 | 100.62K | F 172x1x3x3 S 2x2 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x0x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 3 | CONV_2D_0_2_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 2 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 4 | DEPTHWISE_CONV_2D_0_3_fusio | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 3 | 22360 | 181 | 100.62K | F 172x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 5 | CONV_2D_0_4_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 4 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 6 | DEPTHWISE_CONV_2D_0_5_fusio | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 5 | 22360 | 181 | 100.62K | F 172x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 7 | CONV_2D_0_6_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 6 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 8 | DEPTHWISE_CONV_2D_0_7_fusio | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 7 | 22360 | 181 | 100.62K | F 172x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 172 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 9 | CONV_2D_0_8_fusion | conv_fusion_conv_active | 172x13x5 | 172x13x5 | 8 | 22360 | 29756 | 1.92M | F 172x172x1x1 S 1x1 D 1x1 | in: cxhxw out: cxhxw | +| | | | | | | | | | G 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 10 | AVERAGE_POOL_2D_0_9 | average_pool | 172x13x5 | 172x1x1 | 9 | 11352 | 0 | 11.35K | T average F 13x5 S 2x2 P | in: cxhxw out: cxhxw | +| | | | | | | | | | 0x0x0x0 zero | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 11 | FULLY_CONNECTED_0_10 | linear | 172x1x1 | 12 | 10 | 184 | 2076 | 2.06K | F 12x172x1x1 | in: cx0x1 out: c | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 12 | SOFTMAX_0_11 | softmax | 12 | 12 | 11 | 24 | 0 | 24 | Beta 0.0 Axis None | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 13 | output_1 | output | 12 | 12 | 12 | 12 | 0 | | O 12 FIXED_ORDER=0 | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | 54180 | 128876 | 9.83M | | | +| | Max active/Total params | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | | 183056 | 9.83M | | | +| | Max mem usage | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| Step | Name | In | Out | Weights | Bias | Mulbias | Calc | Acc | ++======+==================+==================+=================+=================+==========+==========+=======+=======+ +| 0 | input_1 | -247.97>chan | Q32.0 | Q32.0 | +| | | 27124<246.03 | 09111<35.57 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 1 | CONV_2D_0_0_acti | -35.85>chan | Q32.0 | Q32.0 | +| | D_0_1 | 9111<35.57 | 17583<35.07 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 2 | DEPTHWISE_CONV_2 | -35.35>chan | Q32.0 | Q32.0 | +| | | 7583<35.07 | 67131<30.57 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 3 | CONV_2D_0_2_acti | -30.81>chan | Q32.0 | Q32.0 | +| | D_0_3 | 7131<30.57 | 10654<27.19 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 4 | DEPTHWISE_CONV_2 | -27.41>chan | Q32.0 | Q32.0 | +| | | 0654<27.19 | 74510<17.87 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 5 | CONV_2D_0_4_acti | -18.02>chan | Q32.0 | Q32.0 | +| | D_0_5 | 4510<17.87 | 73493<17.37 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 6 | DEPTHWISE_CONV_2 | -17.50>chan | Q32.0 | Q32.0 | +| | | 3493<17.37 | 07283<14.74 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 7 | CONV_2D_0_6_acti | -14.86>chan | Q32.0 | Q32.0 | +| | D_0_7 | 7283<14.74 | 30772<17.31 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 8 | DEPTHWISE_CONV_2 | -17.45>chan | Q32.0 | Q32.0 | +| | | 0772<17.31 | 2234<9.18 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 9 | CONV_2D_0_8_acti | -9.26>chan | Q32.0 | Q32.0 | +| | 0_10 | 234<9.18 | 00000<32.38 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 12 | SOFTMAX_0_11 | -32.63 W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1763000 + +==== Process Tiling For User Kernel: S1_Conv2d_172x1x10x4_Relu ======================= +S1_Conv2d_172x1x10x4_Relu Partition[0] Size = 194945 (Min: 200, Max: 275249), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S1_Conv2d_172x1x10x4_Relu Full buffering on Arg: Bias, was using 704 bytes will require 688 bytes buffer +S1_Conv2d_172x1x10x4_Relu Full buffering on Arg: Scale, was using 176 bytes will require 172 bytes buffer +S1_Conv2d_172x1x10x4_Relu Full buffering on Arg: ScaleN, was using 176 bytes will require 172 bytes buffer +S1_Conv2d_172x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 4 Parametric Space: [D1, M0=88] Parametric Space: [D0, M1=1] + In : Ratio: 2.000000, FixDim: 10, VarDim: 22 [ 49], Size: 440, Total: 440, Move: 1460 (Decl x 2.979592) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 1128, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 1300, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 1472, Move: 172 (Decl x 1.000000) L2 +@ Filter : Ratio: 0.000000, Size: 6880, Total: 8352, Move: 6880 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 12320, Total: 20672, Move: 43000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 24640, Total: 45312, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45324, Move: 9 (Decl x 1.000000) L2 +S1_Conv2d_172x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 45324, L2Move: 52381, L3Move: 0, Tiling Overhead: 1.018868 +S1_Conv2d_172x1x10x4_Relu Partial buffering on Arg: Filter, From: D0 To: D1. Current is (Par) 1 x [W:1, H:1] x 40 => Partial buffer size is 7040 bytes +S1_Conv2d_172x1x10x4_Relu Found Parametric value for space D1 (Initial: 172, Div: 8) = 88 [88*1 + 84] and space D0 (Initial: 1, Div: 4) = 1 [1*1 + 0], Iteration for Tiled Space: 4 +S1_Conv2d_172x1x10x4_Relu For Iter Space: 0 Iteration count: 4 (Last one is truncated), Given L1 Memory: 48736, Used L1 Memory: 45484, Reusable Memory: 3252, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S2_Conv2d_172x1x3x3_Relu ======================= +S2_Conv2d_172x1x3x3_Relu Partition[0] Size = 63681 (Min: 60, Max: 165385), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 384 bytes will require 688 bytes buffer +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 96 bytes will require 172 bytes buffer +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 bytes will require 172 bytes buffer +S2_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 864 bytes will require 1548 bytes buffer +S2_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 43000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 24688, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 24860, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 25032, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 26580, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 32820, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 45300, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45312, Move: 9 (Decl x 1.000000) L2 +S2_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45312, L2Move: 56769, L3Move: 0, Tiling Overhead: 1.000000 +S2_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 48 [48*3 + 28], Iteration for Tiled Space: 1 +S2_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45312, Reusable Memory: 3424, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S3_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S3_Conv2d_172x172x1x1_Relu ======================= +S3_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S3_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S3_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S3_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S3_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S3_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S3_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S3_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S4_Conv2d_172x1x3x3_Relu ======================= +S4_Conv2d_172x1x3x3_Relu Partition[0] Size = 56801 (Min: 30, Max: 98305), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 688 bytes buffer +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 172 bytes buffer +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 172 bytes buffer +S4_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 1548 bytes buffer +S4_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S4_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +S4_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S5_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S5_Conv2d_172x172x1x1_Relu ======================= +S5_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S5_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S5_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S5_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S5_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S5_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S5_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S5_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S6_Conv2d_172x1x3x3_Relu ======================= +S6_Conv2d_172x1x3x3_Relu Partition[0] Size = 56801 (Min: 30, Max: 98305), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 688 bytes buffer +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 172 bytes buffer +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 172 bytes buffer +S6_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 1548 bytes buffer +S6_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S6_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S6_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +S6_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S7_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S7_Conv2d_172x172x1x1_Relu ======================= +S7_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S7_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S7_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S7_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S7_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S7_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S7_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S8_Conv2d_172x1x3x3_Relu ======================= +S8_Conv2d_172x1x3x3_Relu Partition[0] Size = 56801 (Min: 30, Max: 98305), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 bytes will require 688 bytes buffer +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 bytes will require 172 bytes buffer +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 bytes will require 172 bytes buffer +S8_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 bytes will require 1548 bytes buffer +S8_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S8_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S8_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +S8_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S9_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S9_Conv2d_172x172x1x1_Relu ======================= +S9_Conv2d_172x172x1x1_Relu Partition[0] Size = 2105 (Min: 1376, Max: 23153), Fraction: 0.30, Giving: 14737 bytes out of 48736 bytes +S9_Conv2d_172x172x1x1_Relu Partition[1] Size = 4856 (Min: 2752, Max: 82720), Fraction: 0.70, Giving: 33998 bytes out of 48736 bytes +S9_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 64 [ 172], Size: 22016, Total: 22016, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 22704, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 64 [ 172], Size: 8320, Total: 31024, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 31196, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 31368, Move: 172 (Decl x 1.000000) L2 +S9_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 31368, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S9_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +S9_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 33998, Used L1 Memory: 31368, Reusable Memory: 2628, Used L2 Memory: 0 +Kernel: S9_Conv2d_172x172x1x1_Relu, Total Raw Memory: 11880 fits into L1 memory 14737. Promoting all kernel arguments to initialized buffers. +S9_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 14737, Used L1 Memory: 11880, Reusable Memory: 2856, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 11180 + +==== Process Tiling For User Kernel: S10_AveragePool_13x5 ======================= +S10_AveragePool_13x5 Partition[0] Size = 23069 (Min: 130, Max: 22749), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S10_AveragePool_13x5, Total Raw Memory: 11364 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S10_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 11364, Reusable Memory: 37372, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S11_Linear_12x172x1x1, Linear: InDim: 172, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S11_Linear_12x172x1x1 ======================= +S11_Linear_12x172x1x1 Partition[0] Size = 4505 (Min: 0, Max: 4673), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S11_Linear_12x172x1x1, Total Raw Memory: 2332 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S11_Linear_12x172x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 2332, Reusable Memory: 46404, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S12_SoftMax ======================= + S12_SoftMax Partition[0] Size = 57 (Min: 8, Max: 81), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S12_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. + S12_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S11_Output[ In] Adding Edge From S11_Linear_12x172x1x1 To S12_SoftMax New + Symbol: S10_Output[ In] Adding Edge From S10_AveragePool_13x5 To S11_Linear_12x172x1x1 New + Symbol: S9_Output[ In] Adding Edge From S9_Conv2d_172x172x1x1_Relu To S10_AveragePool_13x5 New + Symbol: S8_Output[ In] Adding Edge From S8_Conv2d_172x1x3x3_Relu To S9_Conv2d_172x172x1x1_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_172x172x1x1_Relu To S8_Conv2d_172x1x3x3_Relu New + Symbol: S6_Output[ In] Adding Edge From S6_Conv2d_172x1x3x3_Relu To S7_Conv2d_172x172x1x1_Relu New + Symbol: S5_Output[ In] Adding Edge From S5_Conv2d_172x172x1x1_Relu To S6_Conv2d_172x1x3x3_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_172x1x3x3_Relu To S5_Conv2d_172x172x1x1_Relu New + Symbol: S3_Output[ In] Adding Edge From S3_Conv2d_172x172x1x1_Relu To S4_Conv2d_172x1x3x3_Relu New + Symbol: S2_Output[ In] Adding Edge From S2_Conv2d_172x1x3x3_Relu To S3_Conv2d_172x172x1x1_Relu New + Symbol: S1_Output[ In] Adding Edge From S1_Conv2d_172x1x10x4_Relu To S2_Conv2d_172x1x3x3_Relu New + Symbol: Output_1[Undef] Adding Edge From S12_SoftMax To __GraphExit__ New + Symbol: S12_Infos[ In] Adding Edge From __GraphEntry__ To S12_SoftMax New + Symbol: S11_Mul_shift[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 New + Symbol: S11_Mul_scale[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S11_Biases[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S11_Weights[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S11_Infos[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x172x1x1 Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_AveragePool_13x5 New + Symbol: S9_Mul_shift[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu New + Symbol: S9_Mul_scale[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S9_Biases[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S9_Weights[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S9_Infos[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_172x172x1x1_Relu Exists + Symbol: S8_Mul_shift[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu New + Symbol: S8_Mul_scale[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S8_Biases[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S8_Weights[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S8_Infos[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_172x1x3x3_Relu Exists + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu New + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Biases[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Weights[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x172x1x1_Relu Exists + Symbol: S6_Mul_shift[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu New + Symbol: S6_Mul_scale[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S6_Biases[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S6_Weights[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S6_Infos[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_172x1x3x3_Relu Exists + Symbol: S5_Mul_shift[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu New + Symbol: S5_Mul_scale[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S5_Biases[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S5_Weights[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S5_Infos[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_172x172x1x1_Relu Exists + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu New + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Biases[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Weights[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x3x3_Relu Exists + Symbol: S3_Mul_shift[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu New + Symbol: S3_Mul_scale[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S3_Biases[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S3_Weights[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S3_Infos[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_172x172x1x1_Relu Exists + Symbol: S2_Mul_shift[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu New + Symbol: S2_Mul_scale[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S2_Biases[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S2_Weights[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S2_Infos[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_172x1x3x3_Relu Exists + Symbol: S1_Mul_shift[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu New + Symbol: S1_Mul_scale[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S1_Biases[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S1_Weights[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S1_Infos[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists + Symbol: S0_Input_1[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_172x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 54180 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 197153 => Alloc: OK + +[FULL] Remapping [54180 .. 197152] to [0 .. 142972] Align compensation: 3 +[PART] Remapping [0 .. 54179] to [142976 .. 197155] Align compensation: 0 +[PART] Remapping [197153 .. 349999] to [197156 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_m_quantCNN is sucessfull, L2: 197153 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => S0_Input_1 + (null) => S1_Infos + (null) => S1_Weights + (null) => S1_Biases + (null) => S1_Mul_scale + (null) => S1_Mul_shift + (null) => S2_Infos + (null) => S2_Weights + (null) => S2_Biases + (null) => S2_Mul_scale + (null) => S2_Mul_shift + (null) => S3_Infos + (null) => S3_Weights + (null) => S3_Biases + (null) => S3_Mul_scale + (null) => S3_Mul_shift + (null) => S4_Infos + (null) => S4_Weights + (null) => S4_Biases + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S5_Infos + (null) => S5_Weights + (null) => S5_Biases + (null) => S5_Mul_scale + (null) => S5_Mul_shift + (null) => S6_Infos + (null) => S6_Weights + (null) => S6_Biases + (null) => S6_Mul_scale + (null) => S6_Mul_shift + (null) => S7_Infos + (null) => S7_Weights + (null) => S7_Biases + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S8_Infos + (null) => S8_Weights + (null) => S8_Biases + (null) => S8_Mul_scale + (null) => S8_Mul_shift + (null) => S9_Infos + (null) => S9_Weights + (null) => S9_Biases + (null) => S9_Mul_scale + (null) => S9_Mul_shift + (null) => S10_Infos + (null) => S11_Infos + (null) => S11_Weights + (null) => S11_Biases + (null) => S11_Mul_scale + (null) => S11_Mul_shift + (null) => S12_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 + + Living Dynamic Symbols: [S0_Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S1_Conv2d_172x1x10x4_Relu, Operations: 1763000 + I In => S0_Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 1460, TileOverhead: 2.979592, L2Buff: 0, Addr: 0 +CI PartBuff Filter => S1_Weights --L2-- Size: 6880, L3_Move: 0, L2_Move: 6880, TileOverhead: 1.000000, L2Buff: 0, Addr: 1472 +CI Buff Bias => S1_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 440 + O Out => S1_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8512 +CI Buff Scale => S1_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1128 +CI Buff ScaleN => S1_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1300 +CI Buff Infos => S1_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45472 + Kernel Memory : L3: 0, L2: 51411 + Kernel Total Memory: 51411, L3 moves: 0, L2 moves: 52381, Move overhead: 1.018868 + Kernel Operations : 1763000 [KernelOper/GraphOper: 17.780609%], Move/Operation ratio: [L3: 0.000000, L2: 0.029711] + Successors: 2 + + Living Dynamic Symbols: [S0_Input_1] [S1_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S2_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S1_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S2_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 25032 +CI Buff Bias => S2_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S2_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26580 +CI Buff Scale => S2_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24688 +CI Buff ScaleN => S2_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24860 +CI Buff Infos => S2_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45300 + Kernel Memory : L3: 0, L2: 56769 + Kernel Total Memory: 56769, L3 moves: 0, L2 moves: 56769, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.507773] + Successors: 3 + + Living Dynamic Symbols: [S1_Output] [S2_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S3_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S2_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S3_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S3_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S3_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S3_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S3_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S3_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 4 + + Living Dynamic Symbols: [S2_Output] [S3_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S4_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S3_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S4_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => S4_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S4_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S4_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 5 + + Living Dynamic Symbols: [S3_Output] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S5_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S4_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S5_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S5_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S5_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S5_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S5_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S5_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 6 + + Living Dynamic Symbols: [S4_Output] [S5_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S6_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S5_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S6_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => S6_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S6_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S6_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S6_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S6_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 7 + + Living Dynamic Symbols: [S5_Output] [S6_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S7_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S6_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S7_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S7_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S7_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 8 + + Living Dynamic Symbols: [S6_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S8_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S8_Weights --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => S8_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S8_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S8_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S8_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S8_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 9 + + Living Dynamic Symbols: [S7_Output] [S8_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S9_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I Buff In2 => S8_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 32056 +CI In1 => S9_Weights --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Bias => S9_Biases --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 22016 + O Out => S9_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 22704 +CI Buff Scale => S9_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31024 +CI Buff ScaleN => S9_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 31196 +CI Buff Infos => S9_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43236 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 52985, Move overhead: 1.000000 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.027554] + Successors: 10 + + Living Dynamic Symbols: [S8_Output] [S9_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S10_AveragePool_13x5, Operations: 11180 + I Buff In => S9_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S10_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11180 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 11352 + Kernel Memory : L3: 0, L2: 11361 + Kernel Total Memory: 11361, L3 moves: 0, L2 moves: 11361, Move overhead: 1.000000 + Kernel Operations : 11180 [KernelOper/GraphOper: 0.112755%], Move/Operation ratio: [L3: 0.000000, L2: 1.016190] + Successors: 11 + + Living Dynamic Symbols: [S9_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S11_Linear_12x172x1x1, Operations: 2064 + I Buff In => S10_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S11_Weights --L2-- Size: 2064, L3_Move: 0, L2_Move: 2064, TileOverhead: 1.000000, L2Buff: 0, Addr: 172 +CI Buff Bias => S11_Biases --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 2236 + O Buff Out => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2284 +CI Buff Scale => S11_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2296 +CI Buff ScaleN => S11_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2308 +CI Buff Infos => S11_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 2320 + Kernel Memory : L3: 0, L2: 2329 + Kernel Total Memory: 2329, L3 moves: 0, L2 moves: 2329, Move overhead: 1.000000 + Kernel Operations : 2064 [KernelOper/GraphOper: 0.020816%], Move/Operation ratio: [L3: 0.000000, L2: 1.128392] + Successors: 12 + + Living Dynamic Symbols: [S10_Output] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S12_SoftMax, Operations: 12 + I Buff In => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S12_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000121%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 56769 + Graph nodes min global memory: L3: 0, L2: 56772 + Graph sum of kernel arguments size: 408702, L3 moves: 0, L2 moves: 409672, Move overhead: 1.002373 + Graph total operations: 9915296 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 408702, Total L3_Move: 0, Total L2_Move: 409672, Tiling Overhead Average: 1.002373 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 54180 + Const 0 30628 + Total 0 56772 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + S0_Input_1 Externally allocated + S1_Infos INSTALL: HyperFlash[ 0: 13]@ 142808 LOAD: L2[ 0: 13]@ 142808 EXEC: L2[ 0: 13]@ 142808 , Size: 9 + S1_Weights INSTALL: HyperFlash[ 0: 13]@ 118336 LOAD: L2[ 0: 13]@ 118336 EXEC: L2[ 0: 13]@ 118336 , Size: 6880 + S1_Biases INSTALL: HyperFlash[ 0: 13]@ 133472 LOAD: L2[ 0: 13]@ 133472 EXEC: L2[ 0: 13]@ 133472 , Size: 688 + S1_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 139664 LOAD: L2[ 0: 13]@ 139664 EXEC: L2[ 0: 13]@ 139664 , Size: 172 + S1_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 139836 LOAD: L2[ 0: 13]@ 139836 EXEC: L2[ 0: 13]@ 139836 , Size: 172 + S2_Infos INSTALL: HyperFlash[ 0: 13]@ 142820 LOAD: L2[ 0: 13]@ 142820 EXEC: L2[ 0: 13]@ 142820 , Size: 9 + S2_Weights INSTALL: HyperFlash[ 0: 13]@ 127280 LOAD: L2[ 0: 13]@ 127280 EXEC: L2[ 0: 13]@ 127280 , Size: 1548 + S2_Biases INSTALL: HyperFlash[ 0: 13]@ 134160 LOAD: L2[ 0: 13]@ 134160 EXEC: L2[ 0: 13]@ 134160 , Size: 688 + S2_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140008 LOAD: L2[ 0: 13]@ 140008 EXEC: L2[ 0: 13]@ 140008 , Size: 172 + S2_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140180 LOAD: L2[ 0: 13]@ 140180 EXEC: L2[ 0: 13]@ 140180 , Size: 172 + S3_Infos INSTALL: HyperFlash[ 0: 13]@ 142832 LOAD: L2[ 0: 13]@ 142832 EXEC: L2[ 0: 13]@ 142832 , Size: 9 + S3_Weights INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 29584 + S3_Biases INSTALL: HyperFlash[ 0: 13]@ 134848 LOAD: L2[ 0: 13]@ 134848 EXEC: L2[ 0: 13]@ 134848 , Size: 688 + S3_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140352 LOAD: L2[ 0: 13]@ 140352 EXEC: L2[ 0: 13]@ 140352 , Size: 172 + S3_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140524 LOAD: L2[ 0: 13]@ 140524 EXEC: L2[ 0: 13]@ 140524 , Size: 172 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 142844 LOAD: L2[ 0: 13]@ 142844 EXEC: L2[ 0: 13]@ 142844 , Size: 9 + S4_Weights INSTALL: HyperFlash[ 0: 13]@ 128828 LOAD: L2[ 0: 13]@ 128828 EXEC: L2[ 0: 13]@ 128828 , Size: 1548 + S4_Biases INSTALL: HyperFlash[ 0: 13]@ 135536 LOAD: L2[ 0: 13]@ 135536 EXEC: L2[ 0: 13]@ 135536 , Size: 688 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140696 LOAD: L2[ 0: 13]@ 140696 EXEC: L2[ 0: 13]@ 140696 , Size: 172 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140868 LOAD: L2[ 0: 13]@ 140868 EXEC: L2[ 0: 13]@ 140868 , Size: 172 + S5_Infos INSTALL: HyperFlash[ 0: 13]@ 142856 LOAD: L2[ 0: 13]@ 142856 EXEC: L2[ 0: 13]@ 142856 , Size: 9 + S5_Weights INSTALL: HyperFlash[ 0: 13]@ 29584 LOAD: L2[ 0: 13]@ 29584 EXEC: L2[ 0: 13]@ 29584 , Size: 29584 + S5_Biases INSTALL: HyperFlash[ 0: 13]@ 136224 LOAD: L2[ 0: 13]@ 136224 EXEC: L2[ 0: 13]@ 136224 , Size: 688 + S5_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141040 LOAD: L2[ 0: 13]@ 141040 EXEC: L2[ 0: 13]@ 141040 , Size: 172 + S5_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141212 LOAD: L2[ 0: 13]@ 141212 EXEC: L2[ 0: 13]@ 141212 , Size: 172 + S6_Infos INSTALL: HyperFlash[ 0: 13]@ 142868 LOAD: L2[ 0: 13]@ 142868 EXEC: L2[ 0: 13]@ 142868 , Size: 9 + S6_Weights INSTALL: HyperFlash[ 0: 13]@ 130376 LOAD: L2[ 0: 13]@ 130376 EXEC: L2[ 0: 13]@ 130376 , Size: 1548 + S6_Biases INSTALL: HyperFlash[ 0: 13]@ 136912 LOAD: L2[ 0: 13]@ 136912 EXEC: L2[ 0: 13]@ 136912 , Size: 688 + S6_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141384 LOAD: L2[ 0: 13]@ 141384 EXEC: L2[ 0: 13]@ 141384 , Size: 172 + S6_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141556 LOAD: L2[ 0: 13]@ 141556 EXEC: L2[ 0: 13]@ 141556 , Size: 172 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 142880 LOAD: L2[ 0: 13]@ 142880 EXEC: L2[ 0: 13]@ 142880 , Size: 9 + S7_Weights INSTALL: HyperFlash[ 0: 13]@ 59168 LOAD: L2[ 0: 13]@ 59168 EXEC: L2[ 0: 13]@ 59168 , Size: 29584 + S7_Biases INSTALL: HyperFlash[ 0: 13]@ 137600 LOAD: L2[ 0: 13]@ 137600 EXEC: L2[ 0: 13]@ 137600 , Size: 688 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141728 LOAD: L2[ 0: 13]@ 141728 EXEC: L2[ 0: 13]@ 141728 , Size: 172 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141900 LOAD: L2[ 0: 13]@ 141900 EXEC: L2[ 0: 13]@ 141900 , Size: 172 + S8_Infos INSTALL: HyperFlash[ 0: 13]@ 142892 LOAD: L2[ 0: 13]@ 142892 EXEC: L2[ 0: 13]@ 142892 , Size: 9 + S8_Weights INSTALL: HyperFlash[ 0: 13]@ 131924 LOAD: L2[ 0: 13]@ 131924 EXEC: L2[ 0: 13]@ 131924 , Size: 1548 + S8_Biases INSTALL: HyperFlash[ 0: 13]@ 138288 LOAD: L2[ 0: 13]@ 138288 EXEC: L2[ 0: 13]@ 138288 , Size: 688 + S8_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142072 LOAD: L2[ 0: 13]@ 142072 EXEC: L2[ 0: 13]@ 142072 , Size: 172 + S8_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142244 LOAD: L2[ 0: 13]@ 142244 EXEC: L2[ 0: 13]@ 142244 , Size: 172 + S9_Infos INSTALL: HyperFlash[ 0: 13]@ 142904 LOAD: L2[ 0: 13]@ 142904 EXEC: L2[ 0: 13]@ 142904 , Size: 9 + S9_Weights INSTALL: HyperFlash[ 0: 13]@ 88752 LOAD: L2[ 0: 13]@ 88752 EXEC: L2[ 0: 13]@ 88752 , Size: 29584 + S9_Biases INSTALL: HyperFlash[ 0: 13]@ 138976 LOAD: L2[ 0: 13]@ 138976 EXEC: L2[ 0: 13]@ 138976 , Size: 688 + S9_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142416 LOAD: L2[ 0: 13]@ 142416 EXEC: L2[ 0: 13]@ 142416 , Size: 172 + S9_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142588 LOAD: L2[ 0: 13]@ 142588 EXEC: L2[ 0: 13]@ 142588 , Size: 172 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 142916 LOAD: L2[ 0: 13]@ 142916 EXEC: L2[ 0: 13]@ 142916 , Size: 9 + S11_Infos INSTALL: HyperFlash[ 0: 13]@ 142928 LOAD: L2[ 0: 13]@ 142928 EXEC: L2[ 0: 13]@ 142928 , Size: 9 + S11_Weights INSTALL: HyperFlash[ 0: 13]@ 125216 LOAD: L2[ 0: 13]@ 125216 EXEC: L2[ 0: 13]@ 125216 , Size: 2064 + S11_Biases INSTALL: HyperFlash[ 0: 13]@ 142760 LOAD: L2[ 0: 13]@ 142760 EXEC: L2[ 0: 13]@ 142760 , Size: 48 + S11_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142940 LOAD: L2[ 0: 13]@ 142940 EXEC: L2[ 0: 13]@ 142940 , Size: 12 + S11_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142952 LOAD: L2[ 0: 13]@ 142952 EXEC: L2[ 0: 13]@ 142952 , Size: 12 + S12_Infos INSTALL: HyperFlash[ 0: 13]@ 142964 LOAD: L2[ 0: 13]@ 142964 EXEC: L2[ 0: 13]@ 142964 , Size: 9 + Output_1 Externally allocated + S1_Output EXEC: L2[ 1: 2]@ 154156 , Size: 43000 + S2_Output EXEC: L2[ 2: 3]@ 142976 , Size: 11180 + S3_Output EXEC: L2[ 3: 4]@ 154156 , Size: 11180 + S4_Output EXEC: L2[ 4: 5]@ 142976 , Size: 11180 + S5_Output EXEC: L2[ 5: 6]@ 154156 , Size: 11180 + S6_Output EXEC: L2[ 6: 7]@ 142976 , Size: 11180 + S7_Output EXEC: L2[ 7: 8]@ 154156 , Size: 11180 + S8_Output EXEC: L2[ 8: 9]@ 165336 , Size: 11180 + S9_Output EXEC: L2[ 9: 10]@ 142976 , Size: 11180 + S10_Output EXEC: L2[ 10: 11]@ 154156 , Size: 172 + S11_Output EXEC: L2[ 11: 12]@ 142976 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S1_Conv2d_172x1x10x4_Relu +Generating Code For User Kernel: S2_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S3_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S4_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S5_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S6_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S7_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S8_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S9_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S10_AveragePool_13x5 +Generating Code For User Kernel: S11_Linear_12x172x1x1 +Generating Code For User Kernel: S12_SoftMax +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Weights.tensor: 6880 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Weights.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Weights.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Biases.tensor: 688 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Weights.tensor: 2064 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Biases.tensor: 48 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S12_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_m_quant_L3_Flash_Const.dat (size 142976) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 45484 +L2 Memory size (Bytes) : Given: 350000, Used: 197153 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 409672 Bytes +Sum of all Kernels arguments size : 408702 Bytes +Tiling Bandwith overhead : 1.002373 Move/KerArgSize +Sum of baseline bandwidth : 15711780 Bytes +Percentage of baseline BW for L2 : 2.60742 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 9915296 Operations +Total amount of flash coefficients : 142976 Bytes + +Basic kernels library : nntool_extra_kernels.h + : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_m_quant.h +Output Directory : BUILD_MODEL_8BIT_EMUL + +The following files have been generated: + KWS_ds_cnn_m_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_m_quantKernels.h Header file for the generated C code + KWS_ds_cnn_m_quant_L3_Flash_Const.dat Flash content for Graph constants +mkdir /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +python3 /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/GenLUT.py --fft_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/LUT.def --mfcc_bf_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCC_FB.def \ + --sample_rate 16000 --frame_size 640 --frame_step 320 --n_frame 49 \ + --n_fft 1024 --n_dct 40 --mfcc_bank_cnt 40 --fmin 20 --fmax 4000 --mfcc_bank_cnt 40 --preempfactor 0.0 \ + --use_tf_mfcc --save_params_header MFCC_params_MEDIUM.h +(1024,) +Filter 0: Start: 2 Stop: 5 Base: 0 Items: 4 +Filter 1: Start: 4 Stop: 8 Base: 4 Items: 5 +Filter 2: Start: 6 Stop: 10 Base: 9 Items: 5 +Filter 3: Start: 9 Stop: 13 Base: 14 Items: 5 +Filter 4: Start: 11 Stop: 15 Base: 19 Items: 5 +Filter 5: Start: 14 Stop: 18 Base: 24 Items: 5 +Filter 6: Start: 16 Stop: 21 Base: 29 Items: 6 +Filter 7: Start: 19 Stop: 24 Base: 35 Items: 6 +Filter 8: Start: 22 Stop: 28 Base: 41 Items: 7 +Filter 9: Start: 25 Stop: 31 Base: 48 Items: 7 +Filter 10: Start: 29 Stop: 34 Base: 55 Items: 6 +Filter 11: Start: 32 Stop: 38 Base: 61 Items: 7 +Filter 12: Start: 35 Stop: 42 Base: 68 Items: 8 +Filter 13: Start: 39 Stop: 46 Base: 76 Items: 8 +Filter 14: Start: 43 Stop: 51 Base: 84 Items: 9 +Filter 15: Start: 47 Stop: 55 Base: 93 Items: 9 +Filter 16: Start: 52 Stop: 60 Base: 102 Items: 9 +Filter 17: Start: 56 Stop: 65 Base: 111 Items: 10 +Filter 18: Start: 61 Stop: 70 Base: 121 Items: 10 +Filter 19: Start: 66 Stop: 75 Base: 131 Items: 10 +Filter 20: Start: 71 Stop: 81 Base: 141 Items: 11 +Filter 21: Start: 76 Stop: 87 Base: 152 Items: 12 +Filter 22: Start: 82 Stop: 93 Base: 164 Items: 12 +Filter 23: Start: 88 Stop: 99 Base: 176 Items: 12 +Filter 24: Start: 94 Stop: 106 Base: 188 Items: 13 +Filter 25: Start: 100 Stop: 113 Base: 201 Items: 14 +Filter 26: Start: 107 Stop: 121 Base: 215 Items: 15 +Filter 27: Start: 114 Stop: 128 Base: 230 Items: 15 +Filter 28: Start: 122 Stop: 137 Base: 245 Items: 16 +Filter 29: Start: 129 Stop: 145 Base: 261 Items: 17 +Filter 30: Start: 138 Stop: 154 Base: 278 Items: 17 +Filter 31: Start: 146 Stop: 163 Base: 295 Items: 18 +Filter 32: Start: 155 Stop: 173 Base: 313 Items: 19 +Filter 33: Start: 164 Stop: 183 Base: 332 Items: 20 +Filter 34: Start: 174 Stop: 194 Base: 352 Items: 21 +Filter 35: Start: 184 Stop: 205 Base: 373 Items: 22 +Filter 36: Start: 195 Stop: 217 Base: 395 Items: 23 +Filter 37: Start: 206 Stop: 229 Base: 418 Items: 24 +Filter 38: Start: 218 Stop: 242 Base: 442 Items: 25 +Filter 39: Start: 230 Stop: 255 Base: 467 Items: 26 +gcc -g -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -I. -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Emulation MFCCmodel.c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccGenerator.c /home/marco-gwt/GWT/AutotilerV2/install/lib/libtile.a -lm -DMEDIUM +/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -f BUILD_MODEL_8BIT_EMUL --L1 48736 --L2 350000 --L3 6388608 +MFCC_COEF_DYN = 10 +FFT_BITS = 10 + +==== Process Tiling For User Kernel: MFCC ======================= + MFCC Partition[0] Size = 24894 (Min: 0, Max: 25622), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes + MFCC, TiledSpace: Tile0 Iteration Count: 1 + In : Ratio: 0.000000, Size: 2560, Total: 2560, Move: 62720 (Decl x 1.960000) L2 +* Out : Ratio: 0.000000, Size: 3920, Total: 6480, Move: 3920 (Decl x 1.000000) L2 +* InOut1 : Ratio: 3.200000, Size: 4096, Total: 10576, Move: 0 (Decl x 0.000000) L2 +* Out_fft : Ratio: 3.200000, Size: 4096, Total: 14672, Move: 0 (Decl x 0.000000) L2 +* shift_fft : Ratio: 3.200000, Size: 1024, Total: 15696, Move: 0 (Decl x 0.000000) L2 +* InOut2 : Ratio: 0.125000, Size: 160, Total: 15856, Move: 0 (Decl x 0.000000) L2 +* Shift : Ratio: 0.003125, Size: 4, Total: 15860, Move: 0 (Decl x 0.000000) L2 +* WinTable : Ratio: 3.200000, Size: 2048, Total: 17908, Move: 2048 (Decl x 1.000000) L2 +* Twiddles_fft : Ratio: 6.400000, Size: 4096, Total: 22004, Move: 4096 (Decl x 1.000000) L2 +* SwapTable_fft : Ratio: 3.200000, Size: 2048, Total: 24052, Move: 2048 (Decl x 1.000000) L2 +*MFCC_FilterBank : Ratio: 0.125000, Size: 320, Total: 24372, Move: 320 (Decl x 1.000000) L2 +* MFCC_Coeffs : Ratio: 1.543750, Size: 988, Total: 25360, Move: 988 (Decl x 1.000000) L2 +* shift_bf : Ratio: 0.125000, Size: 40, Total: 25400, Move: 0 (Decl x 0.000000) L2 +* DCT_Coeff : Ratio: 5.000000, Size: 3200, Total: 28600, Move: 3200 (Decl x 1.000000) L2 + MFCC - IterSpace: Tile0 - L1 Memory: 28600, L2Move: 79340, L3Move: 0, Tiling Overhead: 1.631839 + MFCC Iteration for Tiled Space: 1 + MFCC For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 28600, Reusable Memory: 20136, Used L2 Memory: 0 +================================================================================================= + + +Generating Code For User Kernel: MFCC + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 28600 +L2 Memory size (Bytes) : Given: 350000, Used: 0 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for all user kernels : 0 Bytes +L2 Memory bandwidth for all user kerneks : 79340 Bytes +Sum of all Kernels arguments size : 48620 Bytes +Tiling Bandwith overhead : 1.631839 Move/KerArgSize +Percentage of baseline BW for L2 : inf % +Percentage of baseline BW for L3 : -nan % +Sum of all Kernels operations : 0 Operations + +Basic kernels library : MfccBasicKernels.h +Output Directory : /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL + +The following files have been generated: + MFCCKernels.c Generated C code for the user kernels and the user kernels groups + MFCCKernels.h Header file for the generated C code +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DMEDIUM -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_m_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': False} +Pred/Tot: 93/ 100 Accuracy: 93.00% +Pred/Tot: 188/ 200 Accuracy: 94.00% +Pred/Tot: 277/ 300 Accuracy: 92.33% +Pred/Tot: 367/ 400 Accuracy: 91.75% +Pred/Tot: 462/ 500 Accuracy: 92.40% +Pred/Tot: 551/ 600 Accuracy: 91.83% +Pred/Tot: 642/ 700 Accuracy: 91.71% +Pred/Tot: 735/ 800 Accuracy: 91.88% +Pred/Tot: 828/ 900 Accuracy: 92.00% +Pred/Tot: 921/1000 Accuracy: 92.10% +Pred/Tot: 1011/1100 Accuracy: 91.91% +Pred/Tot: 1102/1200 Accuracy: 91.83% +Pred/Tot: 1193/1300 Accuracy: 91.77% +Pred/Tot: 1280/1400 Accuracy: 91.43% +Pred/Tot: 1365/1500 Accuracy: 91.00% +Pred/Tot: 1460/1600 Accuracy: 91.25% +Pred/Tot: 1551/1700 Accuracy: 91.24% +Pred/Tot: 1645/1800 Accuracy: 91.39% +Pred/Tot: 1739/1900 Accuracy: 91.53% +Pred/Tot: 1834/2000 Accuracy: 91.70% +Pred/Tot: 1926/2100 Accuracy: 91.71% +Pred/Tot: 2020/2200 Accuracy: 91.82% +Pred/Tot: 2114/2300 Accuracy: 91.91% +Pred/Tot: 2205/2400 Accuracy: 91.88% +Pred/Tot: 2298/2500 Accuracy: 91.92% +Pred/Tot: 2391/2600 Accuracy: 91.96% +Pred/Tot: 2483/2700 Accuracy: 91.96% +Pred/Tot: 2575/2800 Accuracy: 91.96% +Pred/Tot: 2666/2900 Accuracy: 91.93% +Pred/Tot: 2759/3000 Accuracy: 91.97% +Pred/Tot: 2856/3100 Accuracy: 92.13% +Pred/Tot: 2950/3200 Accuracy: 92.19% +Pred/Tot: 3042/3300 Accuracy: 92.18% +Pred/Tot: 3131/3400 Accuracy: 92.09% +Pred/Tot: 3220/3500 Accuracy: 92.00% +Pred/Tot: 3316/3600 Accuracy: 92.11% +Pred/Tot: 3406/3700 Accuracy: 92.05% +Pred/Tot: 3497/3800 Accuracy: 92.03% +Pred/Tot: 3588/3900 Accuracy: 92.00% +Pred/Tot: 3679/4000 Accuracy: 91.97% +Pred/Tot: 3771/4100 Accuracy: 91.98% +Pred/Tot: 3863/4200 Accuracy: 91.98% +Pred/Tot: 3954/4300 Accuracy: 91.95% +Pred/Tot: 4046/4400 Accuracy: 91.95% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4086/4444 Accuracy: 91.94% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 0 324 1 5 2 8 4 5 9 3 5 5] + [ 0 7 381 1 0 0 7 0 0 0 0 1] + [ 1 10 6 366 0 11 2 0 0 0 1 9] + [ 0 5 1 0 319 2 2 0 1 7 12 1] + [ 1 7 0 10 1 355 0 0 0 0 0 3] + [ 0 7 5 1 3 0 334 1 0 0 0 1] + [ 0 14 1 0 1 0 4 341 0 0 1 1] + [ 2 16 0 1 3 3 0 0 334 4 0 0] + [ 1 4 0 0 31 1 1 2 8 313 9 3] + [ 4 2 0 0 5 1 1 0 1 1 333 2] + [ 0 9 1 22 1 17 0 1 1 1 4 315]] +Pred/Tot: 91/ 100 Accuracy: 91.00% +Pred/Tot: 183/ 200 Accuracy: 91.50% +Pred/Tot: 276/ 300 Accuracy: 92.00% +Pred/Tot: 365/ 400 Accuracy: 91.25% +Pred/Tot: 456/ 500 Accuracy: 91.20% +Pred/Tot: 547/ 600 Accuracy: 91.17% +Pred/Tot: 637/ 700 Accuracy: 91.00% +Pred/Tot: 729/ 800 Accuracy: 91.12% +Pred/Tot: 821/ 900 Accuracy: 91.22% +Pred/Tot: 912/1000 Accuracy: 91.20% +Pred/Tot: 1002/1100 Accuracy: 91.09% +Pred/Tot: 1095/1200 Accuracy: 91.25% +Pred/Tot: 1183/1300 Accuracy: 91.00% +Pred/Tot: 1279/1400 Accuracy: 91.36% +Pred/Tot: 1371/1500 Accuracy: 91.40% +Pred/Tot: 1466/1600 Accuracy: 91.62% +Pred/Tot: 1558/1700 Accuracy: 91.65% +Pred/Tot: 1654/1800 Accuracy: 91.89% +Pred/Tot: 1744/1900 Accuracy: 91.79% +Pred/Tot: 1830/2000 Accuracy: 91.50% +Pred/Tot: 1922/2100 Accuracy: 91.52% +Pred/Tot: 2015/2200 Accuracy: 91.59% +Pred/Tot: 2107/2300 Accuracy: 91.61% +Pred/Tot: 2195/2400 Accuracy: 91.46% +Pred/Tot: 2286/2500 Accuracy: 91.44% +Pred/Tot: 2377/2600 Accuracy: 91.42% +Pred/Tot: 2468/2700 Accuracy: 91.41% +Pred/Tot: 2557/2800 Accuracy: 91.32% +Pred/Tot: 2647/2900 Accuracy: 91.28% +Pred/Tot: 2741/3000 Accuracy: 91.37% +Pred/Tot: 2832/3100 Accuracy: 91.35% +Pred/Tot: 2926/3200 Accuracy: 91.44% +Pred/Tot: 3021/3300 Accuracy: 91.55% +Pred/Tot: 3114/3400 Accuracy: 91.59% +Pred/Tot: 3207/3500 Accuracy: 91.63% +Pred/Tot: 3302/3600 Accuracy: 91.72% +Pred/Tot: 3392/3700 Accuracy: 91.68% +Pred/Tot: 3483/3800 Accuracy: 91.66% +Pred/Tot: 3578/3900 Accuracy: 91.74% +Pred/Tot: 3671/4000 Accuracy: 91.77% +Pred/Tot: 3760/4100 Accuracy: 91.71% +Pred/Tot: 3851/4200 Accuracy: 91.69% +Pred/Tot: 3945/4300 Accuracy: 91.74% +Pred/Tot: 4035/4400 Accuracy: 91.70% +Pred/Tot: 4125/4500 Accuracy: 91.67% +Pred/Tot: 4215/4600 Accuracy: 91.63% +Pred/Tot: 4306/4700 Accuracy: 91.62% +Pred/Tot: 4397/4800 Accuracy: 91.60% + +FINAL TESTING ACCURACY: +Pred/Tot: 4479/4889 Accuracy: 91.61% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 1 369 0 6 4 6 6 4 6 0 2 4] + [ 0 13 385 6 0 1 13 1 0 0 0 0] + [ 0 7 2 372 0 14 4 0 0 0 0 6] + [ 1 8 0 0 396 2 1 0 5 6 5 1] + [ 0 12 3 10 1 372 1 1 1 0 0 5] + [ 0 12 6 2 0 1 388 2 0 0 1 0] + [ 0 23 0 0 1 1 6 363 0 0 1 1] + [ 1 12 0 0 7 3 0 0 361 7 1 4] + [ 0 8 0 2 30 2 1 0 9 344 2 4] + [ 0 4 0 1 6 6 1 0 0 0 390 3] + [ 0 7 4 38 3 14 1 0 0 0 4 331]] diff --git a/accuracy_log/log_test_medium_spectr_v2.txt b/accuracy_log/log_test_medium_spectr_v2.txt new file mode 100644 index 0000000..d5758dd --- /dev/null +++ b/accuracy_log/log_test_medium_spectr_v2.txt @@ -0,0 +1,2090 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_m_quant.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 0 +WARNING:tensorflow:From utils/test_accuracy_emul.py:311: The name tf.app.run is deprecated. Please use tf.compat.v1.app.run instead. + +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from main_emulation.c:32: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_BasicKernels_SQ8.h:3, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.h:5, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.c:1: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:17: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WriteImageToFile’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:437:18: note: in expansion of macro ‘__OPEN_WRITE’ + 437 | void *File = __OPEN_WRITE(fs, ImageName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:454:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 454 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:460:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 460 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:473:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 473 | ret+=__WRITE(File,OutBuffer +(CHUNK_SIZE*i), CHUNK_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:476:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 476 | ret+=__WRITE(File,OutBuffer+(CHUNK_SIZE*steps) , ((W*H*PixelSize) % CHUNK_SIZE)*sizeof(unsigned char)); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:479:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 479 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:57: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result] + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:9: note: in expansion of macro ‘__WRITE’ + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:7: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c: In function ‘AT_TensorGetNextPage’: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:79:63: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 79 | AT_HYPERFLASH_FS_CL_COPY((AT_HYPERFLASH_FS_T *) L3_Device, (AT_HYPERFLASH_FS_EXT_ADDR_TYPE) (Addr+Offset), (AT_HYPERFLASH_FS_INT_ADDR_TYPE) L2_BufferAddr, Size, 0, L3_Event); + | ^ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:225:36: note: in definition of macro ‘AT_HYPERFLASH_FS_CL_COPY’ + 225 | __at_hyperflash_fs_copy(*(file), ext, loc, size, dir) + | ^~~ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c: In function ‘WriteWavToFileNew’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:236:18: note: in expansion of macro ‘__OPEN_WRITE’ + 236 | void *File = __OPEN_WRITE(fs, FileName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:328:20: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 328 | ret += __WRITE(File, header_buffer, WAV_HEADER_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:335:21: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 335 | ret += __WRITE(File, data, Size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:338:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 338 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=1 LARGE=0 WITH_MFCC=1 USE_POWER=0 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_SQ8_EMUL +cp model/KWS_ds_cnn_m_quant.tflite BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite +echo "GENERATING NNTOOL STATE FILE" +GENERATING NNTOOL STATE FILE +nntool -s model/nntool_script BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +open - opening graph file BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite load_quantization = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +forwards SOFTMAX_0_11 in: -32.63<(i8-0.00)*0.25492236<32.38 out: None stop [] fusion False +handler SoftmaxTanHMult selected for SoftMaxParameters(SOFTMAX_0_11) +forwards handler SOFTMAX_0_11 returned in: -64.00<(i8-0.00)*0.50000000<63.50 forced out: -1.00<(i16-0.00)*0.00003052<1.00 fusion False +forwards in edge 0 does not match was -32.63<(i8-0.00)*0.25492236<32.38 need -64.00<(i8-0.00)*0.50000000<63.50 forced +go backwackwards to F 12x1x1x172 B 1 +backwards FULLY_CONNECTED_0_10 in: -9.26<(i8-0.00)*0.07232232<9.18,chan<(i8-0.00)*chan W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1763000 + +==== Process Tiling For User Kernel: S4_Conv2d_172x1x10x4_Relu ======================= +S4_Conv2d_172x1x10x4_Relu Partition[0] Size = 194929 (Min: 200, Max: 275121), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=25 + In Dim: 58, TileOverlap: 8, Ratio: 2.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: Bias, was using 704 Bytes will require 688 Bytes buffer +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: Scale, was using 176 Bytes will require 172 Bytes buffer +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: ScaleN, was using 176 Bytes will require 172 Bytes buffer +S4_Conv2d_172x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 4 Parametric Space: [D1, M0=88] Parametric Space: [D0, M1=1] + In : Ratio: 2.000000, FixDim: 10, VarDim: 22 [ 49], Size: 440, Total: 440, Move: 1460 (Decl x 2.979592) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 1128, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 1300, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 1472, Move: 172 (Decl x 1.000000) L2 +@ Filter : Ratio: 0.000000, Size: 7040, Total: 8512, Move: 6880 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 12320, Total: 20832, Move: 43000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 24640, Total: 45472, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45484, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_172x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 45484, L2Move: 52381, L3Move: 0, Tiling Overhead: 1.018868 +S4_Conv2d_172x1x10x4_Relu Partial buffering on Arg: Filter, From: D0 To: D1. Current is (Par) 1 x [W:1, H:1] x 40 => Partial buffer size is 7040 Bytes +S4_Conv2d_172x1x10x4_Relu Found Parametric value for space D1 (Initial: 172, Div: 8) = 88 [88*1 + 84] and space D0 (Initial: 1, Div: 4) = 1 [1*1 + 0], Iteration for Tiled Space: 4 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: In, Size: 220, Base1: 0, Base2: 220 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Bias, Size: 688, Base1: 440, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Scale, Size: 172, Base1: 1128, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: ScaleN, Size: 172, Base1: 1300, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Filter, Size: 3520, Base1: 1472, Base2: 4992 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Out, Size: 6160, Base1: 8512, Base2: 14672 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: ConvOut, Size: 24640, Base1: 20832, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Infos, Size: 12, Base1: 45472, Base2: 0 +S4_Conv2d_172x1x10x4_Relu For Iter Space: 0 Iteration count: 4 (Last one is truncated), Given L1 Memory: 48736, Used L1 Memory: 45484, Reusable Memory: 3252, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S7_Conv2d_172x1x3x3_Relu ======================= +S7_Conv2d_172x1x3x3_Relu Partition[0] Size = 63665 (Min: 60, Max: 165257), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=13 + In Dim: 27, TileOverlap: 1, Ratio: 2.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 384 Bytes will require 688 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 96 Bytes will require 172 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 Bytes will require 172 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 864 Bytes will require 1548 Bytes buffer +S7_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 43000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 24688, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 24860, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 25032, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 26580, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 32820, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 45300, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45312, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45312, L2Move: 56769, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 48 [48*3 + 28], Iteration for Tiled Space: 1 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: In, Size: 12000, Base1: 0, Base2: 12000 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 24000, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 24688, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 24860, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 25032, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 3120, Base1: 26580, Base2: 29700 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 12480, Base1: 32820, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45300, Base2: 0 +S7_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45312, Reusable Memory: 3424, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S10_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S10_Conv2d_172x172x1x1_Relu ======================= +S10_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S10_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S10_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S10_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S10_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S10_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S10_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S10_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S13_Conv2d_172x1x3x3_Relu ======================= +S13_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S13_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S13_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S13_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S13_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S16_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S16_Conv2d_172x172x1x1_Relu ======================= +S16_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S16_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S16_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S16_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S16_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S16_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S16_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S16_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S19_Conv2d_172x1x3x3_Relu ======================= +S19_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S19_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S19_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S19_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S19_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S22_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S22_Conv2d_172x172x1x1_Relu ======================= +S22_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S22_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S22_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S22_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S22_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S22_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S22_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S22_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S25_Conv2d_172x1x3x3_Relu ======================= +S25_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S25_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S25_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S25_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S25_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S28_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S28_Conv2d_172x172x1x1_Relu ======================= +S28_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S28_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S28_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S28_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S28_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S28_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S28_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S28_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 11180 + +==== Process Tiling For User Kernel: S29_AveragePool_13x5 ======================= +S29_AveragePool_13x5 Partition[0] Size = 23063 (Min: 130, Max: 22731), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=1 + In Dim: 13, TileOverlap: 11, Ratio: 2.000000 + Out Dim: 1, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S29_AveragePool_13x5, Total Raw Memory: 11364 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S29_AveragePool_13x5, Arg: In, Size: 11180, Base1: 0, Base2: 0 +Ker: S29_AveragePool_13x5, Arg: Out, Size: 172, Base1: 11180, Base2: 0 +Ker: S29_AveragePool_13x5, Arg: Infos, Size: 12, Base1: 11352, Base2: 0 +S29_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 11364, Reusable Memory: 37372, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S32_Linear_12x172x1x1, Linear: InDim: 172, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S32_Linear_12x172x1x1 ======================= +S32_Linear_12x172x1x1 Partition[0] Size = 4491 (Min: 0, Max: 4575), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=1 + +Kernel: S32_Linear_12x172x1x1, Total Raw Memory: 2332 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S32_Linear_12x172x1x1, Arg: In, Size: 172, Base1: 0, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Filter, Size: 2064, Base1: 172, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Bias, Size: 48, Base1: 2236, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Out, Size: 12, Base1: 2284, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Scale, Size: 12, Base1: 2296, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: ScaleN, Size: 12, Base1: 2308, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Infos, Size: 12, Base1: 2320, Base2: 0 +S32_Linear_12x172x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 2332, Reusable Memory: 46404, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S33_SoftMax ======================= + S33_SoftMax Partition[0] Size = 51 (Min: 8, Max: 63), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=12 + In Dim: 12, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 12, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S33_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S33_SoftMax, Arg: In, Size: 12, Base1: 0, Base2: 0 +Ker: S33_SoftMax, Arg: Out, Size: 24, Base1: 12, Base2: 0 +Ker: S33_SoftMax, Arg: Infos, Size: 12, Base1: 36, Base2: 0 + S33_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S32_Output[ In] Adding Edge From S32_Linear_12x172x1x1 To S33_SoftMax New + Symbol: S29_Output[ In] Adding Edge From S29_AveragePool_13x5 To S32_Linear_12x172x1x1 New + Symbol: S28_Output[ In] Adding Edge From S28_Conv2d_172x172x1x1_Relu To S29_AveragePool_13x5 New + Symbol: S25_Output[ In] Adding Edge From S25_Conv2d_172x1x3x3_Relu To S28_Conv2d_172x172x1x1_Relu New + Symbol: S22_Output[ In] Adding Edge From S22_Conv2d_172x172x1x1_Relu To S25_Conv2d_172x1x3x3_Relu New + Symbol: S19_Output[ In] Adding Edge From S19_Conv2d_172x1x3x3_Relu To S22_Conv2d_172x172x1x1_Relu New + Symbol: S16_Output[ In] Adding Edge From S16_Conv2d_172x172x1x1_Relu To S19_Conv2d_172x1x3x3_Relu New + Symbol: S13_Output[ In] Adding Edge From S13_Conv2d_172x1x3x3_Relu To S16_Conv2d_172x172x1x1_Relu New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_172x172x1x1_Relu To S13_Conv2d_172x1x3x3_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_172x1x3x3_Relu To S10_Conv2d_172x172x1x1_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_172x1x10x4_Relu To S7_Conv2d_172x1x3x3_Relu New + Symbol: Output_1[ Out] Adding Edge From S33_SoftMax To __GraphExit__ New + Symbol: S33_Infos[ In] Adding Edge From __GraphEntry__ To S33_SoftMax New + Symbol: S32_Infos[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 New + Symbol: S32_Mul_shift[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: S32_Mul_scale[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: Dscnnfc1matmul_bias[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: Dscnnfc1weights_quantfakequant[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: S29_Infos[ In] Adding Edge From __GraphEntry__ To S29_AveragePool_13x5 New + Symbol: S28_Infos[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu New + Symbol: S28_Mul_shift[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: S28_Mul_scale[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: S25_Infos[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu New + Symbol: S25_Mul_shift[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: S25_Mul_scale[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: S22_Infos[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu New + Symbol: S22_Mul_shift[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: S22_Mul_scale[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: S19_Infos[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu New + Symbol: S19_Mul_shift[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: S19_Mul_scale[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: S16_Infos[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu New + Symbol: S16_Mul_shift[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: S16_Mul_scale[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu New + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu New + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu New + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Dscnnconv_1conv2d_fold_bias[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Dscnnconv_1weights_quantfakequ[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Input_1[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 54180 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 197153 => Alloc: OK + +[FULL] Remapping [54180 .. 197152] to [0 .. 142972] Align compensation: 3 +[PART] Remapping [0 .. 54179] to [142976 .. 197155] Align compensation: 0 +[PART] Remapping [197153 .. 349999] to [197156 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_m_quantCNN is sucessfull, L2: 197153 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => Input_1 + (null) => Dscnnconv_1weights_quantfakequ + (null) => Dscnnconv_1conv2d_fold_bias + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S4_Infos + (null) => Dscnnconv_ds_1dw_convweights_q + (null) => Dscnnconv_ds_1dw_convdepthwise + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S7_Infos + (null) => Dscnnconv_ds_1pw_convweights_q + (null) => Dscnnconv_ds_1pw_convconv2d_fo + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S10_Infos + (null) => Dscnnconv_ds_2dw_convweights_q + (null) => Dscnnconv_ds_2dw_convdepthwise + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S13_Infos + (null) => Dscnnconv_ds_2pw_convweights_q + (null) => Dscnnconv_ds_2pw_convconv2d_fo + (null) => S16_Mul_scale + (null) => S16_Mul_shift + (null) => S16_Infos + (null) => Dscnnconv_ds_3dw_convweights_q + (null) => Dscnnconv_ds_3dw_convdepthwise + (null) => S19_Mul_scale + (null) => S19_Mul_shift + (null) => S19_Infos + (null) => Dscnnconv_ds_3pw_convweights_q + (null) => Dscnnconv_ds_3pw_convconv2d_fo + (null) => S22_Mul_scale + (null) => S22_Mul_shift + (null) => S22_Infos + (null) => Dscnnconv_ds_4dw_convweights_q + (null) => Dscnnconv_ds_4dw_convdepthwise + (null) => S25_Mul_scale + (null) => S25_Mul_shift + (null) => S25_Infos + (null) => Dscnnconv_ds_4pw_convweights_q + (null) => Dscnnconv_ds_4pw_convconv2d_fo + (null) => S28_Mul_scale + (null) => S28_Mul_shift + (null) => S28_Infos + (null) => S29_Infos + (null) => Dscnnfc1weights_quantfakequant + (null) => Dscnnfc1matmul_bias + (null) => S32_Mul_scale + (null) => S32_Mul_shift + (null) => S32_Infos + (null) => S33_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 + + Living Dynamic Symbols: [Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S4_Conv2d_172x1x10x4_Relu, Operations: 1763000 + I In => Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 1460, TileOverhead: 2.979592, L2Buff: 0, Addr: 0 +CI PartBuff Filter => Dscnnconv_1weights_quantfakequ --L2-- Size: 6880, L3_Move: 0, L2_Move: 6880, TileOverhead: 1.000000, L2Buff: 0, Addr: 1472 +CI Buff Bias => Dscnnconv_1conv2d_fold_bias --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 440 + O Out => S4_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8512 +CI Buff Scale => S4_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1128 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1300 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45472 + Kernel Memory : L3: 0, L2: 51411 + Kernel Total Memory: 51411, L3 moves: 0, L2 moves: 52381, Move overhead: 1.018868 + Kernel Operations : 1763000 [KernelOper/GraphOper: 17.780609%], Move/Operation ratio: [L3: 0.000000, L2: 0.029711] + Successors: 2 + + Living Dynamic Symbols: [Input_1] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S7_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S4_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_1dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 25032 +CI Buff Bias => Dscnnconv_ds_1dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26580 +CI Buff Scale => S7_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24688 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24860 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45300 + Kernel Memory : L3: 0, L2: 56769 + Kernel Total Memory: 56769, L3 moves: 0, L2 moves: 56769, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.507773] + Successors: 3 + + Living Dynamic Symbols: [S4_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S10_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_1pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_1pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S10_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S10_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 4 + + Living Dynamic Symbols: [S7_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S13_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S10_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_2dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_2dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S13_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S13_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 5 + + Living Dynamic Symbols: [S10_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S16_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S13_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_2pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_2pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S16_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S16_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S16_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S16_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 6 + + Living Dynamic Symbols: [S13_Output] [S16_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S19_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S16_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_3dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_3dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S19_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S19_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S19_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S19_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 7 + + Living Dynamic Symbols: [S16_Output] [S19_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S22_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S19_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_3pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_3pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S22_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S22_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S22_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S22_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 8 + + Living Dynamic Symbols: [S19_Output] [S22_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S25_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S22_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_4dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_4dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S25_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S25_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S25_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S25_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 9 + + Living Dynamic Symbols: [S22_Output] [S25_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S28_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S25_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_4pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_4pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S28_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S28_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S28_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S28_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 10 + + Living Dynamic Symbols: [S25_Output] [S28_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S29_AveragePool_13x5, Operations: 11180 + I Buff In => S28_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S29_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11180 +CI Buff Infos => S29_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 11352 + Kernel Memory : L3: 0, L2: 11361 + Kernel Total Memory: 11361, L3 moves: 0, L2 moves: 11361, Move overhead: 1.000000 + Kernel Operations : 11180 [KernelOper/GraphOper: 0.112755%], Move/Operation ratio: [L3: 0.000000, L2: 1.016190] + Successors: 11 + + Living Dynamic Symbols: [S28_Output] [S29_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S32_Linear_12x172x1x1, Operations: 2064 + I Buff In => S29_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnfc1weights_quantfakequant --L2-- Size: 2064, L3_Move: 0, L2_Move: 2064, TileOverhead: 1.000000, L2Buff: 0, Addr: 172 +CI Buff Bias => Dscnnfc1matmul_bias --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 2236 + O Buff Out => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2284 +CI Buff Scale => S32_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2296 +CI Buff ScaleN => S32_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2308 +CI Buff Infos => S32_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 2320 + Kernel Memory : L3: 0, L2: 2329 + Kernel Total Memory: 2329, L3 moves: 0, L2 moves: 2329, Move overhead: 1.000000 + Kernel Operations : 2064 [KernelOper/GraphOper: 0.020816%], Move/Operation ratio: [L3: 0.000000, L2: 1.128392] + Successors: 12 + + Living Dynamic Symbols: [S29_Output] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S33_SoftMax, Operations: 12 + I Buff In => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S33_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000121%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 56769 + Graph nodes min global memory: L3: 0, L2: 56772 + Graph sum of kernel arguments size: 408702, L3 moves: 0, L2 moves: 499112, Move overhead: 1.221213 + Graph total operations: 9915296 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 408702, Total L3_Move: 0, Total L2_Move: 499112, Tiling Overhead Average: 1.221213 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 54180 + Const 0 30628 + Total 0 56772 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + Input_1 Externally allocated + Dscnnconv_1weights_quantfakequ INSTALL: HyperFlash[ 0: 13]@ 118336 LOAD: L2[ 0: 13]@ 118336 EXEC: L2[ 0: 13]@ 118336 , Size: 6880 + Dscnnconv_1conv2d_fold_bias INSTALL: HyperFlash[ 0: 13]@ 133472 LOAD: L2[ 0: 13]@ 133472 EXEC: L2[ 0: 13]@ 133472 , Size: 688 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 139664 LOAD: L2[ 0: 13]@ 139664 EXEC: L2[ 0: 13]@ 139664 , Size: 172 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 139836 LOAD: L2[ 0: 13]@ 139836 EXEC: L2[ 0: 13]@ 139836 , Size: 172 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 142808 LOAD: L2[ 0: 13]@ 142808 EXEC: L2[ 0: 13]@ 142808 , Size: 9 + Dscnnconv_ds_1dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 127280 LOAD: L2[ 0: 13]@ 127280 EXEC: L2[ 0: 13]@ 127280 , Size: 1548 + Dscnnconv_ds_1dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 134160 LOAD: L2[ 0: 13]@ 134160 EXEC: L2[ 0: 13]@ 134160 , Size: 688 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140008 LOAD: L2[ 0: 13]@ 140008 EXEC: L2[ 0: 13]@ 140008 , Size: 172 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140180 LOAD: L2[ 0: 13]@ 140180 EXEC: L2[ 0: 13]@ 140180 , Size: 172 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 142820 LOAD: L2[ 0: 13]@ 142820 EXEC: L2[ 0: 13]@ 142820 , Size: 9 + Dscnnconv_ds_1pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 29584 + Dscnnconv_ds_1pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 134848 LOAD: L2[ 0: 13]@ 134848 EXEC: L2[ 0: 13]@ 134848 , Size: 688 + S10_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140352 LOAD: L2[ 0: 13]@ 140352 EXEC: L2[ 0: 13]@ 140352 , Size: 172 + S10_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140524 LOAD: L2[ 0: 13]@ 140524 EXEC: L2[ 0: 13]@ 140524 , Size: 172 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 142832 LOAD: L2[ 0: 13]@ 142832 EXEC: L2[ 0: 13]@ 142832 , Size: 9 + Dscnnconv_ds_2dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 128828 LOAD: L2[ 0: 13]@ 128828 EXEC: L2[ 0: 13]@ 128828 , Size: 1548 + Dscnnconv_ds_2dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 135536 LOAD: L2[ 0: 13]@ 135536 EXEC: L2[ 0: 13]@ 135536 , Size: 688 + S13_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140696 LOAD: L2[ 0: 13]@ 140696 EXEC: L2[ 0: 13]@ 140696 , Size: 172 + S13_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140868 LOAD: L2[ 0: 13]@ 140868 EXEC: L2[ 0: 13]@ 140868 , Size: 172 + S13_Infos INSTALL: HyperFlash[ 0: 13]@ 142844 LOAD: L2[ 0: 13]@ 142844 EXEC: L2[ 0: 13]@ 142844 , Size: 9 + Dscnnconv_ds_2pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 29584 LOAD: L2[ 0: 13]@ 29584 EXEC: L2[ 0: 13]@ 29584 , Size: 29584 + Dscnnconv_ds_2pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 136224 LOAD: L2[ 0: 13]@ 136224 EXEC: L2[ 0: 13]@ 136224 , Size: 688 + S16_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141040 LOAD: L2[ 0: 13]@ 141040 EXEC: L2[ 0: 13]@ 141040 , Size: 172 + S16_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141212 LOAD: L2[ 0: 13]@ 141212 EXEC: L2[ 0: 13]@ 141212 , Size: 172 + S16_Infos INSTALL: HyperFlash[ 0: 13]@ 142856 LOAD: L2[ 0: 13]@ 142856 EXEC: L2[ 0: 13]@ 142856 , Size: 9 + Dscnnconv_ds_3dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 130376 LOAD: L2[ 0: 13]@ 130376 EXEC: L2[ 0: 13]@ 130376 , Size: 1548 + Dscnnconv_ds_3dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 136912 LOAD: L2[ 0: 13]@ 136912 EXEC: L2[ 0: 13]@ 136912 , Size: 688 + S19_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141384 LOAD: L2[ 0: 13]@ 141384 EXEC: L2[ 0: 13]@ 141384 , Size: 172 + S19_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141556 LOAD: L2[ 0: 13]@ 141556 EXEC: L2[ 0: 13]@ 141556 , Size: 172 + S19_Infos INSTALL: HyperFlash[ 0: 13]@ 142868 LOAD: L2[ 0: 13]@ 142868 EXEC: L2[ 0: 13]@ 142868 , Size: 9 + Dscnnconv_ds_3pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 59168 LOAD: L2[ 0: 13]@ 59168 EXEC: L2[ 0: 13]@ 59168 , Size: 29584 + Dscnnconv_ds_3pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 137600 LOAD: L2[ 0: 13]@ 137600 EXEC: L2[ 0: 13]@ 137600 , Size: 688 + S22_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141728 LOAD: L2[ 0: 13]@ 141728 EXEC: L2[ 0: 13]@ 141728 , Size: 172 + S22_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141900 LOAD: L2[ 0: 13]@ 141900 EXEC: L2[ 0: 13]@ 141900 , Size: 172 + S22_Infos INSTALL: HyperFlash[ 0: 13]@ 142880 LOAD: L2[ 0: 13]@ 142880 EXEC: L2[ 0: 13]@ 142880 , Size: 9 + Dscnnconv_ds_4dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 131924 LOAD: L2[ 0: 13]@ 131924 EXEC: L2[ 0: 13]@ 131924 , Size: 1548 + Dscnnconv_ds_4dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 138288 LOAD: L2[ 0: 13]@ 138288 EXEC: L2[ 0: 13]@ 138288 , Size: 688 + S25_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142072 LOAD: L2[ 0: 13]@ 142072 EXEC: L2[ 0: 13]@ 142072 , Size: 172 + S25_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142244 LOAD: L2[ 0: 13]@ 142244 EXEC: L2[ 0: 13]@ 142244 , Size: 172 + S25_Infos INSTALL: HyperFlash[ 0: 13]@ 142892 LOAD: L2[ 0: 13]@ 142892 EXEC: L2[ 0: 13]@ 142892 , Size: 9 + Dscnnconv_ds_4pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 88752 LOAD: L2[ 0: 13]@ 88752 EXEC: L2[ 0: 13]@ 88752 , Size: 29584 + Dscnnconv_ds_4pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 138976 LOAD: L2[ 0: 13]@ 138976 EXEC: L2[ 0: 13]@ 138976 , Size: 688 + S28_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142416 LOAD: L2[ 0: 13]@ 142416 EXEC: L2[ 0: 13]@ 142416 , Size: 172 + S28_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142588 LOAD: L2[ 0: 13]@ 142588 EXEC: L2[ 0: 13]@ 142588 , Size: 172 + S28_Infos INSTALL: HyperFlash[ 0: 13]@ 142904 LOAD: L2[ 0: 13]@ 142904 EXEC: L2[ 0: 13]@ 142904 , Size: 9 + S29_Infos INSTALL: HyperFlash[ 0: 13]@ 142916 LOAD: L2[ 0: 13]@ 142916 EXEC: L2[ 0: 13]@ 142916 , Size: 9 + Dscnnfc1weights_quantfakequant INSTALL: HyperFlash[ 0: 13]@ 125216 LOAD: L2[ 0: 13]@ 125216 EXEC: L2[ 0: 13]@ 125216 , Size: 2064 + Dscnnfc1matmul_bias INSTALL: HyperFlash[ 0: 13]@ 142760 LOAD: L2[ 0: 13]@ 142760 EXEC: L2[ 0: 13]@ 142760 , Size: 48 + S32_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142928 LOAD: L2[ 0: 13]@ 142928 EXEC: L2[ 0: 13]@ 142928 , Size: 12 + S32_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142940 LOAD: L2[ 0: 13]@ 142940 EXEC: L2[ 0: 13]@ 142940 , Size: 12 + S32_Infos INSTALL: HyperFlash[ 0: 13]@ 142952 LOAD: L2[ 0: 13]@ 142952 EXEC: L2[ 0: 13]@ 142952 , Size: 9 + S33_Infos INSTALL: HyperFlash[ 0: 13]@ 142964 LOAD: L2[ 0: 13]@ 142964 EXEC: L2[ 0: 13]@ 142964 , Size: 9 + Output_1 Externally allocated + S4_Output EXEC: L2[ 1: 2]@ 154156 , Size: 43000 + S7_Output EXEC: L2[ 2: 3]@ 142976 , Size: 11180 + S10_Output EXEC: L2[ 3: 4]@ 154156 , Size: 11180 + S13_Output EXEC: L2[ 4: 5]@ 142976 , Size: 11180 + S16_Output EXEC: L2[ 5: 6]@ 154156 , Size: 11180 + S19_Output EXEC: L2[ 6: 7]@ 142976 , Size: 11180 + S22_Output EXEC: L2[ 7: 8]@ 154156 , Size: 11180 + S25_Output EXEC: L2[ 8: 9]@ 165336 , Size: 11180 + S28_Output EXEC: L2[ 9: 10]@ 142976 , Size: 11180 + S29_Output EXEC: L2[ 10: 11]@ 154156 , Size: 172 + S32_Output EXEC: L2[ 11: 12]@ 142976 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S4_Conv2d_172x1x10x4_Relu +Generating Code For User Kernel: S7_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S10_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S13_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S16_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S19_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S22_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S25_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S28_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S29_AveragePool_13x5 +Generating Code For User Kernel: S32_Linear_12x172x1x1 +Generating Code For User Kernel: S33_SoftMax +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1weights_quantfakequ.tensor: 6880 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1conv2d_fold_bias.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S29_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1weights_quantfakequant.tensor: 2064 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1matmul_bias.tensor: 12 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S33_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_m_quant_L3_Flash_Const.dat (size 142976) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 45492 +L2 Memory size (Bytes) : Given: 350000, Used: 197153 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 499112 Bytes +Sum of all Kernels arguments size : 408702 Bytes +Tiling Bandwith overhead : 1.221213 Move/KerArgSize +Sum of baseline bandwidth : 15711780 Bytes +Percentage of baseline BW for L2 : 3.17667 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 9915296 Operations +Total amount of flash coefficients : 142976 Bytes + +Basic kernels library : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_m_quant.h +Output Directory : BUILD_MODEL_SQ8_EMUL + +The following files have been generated: + KWS_ds_cnn_m_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_m_quantKernels.h Header file for the generated C code + KWS_ds_cnn_m_quant_L3_Flash_Const.dat Flash content for Graph constants +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o -lm +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +W0608 13:54:28.194895 139954392971072 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +W0608 13:54:28.195389 139954392971072 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +W0608 13:54:28.195670 139954392971072 module_wrapper.py:139] From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +2021-06-08 13:54:28.197004: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA +2021-06-08 13:54:28.205698: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2096060000 Hz +2021-06-08 13:54:28.206151: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5642eb57d520 initialized for platform Host (this does not guarantee that XLA will be used). Devices: +2021-06-08 13:54:28.206211: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2021-06-08 13:54:28.208853: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/marco-gwt/GWT/gap_sdk/install/workstation/lib +2021-06-08 13:54:28.208921: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: UNKNOWN ERROR (303) +2021-06-08 13:54:28.208999: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ubuntu): /proc/driver/nvidia/version does not exist +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +W0608 13:56:34.251579 139954392971072 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +W0608 13:56:34.254798 139954392971072 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +W0608 13:56:34.471113 139954392971072 module_wrapper.py:139] From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +INFO:tensorflow:Validation set size:4445 +I0608 13:56:34.471646 139954392971072 test_accuracy_emul.py:111] Validation set size:4445 +INFO:tensorflow:Test set size:4890 +I0608 13:59:34.757465 139954392971072 test_accuracy_emul.py:157] Test set size:4890 +rm: cannot remove 'test.pgm': No such file or directory +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': False} +Pred/Tot: 91/ 100 Accuracy: 91.00% +Pred/Tot: 188/ 200 Accuracy: 94.00% +Pred/Tot: 278/ 300 Accuracy: 92.67% +Pred/Tot: 371/ 400 Accuracy: 92.75% +Pred/Tot: 468/ 500 Accuracy: 93.60% +Pred/Tot: 557/ 600 Accuracy: 92.83% +Pred/Tot: 651/ 700 Accuracy: 93.00% +Pred/Tot: 744/ 800 Accuracy: 93.00% +Pred/Tot: 839/ 900 Accuracy: 93.22% +Pred/Tot: 937/1000 Accuracy: 93.70% +Pred/Tot: 1033/1100 Accuracy: 93.91% +Pred/Tot: 1125/1200 Accuracy: 93.75% +Pred/Tot: 1213/1300 Accuracy: 93.31% +Pred/Tot: 1305/1400 Accuracy: 93.21% +Pred/Tot: 1391/1500 Accuracy: 92.73% +Pred/Tot: 1489/1600 Accuracy: 93.06% +Pred/Tot: 1583/1700 Accuracy: 93.12% +Pred/Tot: 1679/1800 Accuracy: 93.28% +Pred/Tot: 1771/1900 Accuracy: 93.21% +Pred/Tot: 1865/2000 Accuracy: 93.25% +Pred/Tot: 1957/2100 Accuracy: 93.19% +Pred/Tot: 2050/2200 Accuracy: 93.18% +Pred/Tot: 2144/2300 Accuracy: 93.22% +Pred/Tot: 2235/2400 Accuracy: 93.12% +Pred/Tot: 2324/2500 Accuracy: 92.96% +Pred/Tot: 2417/2600 Accuracy: 92.96% +Pred/Tot: 2512/2700 Accuracy: 93.04% +Pred/Tot: 2606/2800 Accuracy: 93.07% +Pred/Tot: 2698/2900 Accuracy: 93.03% +Pred/Tot: 2787/3000 Accuracy: 92.90% +Pred/Tot: 2881/3100 Accuracy: 92.94% +Pred/Tot: 2978/3200 Accuracy: 93.06% +Pred/Tot: 3069/3300 Accuracy: 93.00% +Pred/Tot: 3158/3400 Accuracy: 92.88% +Pred/Tot: 3249/3500 Accuracy: 92.83% +Pred/Tot: 3340/3600 Accuracy: 92.78% +Pred/Tot: 3432/3700 Accuracy: 92.76% +Pred/Tot: 3527/3800 Accuracy: 92.82% +Pred/Tot: 3621/3900 Accuracy: 92.85% +Pred/Tot: 3714/4000 Accuracy: 92.85% +Pred/Tot: 3809/4100 Accuracy: 92.90% +Pred/Tot: 3900/4200 Accuracy: 92.86% +Pred/Tot: 3995/4300 Accuracy: 92.91% +Pred/Tot: 4084/4400 Accuracy: 92.82% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4128/4444 Accuracy: 92.89% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 0 300 6 4 7 8 6 10 10 1 5 14] + [ 0 2 390 1 0 0 0 0 0 0 0 4] + [ 1 5 3 370 0 6 0 0 0 1 0 20] + [ 0 3 1 0 327 1 3 0 1 5 7 2] + [ 0 4 1 5 0 357 2 0 0 0 1 7] + [ 0 1 10 2 1 0 334 1 0 1 0 2] + [ 0 5 3 1 0 0 1 351 0 0 0 2] + [ 1 9 0 2 2 3 0 0 332 9 0 5] + [ 0 0 0 1 34 0 1 1 8 317 5 6] + [ 1 2 1 0 11 4 0 0 0 2 328 1] + [ 0 2 2 6 1 6 0 0 1 0 3 351]] +Pred/Tot: 93/ 100 Accuracy: 93.00% +Pred/Tot: 185/ 200 Accuracy: 92.50% +Pred/Tot: 276/ 300 Accuracy: 92.00% +Pred/Tot: 367/ 400 Accuracy: 91.75% +Pred/Tot: 457/ 500 Accuracy: 91.40% +Pred/Tot: 549/ 600 Accuracy: 91.50% +Pred/Tot: 644/ 700 Accuracy: 92.00% +Pred/Tot: 737/ 800 Accuracy: 92.12% +Pred/Tot: 831/ 900 Accuracy: 92.33% +Pred/Tot: 925/1000 Accuracy: 92.50% +Pred/Tot: 1014/1100 Accuracy: 92.18% +Pred/Tot: 1110/1200 Accuracy: 92.50% +Pred/Tot: 1199/1300 Accuracy: 92.23% +Pred/Tot: 1292/1400 Accuracy: 92.29% +Pred/Tot: 1384/1500 Accuracy: 92.27% +Pred/Tot: 1478/1600 Accuracy: 92.38% +Pred/Tot: 1573/1700 Accuracy: 92.53% +Pred/Tot: 1668/1800 Accuracy: 92.67% +Pred/Tot: 1762/1900 Accuracy: 92.74% +Pred/Tot: 1852/2000 Accuracy: 92.60% +Pred/Tot: 1946/2100 Accuracy: 92.67% +Pred/Tot: 2042/2200 Accuracy: 92.82% +Pred/Tot: 2135/2300 Accuracy: 92.83% +Pred/Tot: 2226/2400 Accuracy: 92.75% +Pred/Tot: 2317/2500 Accuracy: 92.68% +Pred/Tot: 2409/2600 Accuracy: 92.65% +Pred/Tot: 2502/2700 Accuracy: 92.67% +Pred/Tot: 2595/2800 Accuracy: 92.68% +Pred/Tot: 2684/2900 Accuracy: 92.55% +Pred/Tot: 2776/3000 Accuracy: 92.53% +Pred/Tot: 2868/3100 Accuracy: 92.52% +Pred/Tot: 2963/3200 Accuracy: 92.59% +Pred/Tot: 3060/3300 Accuracy: 92.73% +Pred/Tot: 3154/3400 Accuracy: 92.76% +Pred/Tot: 3246/3500 Accuracy: 92.74% +Pred/Tot: 3343/3600 Accuracy: 92.86% +Pred/Tot: 3434/3700 Accuracy: 92.81% +Pred/Tot: 3524/3800 Accuracy: 92.74% +Pred/Tot: 3621/3900 Accuracy: 92.85% +Pred/Tot: 3714/4000 Accuracy: 92.85% +Pred/Tot: 3807/4100 Accuracy: 92.85% +Pred/Tot: 3903/4200 Accuracy: 92.93% +Pred/Tot: 3996/4300 Accuracy: 92.93% +Pred/Tot: 4085/4400 Accuracy: 92.84% +Pred/Tot: 4178/4500 Accuracy: 92.84% +Pred/Tot: 4268/4600 Accuracy: 92.78% +Pred/Tot: 4358/4700 Accuracy: 92.72% +Pred/Tot: 4452/4800 Accuracy: 92.75% + +FINAL TESTING ACCURACY: +Pred/Tot: 4534/4889 Accuracy: 92.74% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 346 2 7 3 10 7 10 6 1 7 9] + [ 0 6 406 2 1 0 2 0 0 0 0 2] + [ 0 3 4 373 1 11 0 0 0 0 0 13] + [ 0 4 0 0 399 3 1 1 2 3 5 7] + [ 0 5 3 10 0 378 1 0 0 0 3 6] + [ 0 3 11 2 1 0 389 4 0 0 2 0] + [ 0 11 0 0 0 1 3 378 0 0 1 2] + [ 0 8 0 0 4 6 0 0 362 8 0 8] + [ 0 3 0 2 37 1 1 0 10 331 2 15] + [ 0 3 2 0 3 6 0 1 0 0 390 6] + [ 0 6 1 13 2 4 1 0 0 0 1 374]] diff --git a/accuracy_log/log_test_medium_spectr_v2_norm9.txt b/accuracy_log/log_test_medium_spectr_v2_norm9.txt new file mode 100644 index 0000000..8937f7b --- /dev/null +++ b/accuracy_log/log_test_medium_spectr_v2_norm9.txt @@ -0,0 +1,2090 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_m_quant.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 0 +WARNING:tensorflow:From utils/test_accuracy_emul.py:311: The name tf.app.run is deprecated. Please use tf.compat.v1.app.run instead. + +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from main_emulation.c:32: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_BasicKernels_SQ8.h:3, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.h:5, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.c:1: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:17: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WriteImageToFile’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:437:18: note: in expansion of macro ‘__OPEN_WRITE’ + 437 | void *File = __OPEN_WRITE(fs, ImageName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:454:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 454 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:460:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 460 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:473:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 473 | ret+=__WRITE(File,OutBuffer +(CHUNK_SIZE*i), CHUNK_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:476:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 476 | ret+=__WRITE(File,OutBuffer+(CHUNK_SIZE*steps) , ((W*H*PixelSize) % CHUNK_SIZE)*sizeof(unsigned char)); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:479:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 479 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:57: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result] + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:9: note: in expansion of macro ‘__WRITE’ + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:7: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c: In function ‘AT_TensorGetNextPage’: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:79:63: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 79 | AT_HYPERFLASH_FS_CL_COPY((AT_HYPERFLASH_FS_T *) L3_Device, (AT_HYPERFLASH_FS_EXT_ADDR_TYPE) (Addr+Offset), (AT_HYPERFLASH_FS_INT_ADDR_TYPE) L2_BufferAddr, Size, 0, L3_Event); + | ^ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:225:36: note: in definition of macro ‘AT_HYPERFLASH_FS_CL_COPY’ + 225 | __at_hyperflash_fs_copy(*(file), ext, loc, size, dir) + | ^~~ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c: In function ‘WriteWavToFileNew’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:236:18: note: in expansion of macro ‘__OPEN_WRITE’ + 236 | void *File = __OPEN_WRITE(fs, FileName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:328:20: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 328 | ret += __WRITE(File, header_buffer, WAV_HEADER_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:335:21: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 335 | ret += __WRITE(File, data, Size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:338:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 338 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=0 MEDIUM=1 LARGE=0 WITH_MFCC=1 USE_POWER=0 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_SQ8_EMUL +cp model/KWS_ds_cnn_m_quant.tflite BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite +echo "GENERATING NNTOOL STATE FILE" +GENERATING NNTOOL STATE FILE +nntool -s model/nntool_script BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +open - opening graph file BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quant.tflite load_quantization = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +forwards SOFTMAX_0_11 in: -32.63<(i8-0.00)*0.25492236<32.38 out: None stop [] fusion False +handler SoftmaxTanHMult selected for SoftMaxParameters(SOFTMAX_0_11) +forwards handler SOFTMAX_0_11 returned in: -64.00<(i8-0.00)*0.50000000<63.50 forced out: -1.00<(i16-0.00)*0.00003052<1.00 fusion False +forwards in edge 0 does not match was -32.63<(i8-0.00)*0.25492236<32.38 need -64.00<(i8-0.00)*0.50000000<63.50 forced +go backwackwards to F 12x1x1x172 B 1 +backwards FULLY_CONNECTED_0_10 in: -9.26<(i8-0.00)*0.07232232<9.18,chan<(i8-0.00)*chan W: 10, Pad:[1,2] PadT:[1,2] => Wc: 10, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 10, Pad:[0,0] => Wo: 10, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [10 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1763000 + +==== Process Tiling For User Kernel: S4_Conv2d_172x1x10x4_Relu ======================= +S4_Conv2d_172x1x10x4_Relu Partition[0] Size = 194929 (Min: 200, Max: 275121), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=25 + In Dim: 58, TileOverlap: 8, Ratio: 2.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: Bias, was using 704 Bytes will require 688 Bytes buffer +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: Scale, was using 176 Bytes will require 172 Bytes buffer +S4_Conv2d_172x1x10x4_Relu Full buffering on Arg: ScaleN, was using 176 Bytes will require 172 Bytes buffer +S4_Conv2d_172x1x10x4_Relu, TiledSpace: Tile0 Iteration Count: 4 Parametric Space: [D1, M0=88] Parametric Space: [D0, M1=1] + In : Ratio: 2.000000, FixDim: 10, VarDim: 22 [ 49], Size: 440, Total: 440, Move: 1460 (Decl x 2.979592) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 1128, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 1300, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 1472, Move: 172 (Decl x 1.000000) L2 +@ Filter : Ratio: 0.000000, Size: 7040, Total: 8512, Move: 6880 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 12320, Total: 20832, Move: 43000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 10, VarDim: 7 [ 25], Size: 24640, Total: 45472, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45484, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_172x1x10x4_Relu - IterSpace: Tile0 - L1 Memory: 45484, L2Move: 52381, L3Move: 0, Tiling Overhead: 1.018868 +S4_Conv2d_172x1x10x4_Relu Partial buffering on Arg: Filter, From: D0 To: D1. Current is (Par) 1 x [W:1, H:1] x 40 => Partial buffer size is 7040 Bytes +S4_Conv2d_172x1x10x4_Relu Found Parametric value for space D1 (Initial: 172, Div: 8) = 88 [88*1 + 84] and space D0 (Initial: 1, Div: 4) = 1 [1*1 + 0], Iteration for Tiled Space: 4 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: In, Size: 220, Base1: 0, Base2: 220 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Bias, Size: 688, Base1: 440, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Scale, Size: 172, Base1: 1128, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: ScaleN, Size: 172, Base1: 1300, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Filter, Size: 3520, Base1: 1472, Base2: 4992 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Out, Size: 6160, Base1: 8512, Base2: 14672 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: ConvOut, Size: 24640, Base1: 20832, Base2: 0 +Ker: S4_Conv2d_172x1x10x4_Relu, Arg: Infos, Size: 12, Base1: 45472, Base2: 0 +S4_Conv2d_172x1x10x4_Relu For Iter Space: 0 Iteration count: 4 (Last one is truncated), Given L1 Memory: 48736, Used L1 Memory: 45484, Reusable Memory: 3252, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 10, Pad:[0,1] PadT:[0,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 1 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 25] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride2B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S7_Conv2d_172x1x3x3_Relu ======================= +S7_Conv2d_172x1x3x3_Relu Partition[0] Size = 63665 (Min: 60, Max: 165257), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=13 + In Dim: 27, TileOverlap: 1, Ratio: 2.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 384 Bytes will require 688 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 96 Bytes will require 172 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 96 Bytes will require 172 Bytes buffer +S7_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 864 Bytes will require 1548 Bytes buffer +S7_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=48] + In : Ratio: 2.000000, FixDim: 10, VarDim: 25 [ 25], Size: 24000, Total: 24000, Move: 43000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 24688, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 24860, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 25032, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 26580, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 6240, Total: 32820, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 12480, Total: 45300, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 45312, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 45312, L2Move: 56769, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 48 [48*3 + 28], Iteration for Tiled Space: 1 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: In, Size: 12000, Base1: 0, Base2: 12000 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 24000, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 24688, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 24860, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 25032, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 3120, Base1: 26580, Base2: 29700 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 12480, Base1: 32820, Base2: 0 +Ker: S7_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 45300, Base2: 0 +S7_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 45312, Reusable Memory: 3424, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S10_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S10_Conv2d_172x172x1x1_Relu ======================= +S10_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S10_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S10_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S10_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S10_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S10_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S10_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S10_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S10_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S10_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S13_Conv2d_172x1x3x3_Relu ======================= +S13_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S13_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S13_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S13_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S13_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S13_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S13_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S16_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S16_Conv2d_172x172x1x1_Relu ======================= +S16_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S16_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S16_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S16_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S16_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S16_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S16_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S16_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S16_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S16_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S19_Conv2d_172x1x3x3_Relu ======================= +S19_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S19_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S19_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S19_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S19_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S19_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S19_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S22_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S22_Conv2d_172x172x1x1_Relu ======================= +S22_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S22_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S22_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S22_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S22_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S22_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S22_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S22_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S22_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S22_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 13, Pad:[1,1] PadT:[1,1] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 111800 + +==== Process Tiling For User Kernel: S25_Conv2d_172x1x3x3_Relu ======================= +S25_Conv2d_172x1x3x3_Relu Partition[0] Size = 56785 (Min: 30, Max: 98177), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=13 + In Dim: 15, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 13, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 13, TileOverlap: 0, Ratio: 1.000000 + +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Bias, was using 640 Bytes will require 688 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Scale, was using 160 Bytes will require 172 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: ScaleN, was using 160 Bytes will require 172 Bytes buffer +S25_Conv2d_172x1x3x3_Relu Full buffering on Arg: Filter, was using 1440 Bytes will require 1548 Bytes buffer +S25_Conv2d_172x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=80] + In : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 10400, Move: 11180 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 688, Total: 11088, Move: 688 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 172, Total: 11260, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 172, Total: 11432, Move: 172 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 1548, Total: 12980, Move: 1548 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 10400, Total: 23380, Move: 11180 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 13 [ 13], Size: 20800, Total: 44180, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 44192, Move: 9 (Decl x 1.000000) L2 +S25_Conv2d_172x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 44192, L2Move: 24949, L3Move: 0, Tiling Overhead: 1.000000 +S25_Conv2d_172x1x3x3_Relu Found Parametric value for space D0 (Initial: 172, Div: 8) = 80 [80*2 + 12], Iteration for Tiled Space: 1 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: In, Size: 5200, Base1: 0, Base2: 5200 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Bias, Size: 688, Base1: 10400, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Scale, Size: 172, Base1: 11088, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: ScaleN, Size: 172, Base1: 11260, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Filter, Size: 1548, Base1: 11432, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Out, Size: 5200, Base1: 12980, Base2: 18180 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: ConvOut, Size: 20800, Base1: 23380, Base2: 0 +Ker: S25_Conv2d_172x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 44180, Base2: 0 +S25_Conv2d_172x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 44192, Reusable Memory: 4544, Used L2 Memory: 0 +================================================================================================= + +InFeat: 172, OutFeat: 172 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 13, Pad:[0,0] PadT:[0,0] => Hc: 13 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 13, Pad:[0,0] => Ho: 13 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 13] +UsedC : [5 x 13] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 1934140 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S28_Conv2d_172x172x1x1_Relu +In1 => W: 172, H: 172 +In2 => W: 65, H: 172, w: 5, h: 13, Sx: 1, Sy: 1 +Out => W: 65, H: 172 => Line First + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S28_Conv2d_172x172x1x1_Relu ======================= +S28_Conv2d_172x172x1x1_Relu Partition[0] Size = 1401 (Min: 1376, Max: 22401), Fraction: 0.20, Giving: 9854 Bytes out of 48736 Bytes +S28_Conv2d_172x172x1x1_Relu Partition[1] Size = 5528 (Min: 2752, Max: 83344), Fraction: 0.80, Giving: 38881 Bytes out of 48736 Bytes + +Reference object: In1, Dim=172 + In1 Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 172, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 172, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 172, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_172x172x1x1_Relu, TiledSpace: Tile1 Iteration Count: 3 +* KerBuff : Ratio: 0.000000, Size: 688, Total: 688, Move: 0 (Decl x 0.000000) L2 + In1 : Ratio: 1.000000, FixDim: 172, VarDim: 72 [ 172], Size: 24768, Total: 25456, Move: 29584 (Decl x 1.000000) L2 +* Bias : Ratio: 1.000000, Size: 688, Total: 26144, Move: 688 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 65, VarDim: 72 [ 172], Size: 9360, Total: 35504, Move: 11180 (Decl x 1.000000) L2 +* Scale : Ratio: 1.000000, Size: 172, Total: 35676, Move: 172 (Decl x 1.000000) L2 +* ScaleN : Ratio: 1.000000, Size: 172, Total: 35848, Move: 172 (Decl x 1.000000) L2 +S28_Conv2d_172x172x1x1_Relu - IterSpace: Tile1 - L1 Memory: 35848, L2Move: 41796, L3Move: 0, Tiling Overhead: 1.000000 +S28_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: KerBuff, Size: 688, Base1: 0, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: In1, Size: 12384, Base1: 688, Base2: 13072 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Bias, Size: 688, Base1: 25456, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Out, Size: 4680, Base1: 26144, Base2: 30824 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Scale, Size: 172, Base1: 35504, Base2: 0 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: ScaleN, Size: 172, Base1: 35676, Base2: 0 +S28_Conv2d_172x172x1x1_Relu For Iter Space: 1 Iteration count: 3 (Last one is truncated), Given L1 Memory: 38881, Used L1 Memory: 35848, Reusable Memory: 3032, Used L2 Memory: 0 + +Reference object: In2, Dim=65 + In2 Dim: 65, TileOverlap: 0, Ratio: 1.000000 + +S28_Conv2d_172x172x1x1_Relu, TiledSpace: Tile0 Iteration Count: 3 + In2 : Ratio: 1.000000, FixDim: 172, VarDim: 28 [ 65], Size: 9632, Total: 9632, Move: 33540 (Decl x 3.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 9644, Move: 9 (Decl x 1.000000) L2 +S28_Conv2d_172x172x1x1_Relu - IterSpace: Tile0 - L1 Memory: 9644, L2Move: 33549, L3Move: 0, Tiling Overhead: 2.998391 +S28_Conv2d_172x172x1x1_Relu Iteration for Tiled Space: 3 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: In2, Size: 4816, Base1: 35848, Base2: 40664 +Ker: S28_Conv2d_172x172x1x1_Relu, Arg: Infos, Size: 12, Base1: 45480, Base2: 0 +S28_Conv2d_172x172x1x1_Relu For Iter Space: 0 Iteration count: 3 (Last one is truncated), Given L1 Memory: 9854, Used L1 Memory: 9644, Reusable Memory: 208, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 13, Pad:[0,0] => Ho: 1 +OverlapP: 11 +TileCons: 2 +UsedIn : [5 x 13] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 11180 + +==== Process Tiling For User Kernel: S29_AveragePool_13x5 ======================= +S29_AveragePool_13x5 Partition[0] Size = 23063 (Min: 130, Max: 22731), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=1 + In Dim: 13, TileOverlap: 11, Ratio: 2.000000 + Out Dim: 1, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S29_AveragePool_13x5, Total Raw Memory: 11364 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S29_AveragePool_13x5, Arg: In, Size: 11180, Base1: 0, Base2: 0 +Ker: S29_AveragePool_13x5, Arg: Out, Size: 172, Base1: 11180, Base2: 0 +Ker: S29_AveragePool_13x5, Arg: Infos, Size: 12, Base1: 11352, Base2: 0 +S29_AveragePool_13x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 11364, Reusable Memory: 37372, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S32_Linear_12x172x1x1, Linear: InDim: 172, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S32_Linear_12x172x1x1 ======================= +S32_Linear_12x172x1x1 Partition[0] Size = 4491 (Min: 0, Max: 4575), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=1 + +Kernel: S32_Linear_12x172x1x1, Total Raw Memory: 2332 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S32_Linear_12x172x1x1, Arg: In, Size: 172, Base1: 0, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Filter, Size: 2064, Base1: 172, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Bias, Size: 48, Base1: 2236, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Out, Size: 12, Base1: 2284, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Scale, Size: 12, Base1: 2296, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: ScaleN, Size: 12, Base1: 2308, Base2: 0 +Ker: S32_Linear_12x172x1x1, Arg: Infos, Size: 12, Base1: 2320, Base2: 0 +S32_Linear_12x172x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 2332, Reusable Memory: 46404, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S33_SoftMax ======================= + S33_SoftMax Partition[0] Size = 51 (Min: 8, Max: 63), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=12 + In Dim: 12, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 12, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S33_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S33_SoftMax, Arg: In, Size: 12, Base1: 0, Base2: 0 +Ker: S33_SoftMax, Arg: Out, Size: 24, Base1: 12, Base2: 0 +Ker: S33_SoftMax, Arg: Infos, Size: 12, Base1: 36, Base2: 0 + S33_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S32_Output[ In] Adding Edge From S32_Linear_12x172x1x1 To S33_SoftMax New + Symbol: S29_Output[ In] Adding Edge From S29_AveragePool_13x5 To S32_Linear_12x172x1x1 New + Symbol: S28_Output[ In] Adding Edge From S28_Conv2d_172x172x1x1_Relu To S29_AveragePool_13x5 New + Symbol: S25_Output[ In] Adding Edge From S25_Conv2d_172x1x3x3_Relu To S28_Conv2d_172x172x1x1_Relu New + Symbol: S22_Output[ In] Adding Edge From S22_Conv2d_172x172x1x1_Relu To S25_Conv2d_172x1x3x3_Relu New + Symbol: S19_Output[ In] Adding Edge From S19_Conv2d_172x1x3x3_Relu To S22_Conv2d_172x172x1x1_Relu New + Symbol: S16_Output[ In] Adding Edge From S16_Conv2d_172x172x1x1_Relu To S19_Conv2d_172x1x3x3_Relu New + Symbol: S13_Output[ In] Adding Edge From S13_Conv2d_172x1x3x3_Relu To S16_Conv2d_172x172x1x1_Relu New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_172x172x1x1_Relu To S13_Conv2d_172x1x3x3_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_172x1x3x3_Relu To S10_Conv2d_172x172x1x1_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_172x1x10x4_Relu To S7_Conv2d_172x1x3x3_Relu New + Symbol: Output_1[ Out] Adding Edge From S33_SoftMax To __GraphExit__ New + Symbol: S33_Infos[ In] Adding Edge From __GraphEntry__ To S33_SoftMax New + Symbol: S32_Infos[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 New + Symbol: S32_Mul_shift[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: S32_Mul_scale[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: Dscnnfc1matmul_bias[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: Dscnnfc1weights_quantfakequant[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x172x1x1 Exists + Symbol: S29_Infos[ In] Adding Edge From __GraphEntry__ To S29_AveragePool_13x5 New + Symbol: S28_Infos[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu New + Symbol: S28_Mul_shift[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: S28_Mul_scale[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_172x172x1x1_Relu Exists + Symbol: S25_Infos[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu New + Symbol: S25_Mul_shift[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: S25_Mul_scale[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_172x1x3x3_Relu Exists + Symbol: S22_Infos[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu New + Symbol: S22_Mul_shift[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: S22_Mul_scale[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_172x172x1x1_Relu Exists + Symbol: S19_Infos[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu New + Symbol: S19_Mul_shift[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: S19_Mul_scale[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_172x1x3x3_Relu Exists + Symbol: S16_Infos[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu New + Symbol: S16_Mul_shift[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: S16_Mul_scale[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_172x172x1x1_Relu Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_172x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu New + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_172x172x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu New + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_172x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu New + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Dscnnconv_1conv2d_fold_bias[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Dscnnconv_1weights_quantfakequ[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists + Symbol: Input_1[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_172x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 54180 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 197153 => Alloc: OK + +[FULL] Remapping [54180 .. 197152] to [0 .. 142972] Align compensation: 3 +[PART] Remapping [0 .. 54179] to [142976 .. 197155] Align compensation: 0 +[PART] Remapping [197153 .. 349999] to [197156 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_m_quantCNN is sucessfull, L2: 197153 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => Input_1 + (null) => Dscnnconv_1weights_quantfakequ + (null) => Dscnnconv_1conv2d_fold_bias + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S4_Infos + (null) => Dscnnconv_ds_1dw_convweights_q + (null) => Dscnnconv_ds_1dw_convdepthwise + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S7_Infos + (null) => Dscnnconv_ds_1pw_convweights_q + (null) => Dscnnconv_ds_1pw_convconv2d_fo + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S10_Infos + (null) => Dscnnconv_ds_2dw_convweights_q + (null) => Dscnnconv_ds_2dw_convdepthwise + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S13_Infos + (null) => Dscnnconv_ds_2pw_convweights_q + (null) => Dscnnconv_ds_2pw_convconv2d_fo + (null) => S16_Mul_scale + (null) => S16_Mul_shift + (null) => S16_Infos + (null) => Dscnnconv_ds_3dw_convweights_q + (null) => Dscnnconv_ds_3dw_convdepthwise + (null) => S19_Mul_scale + (null) => S19_Mul_shift + (null) => S19_Infos + (null) => Dscnnconv_ds_3pw_convweights_q + (null) => Dscnnconv_ds_3pw_convconv2d_fo + (null) => S22_Mul_scale + (null) => S22_Mul_shift + (null) => S22_Infos + (null) => Dscnnconv_ds_4dw_convweights_q + (null) => Dscnnconv_ds_4dw_convdepthwise + (null) => S25_Mul_scale + (null) => S25_Mul_shift + (null) => S25_Infos + (null) => Dscnnconv_ds_4pw_convweights_q + (null) => Dscnnconv_ds_4pw_convconv2d_fo + (null) => S28_Mul_scale + (null) => S28_Mul_shift + (null) => S28_Infos + (null) => S29_Infos + (null) => Dscnnfc1weights_quantfakequant + (null) => Dscnnfc1matmul_bias + (null) => S32_Mul_scale + (null) => S32_Mul_shift + (null) => S32_Infos + (null) => S33_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 1 3 4 5 6 7 8 9 10 11 12 + + Living Dynamic Symbols: [Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 1: UKer S4_Conv2d_172x1x10x4_Relu, Operations: 1763000 + I In => Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 1460, TileOverhead: 2.979592, L2Buff: 0, Addr: 0 +CI PartBuff Filter => Dscnnconv_1weights_quantfakequ --L2-- Size: 6880, L3_Move: 0, L2_Move: 6880, TileOverhead: 1.000000, L2Buff: 0, Addr: 1472 +CI Buff Bias => Dscnnconv_1conv2d_fold_bias --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 440 + O Out => S4_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8512 +CI Buff Scale => S4_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1128 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 1300 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45472 + Kernel Memory : L3: 0, L2: 51411 + Kernel Total Memory: 51411, L3 moves: 0, L2 moves: 52381, Move overhead: 1.018868 + Kernel Operations : 1763000 [KernelOper/GraphOper: 17.780609%], Move/Operation ratio: [L3: 0.000000, L2: 0.029711] + Successors: 2 + + Living Dynamic Symbols: [Input_1] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S7_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S4_Output --L2-- Size: 43000, L3_Move: 0, L2_Move: 43000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_1dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 25032 +CI Buff Bias => Dscnnconv_ds_1dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 24000 + O Out => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26580 +CI Buff Scale => S7_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24688 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 24860 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45300 + Kernel Memory : L3: 0, L2: 56769 + Kernel Total Memory: 56769, L3 moves: 0, L2 moves: 56769, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.507773] + Successors: 3 + + Living Dynamic Symbols: [S4_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S10_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S7_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_1pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_1pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S10_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S10_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 4 + + Living Dynamic Symbols: [S7_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S13_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S10_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_2dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_2dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S13_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S13_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 5 + + Living Dynamic Symbols: [S10_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S16_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S13_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_2pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_2pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S16_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S16_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S16_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S16_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 6 + + Living Dynamic Symbols: [S13_Output] [S16_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S19_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S16_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_3dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_3dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S19_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S19_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S19_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S19_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 7 + + Living Dynamic Symbols: [S16_Output] [S19_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S22_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S19_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_3pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_3pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S22_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S22_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S22_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S22_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 8 + + Living Dynamic Symbols: [S19_Output] [S22_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S25_Conv2d_172x1x3x3_Relu, Operations: 111800 + I In => S22_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_4dw_convweights_q --L2-- Size: 1548, L3_Move: 0, L2_Move: 1548, TileOverhead: 1.000000, L2Buff: 0, Addr: 11432 +CI Buff Bias => Dscnnconv_ds_4dw_convdepthwise --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 10400 + O Out => S25_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 12980 +CI Buff Scale => S25_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11088 +CI Buff ScaleN => S25_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11260 +CI Buff Infos => S25_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 44180 + Kernel Memory : L3: 0, L2: 24949 + Kernel Total Memory: 24949, L3 moves: 0, L2 moves: 24949, Move overhead: 1.000000 + Kernel Operations : 111800 [KernelOper/GraphOper: 1.127551%], Move/Operation ratio: [L3: 0.000000, L2: 0.223157] + Successors: 9 + + Living Dynamic Symbols: [S22_Output] [S25_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S28_Conv2d_172x172x1x1_Relu, Operations: 1922960 + I In2 => S25_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 33540, TileOverhead: 3.000000, L2Buff: 0, Addr: 35848 +CI In1 => Dscnnconv_ds_4pw_convweights_q --L2-- Size: 29584, L3_Move: 0, L2_Move: 29584, TileOverhead: 1.000000, L2Buff: 0, Addr: 688 +CI Buff Bias => Dscnnconv_ds_4pw_convconv2d_fo --L2-- Size: 688, L3_Move: 0, L2_Move: 688, TileOverhead: 1.000000, L2Buff: 0, Addr: 25456 + O Out => S28_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 26144 +CI Buff Scale => S28_Mul_scale --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35504 +CI Buff ScaleN => S28_Mul_shift --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 35676 +CI Buff Infos => S28_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 45480 + Kernel Memory : L3: 0, L2: 52985 + Kernel Total Memory: 52985, L3 moves: 0, L2 moves: 75345, Move overhead: 1.422006 + Kernel Operations : 1922960 [KernelOper/GraphOper: 19.393873%], Move/Operation ratio: [L3: 0.000000, L2: 0.039182] + Successors: 10 + + Living Dynamic Symbols: [S25_Output] [S28_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S29_AveragePool_13x5, Operations: 11180 + I Buff In => S28_Output --L2-- Size: 11180, L3_Move: 0, L2_Move: 11180, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S29_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 11180 +CI Buff Infos => S29_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 11352 + Kernel Memory : L3: 0, L2: 11361 + Kernel Total Memory: 11361, L3 moves: 0, L2 moves: 11361, Move overhead: 1.000000 + Kernel Operations : 11180 [KernelOper/GraphOper: 0.112755%], Move/Operation ratio: [L3: 0.000000, L2: 1.016190] + Successors: 11 + + Living Dynamic Symbols: [S28_Output] [S29_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S32_Linear_12x172x1x1, Operations: 2064 + I Buff In => S29_Output --L2-- Size: 172, L3_Move: 0, L2_Move: 172, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnfc1weights_quantfakequant --L2-- Size: 2064, L3_Move: 0, L2_Move: 2064, TileOverhead: 1.000000, L2Buff: 0, Addr: 172 +CI Buff Bias => Dscnnfc1matmul_bias --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 2236 + O Buff Out => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2284 +CI Buff Scale => S32_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2296 +CI Buff ScaleN => S32_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 2308 +CI Buff Infos => S32_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 2320 + Kernel Memory : L3: 0, L2: 2329 + Kernel Total Memory: 2329, L3 moves: 0, L2 moves: 2329, Move overhead: 1.000000 + Kernel Operations : 2064 [KernelOper/GraphOper: 0.020816%], Move/Operation ratio: [L3: 0.000000, L2: 1.128392] + Successors: 12 + + Living Dynamic Symbols: [S29_Output] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S33_SoftMax, Operations: 12 + I Buff In => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S33_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000121%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 56769 + Graph nodes min global memory: L3: 0, L2: 56772 + Graph sum of kernel arguments size: 408702, L3 moves: 0, L2 moves: 499112, Move overhead: 1.221213 + Graph total operations: 9915296 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 408702, Total L3_Move: 0, Total L2_Move: 499112, Tiling Overhead Average: 1.221213 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 54180 + Const 0 30628 + Total 0 56772 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + Input_1 Externally allocated + Dscnnconv_1weights_quantfakequ INSTALL: HyperFlash[ 0: 13]@ 118336 LOAD: L2[ 0: 13]@ 118336 EXEC: L2[ 0: 13]@ 118336 , Size: 6880 + Dscnnconv_1conv2d_fold_bias INSTALL: HyperFlash[ 0: 13]@ 133472 LOAD: L2[ 0: 13]@ 133472 EXEC: L2[ 0: 13]@ 133472 , Size: 688 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 139664 LOAD: L2[ 0: 13]@ 139664 EXEC: L2[ 0: 13]@ 139664 , Size: 172 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 139836 LOAD: L2[ 0: 13]@ 139836 EXEC: L2[ 0: 13]@ 139836 , Size: 172 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 142808 LOAD: L2[ 0: 13]@ 142808 EXEC: L2[ 0: 13]@ 142808 , Size: 9 + Dscnnconv_ds_1dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 127280 LOAD: L2[ 0: 13]@ 127280 EXEC: L2[ 0: 13]@ 127280 , Size: 1548 + Dscnnconv_ds_1dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 134160 LOAD: L2[ 0: 13]@ 134160 EXEC: L2[ 0: 13]@ 134160 , Size: 688 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140008 LOAD: L2[ 0: 13]@ 140008 EXEC: L2[ 0: 13]@ 140008 , Size: 172 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140180 LOAD: L2[ 0: 13]@ 140180 EXEC: L2[ 0: 13]@ 140180 , Size: 172 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 142820 LOAD: L2[ 0: 13]@ 142820 EXEC: L2[ 0: 13]@ 142820 , Size: 9 + Dscnnconv_ds_1pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 29584 + Dscnnconv_ds_1pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 134848 LOAD: L2[ 0: 13]@ 134848 EXEC: L2[ 0: 13]@ 134848 , Size: 688 + S10_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140352 LOAD: L2[ 0: 13]@ 140352 EXEC: L2[ 0: 13]@ 140352 , Size: 172 + S10_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140524 LOAD: L2[ 0: 13]@ 140524 EXEC: L2[ 0: 13]@ 140524 , Size: 172 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 142832 LOAD: L2[ 0: 13]@ 142832 EXEC: L2[ 0: 13]@ 142832 , Size: 9 + Dscnnconv_ds_2dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 128828 LOAD: L2[ 0: 13]@ 128828 EXEC: L2[ 0: 13]@ 128828 , Size: 1548 + Dscnnconv_ds_2dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 135536 LOAD: L2[ 0: 13]@ 135536 EXEC: L2[ 0: 13]@ 135536 , Size: 688 + S13_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 140696 LOAD: L2[ 0: 13]@ 140696 EXEC: L2[ 0: 13]@ 140696 , Size: 172 + S13_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 140868 LOAD: L2[ 0: 13]@ 140868 EXEC: L2[ 0: 13]@ 140868 , Size: 172 + S13_Infos INSTALL: HyperFlash[ 0: 13]@ 142844 LOAD: L2[ 0: 13]@ 142844 EXEC: L2[ 0: 13]@ 142844 , Size: 9 + Dscnnconv_ds_2pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 29584 LOAD: L2[ 0: 13]@ 29584 EXEC: L2[ 0: 13]@ 29584 , Size: 29584 + Dscnnconv_ds_2pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 136224 LOAD: L2[ 0: 13]@ 136224 EXEC: L2[ 0: 13]@ 136224 , Size: 688 + S16_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141040 LOAD: L2[ 0: 13]@ 141040 EXEC: L2[ 0: 13]@ 141040 , Size: 172 + S16_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141212 LOAD: L2[ 0: 13]@ 141212 EXEC: L2[ 0: 13]@ 141212 , Size: 172 + S16_Infos INSTALL: HyperFlash[ 0: 13]@ 142856 LOAD: L2[ 0: 13]@ 142856 EXEC: L2[ 0: 13]@ 142856 , Size: 9 + Dscnnconv_ds_3dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 130376 LOAD: L2[ 0: 13]@ 130376 EXEC: L2[ 0: 13]@ 130376 , Size: 1548 + Dscnnconv_ds_3dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 136912 LOAD: L2[ 0: 13]@ 136912 EXEC: L2[ 0: 13]@ 136912 , Size: 688 + S19_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141384 LOAD: L2[ 0: 13]@ 141384 EXEC: L2[ 0: 13]@ 141384 , Size: 172 + S19_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141556 LOAD: L2[ 0: 13]@ 141556 EXEC: L2[ 0: 13]@ 141556 , Size: 172 + S19_Infos INSTALL: HyperFlash[ 0: 13]@ 142868 LOAD: L2[ 0: 13]@ 142868 EXEC: L2[ 0: 13]@ 142868 , Size: 9 + Dscnnconv_ds_3pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 59168 LOAD: L2[ 0: 13]@ 59168 EXEC: L2[ 0: 13]@ 59168 , Size: 29584 + Dscnnconv_ds_3pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 137600 LOAD: L2[ 0: 13]@ 137600 EXEC: L2[ 0: 13]@ 137600 , Size: 688 + S22_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 141728 LOAD: L2[ 0: 13]@ 141728 EXEC: L2[ 0: 13]@ 141728 , Size: 172 + S22_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 141900 LOAD: L2[ 0: 13]@ 141900 EXEC: L2[ 0: 13]@ 141900 , Size: 172 + S22_Infos INSTALL: HyperFlash[ 0: 13]@ 142880 LOAD: L2[ 0: 13]@ 142880 EXEC: L2[ 0: 13]@ 142880 , Size: 9 + Dscnnconv_ds_4dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 131924 LOAD: L2[ 0: 13]@ 131924 EXEC: L2[ 0: 13]@ 131924 , Size: 1548 + Dscnnconv_ds_4dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 138288 LOAD: L2[ 0: 13]@ 138288 EXEC: L2[ 0: 13]@ 138288 , Size: 688 + S25_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142072 LOAD: L2[ 0: 13]@ 142072 EXEC: L2[ 0: 13]@ 142072 , Size: 172 + S25_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142244 LOAD: L2[ 0: 13]@ 142244 EXEC: L2[ 0: 13]@ 142244 , Size: 172 + S25_Infos INSTALL: HyperFlash[ 0: 13]@ 142892 LOAD: L2[ 0: 13]@ 142892 EXEC: L2[ 0: 13]@ 142892 , Size: 9 + Dscnnconv_ds_4pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 88752 LOAD: L2[ 0: 13]@ 88752 EXEC: L2[ 0: 13]@ 88752 , Size: 29584 + Dscnnconv_ds_4pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 138976 LOAD: L2[ 0: 13]@ 138976 EXEC: L2[ 0: 13]@ 138976 , Size: 688 + S28_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142416 LOAD: L2[ 0: 13]@ 142416 EXEC: L2[ 0: 13]@ 142416 , Size: 172 + S28_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142588 LOAD: L2[ 0: 13]@ 142588 EXEC: L2[ 0: 13]@ 142588 , Size: 172 + S28_Infos INSTALL: HyperFlash[ 0: 13]@ 142904 LOAD: L2[ 0: 13]@ 142904 EXEC: L2[ 0: 13]@ 142904 , Size: 9 + S29_Infos INSTALL: HyperFlash[ 0: 13]@ 142916 LOAD: L2[ 0: 13]@ 142916 EXEC: L2[ 0: 13]@ 142916 , Size: 9 + Dscnnfc1weights_quantfakequant INSTALL: HyperFlash[ 0: 13]@ 125216 LOAD: L2[ 0: 13]@ 125216 EXEC: L2[ 0: 13]@ 125216 , Size: 2064 + Dscnnfc1matmul_bias INSTALL: HyperFlash[ 0: 13]@ 142760 LOAD: L2[ 0: 13]@ 142760 EXEC: L2[ 0: 13]@ 142760 , Size: 48 + S32_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 142928 LOAD: L2[ 0: 13]@ 142928 EXEC: L2[ 0: 13]@ 142928 , Size: 12 + S32_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 142940 LOAD: L2[ 0: 13]@ 142940 EXEC: L2[ 0: 13]@ 142940 , Size: 12 + S32_Infos INSTALL: HyperFlash[ 0: 13]@ 142952 LOAD: L2[ 0: 13]@ 142952 EXEC: L2[ 0: 13]@ 142952 , Size: 9 + S33_Infos INSTALL: HyperFlash[ 0: 13]@ 142964 LOAD: L2[ 0: 13]@ 142964 EXEC: L2[ 0: 13]@ 142964 , Size: 9 + Output_1 Externally allocated + S4_Output EXEC: L2[ 1: 2]@ 154156 , Size: 43000 + S7_Output EXEC: L2[ 2: 3]@ 142976 , Size: 11180 + S10_Output EXEC: L2[ 3: 4]@ 154156 , Size: 11180 + S13_Output EXEC: L2[ 4: 5]@ 142976 , Size: 11180 + S16_Output EXEC: L2[ 5: 6]@ 154156 , Size: 11180 + S19_Output EXEC: L2[ 6: 7]@ 142976 , Size: 11180 + S22_Output EXEC: L2[ 7: 8]@ 154156 , Size: 11180 + S25_Output EXEC: L2[ 8: 9]@ 165336 , Size: 11180 + S28_Output EXEC: L2[ 9: 10]@ 142976 , Size: 11180 + S29_Output EXEC: L2[ 10: 11]@ 154156 , Size: 172 + S32_Output EXEC: L2[ 11: 12]@ 142976 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S4_Conv2d_172x1x10x4_Relu +Generating Code For User Kernel: S7_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S10_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S13_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S16_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S19_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S22_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S25_Conv2d_172x1x3x3_Relu +Generating Code For User Kernel: S28_Conv2d_172x172x1x1_Relu +Generating Code For User Kernel: S29_AveragePool_13x5 +Generating Code For User Kernel: S32_Linear_12x172x1x1 +Generating Code For User Kernel: S33_SoftMax +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1weights_quantfakequ.tensor: 6880 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1conv2d_fold_bias.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convweights_q.tensor: 1548 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convdepthwise.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convweights_q.tensor: 29584 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convconv2d_fo.tensor: 172 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_scale.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_shift.tensor: 172 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S29_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1weights_quantfakequant.tensor: 2064 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1matmul_bias.tensor: 12 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S33_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_m_quant_L3_Flash_Const.dat (size 142976) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 45492 +L2 Memory size (Bytes) : Given: 350000, Used: 197153 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 499112 Bytes +Sum of all Kernels arguments size : 408702 Bytes +Tiling Bandwith overhead : 1.221213 Move/KerArgSize +Sum of baseline bandwidth : 15711780 Bytes +Percentage of baseline BW for L2 : 3.17667 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 9915296 Operations +Total amount of flash coefficients : 142976 Bytes + +Basic kernels library : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_m_quant.h +Output Directory : BUILD_MODEL_SQ8_EMUL + +The following files have been generated: + KWS_ds_cnn_m_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_m_quantKernels.h Header file for the generated C code + KWS_ds_cnn_m_quant_L3_Flash_Const.dat Flash content for Graph constants +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o +gcc -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DMEDIUM -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_m_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o -lm +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +W0608 12:32:54.765808 139620820993856 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +W0608 12:32:54.766235 139620820993856 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +W0608 12:32:54.766537 139620820993856 module_wrapper.py:139] From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +2021-06-08 12:32:54.768294: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA +2021-06-08 12:32:54.796681: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2096060000 Hz +2021-06-08 12:32:54.797356: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5598f65a1ce0 initialized for platform Host (this does not guarantee that XLA will be used). Devices: +2021-06-08 12:32:54.797433: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2021-06-08 12:32:54.804102: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/marco-gwt/GWT/gap_sdk/install/workstation/lib +2021-06-08 12:32:54.804303: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: UNKNOWN ERROR (303) +2021-06-08 12:32:54.805005: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ubuntu): /proc/driver/nvidia/version does not exist +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +W0608 12:36:16.325480 139620820993856 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +W0608 12:36:16.328293 139620820993856 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +W0608 12:36:16.663399 139620820993856 module_wrapper.py:139] From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +INFO:tensorflow:Validation set size:4445 +I0608 12:36:16.667326 139620820993856 test_accuracy_emul.py:111] Validation set size:4445 +INFO:tensorflow:Test set size:4890 +I0608 12:40:12.836830 139620820993856 test_accuracy_emul.py:157] Test set size:4890 +rm: cannot remove 'test.pgm': No such file or directory +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': False} +Pred/Tot: 91/ 100 Accuracy: 91.00% +Pred/Tot: 188/ 200 Accuracy: 94.00% +Pred/Tot: 278/ 300 Accuracy: 92.67% +Pred/Tot: 371/ 400 Accuracy: 92.75% +Pred/Tot: 468/ 500 Accuracy: 93.60% +Pred/Tot: 557/ 600 Accuracy: 92.83% +Pred/Tot: 651/ 700 Accuracy: 93.00% +Pred/Tot: 744/ 800 Accuracy: 93.00% +Pred/Tot: 839/ 900 Accuracy: 93.22% +Pred/Tot: 937/1000 Accuracy: 93.70% +Pred/Tot: 1033/1100 Accuracy: 93.91% +Pred/Tot: 1125/1200 Accuracy: 93.75% +Pred/Tot: 1213/1300 Accuracy: 93.31% +Pred/Tot: 1305/1400 Accuracy: 93.21% +Pred/Tot: 1391/1500 Accuracy: 92.73% +Pred/Tot: 1489/1600 Accuracy: 93.06% +Pred/Tot: 1583/1700 Accuracy: 93.12% +Pred/Tot: 1679/1800 Accuracy: 93.28% +Pred/Tot: 1771/1900 Accuracy: 93.21% +Pred/Tot: 1865/2000 Accuracy: 93.25% +Pred/Tot: 1957/2100 Accuracy: 93.19% +Pred/Tot: 2050/2200 Accuracy: 93.18% +Pred/Tot: 2144/2300 Accuracy: 93.22% +Pred/Tot: 2235/2400 Accuracy: 93.12% +Pred/Tot: 2324/2500 Accuracy: 92.96% +Pred/Tot: 2417/2600 Accuracy: 92.96% +Pred/Tot: 2512/2700 Accuracy: 93.04% +Pred/Tot: 2606/2800 Accuracy: 93.07% +Pred/Tot: 2698/2900 Accuracy: 93.03% +Pred/Tot: 2787/3000 Accuracy: 92.90% +Pred/Tot: 2881/3100 Accuracy: 92.94% +Pred/Tot: 2978/3200 Accuracy: 93.06% +Pred/Tot: 3069/3300 Accuracy: 93.00% +Pred/Tot: 3158/3400 Accuracy: 92.88% +Pred/Tot: 3249/3500 Accuracy: 92.83% +Pred/Tot: 3340/3600 Accuracy: 92.78% +Pred/Tot: 3432/3700 Accuracy: 92.76% +Pred/Tot: 3527/3800 Accuracy: 92.82% +Pred/Tot: 3621/3900 Accuracy: 92.85% +Pred/Tot: 3714/4000 Accuracy: 92.85% +Pred/Tot: 3809/4100 Accuracy: 92.90% +Pred/Tot: 3900/4200 Accuracy: 92.86% +Pred/Tot: 3995/4300 Accuracy: 92.91% +Pred/Tot: 4084/4400 Accuracy: 92.82% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4128/4444 Accuracy: 92.89% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 0 300 6 4 7 8 6 10 10 1 5 14] + [ 0 2 390 1 0 0 0 0 0 0 0 4] + [ 1 5 3 370 0 6 0 0 0 1 0 20] + [ 0 3 1 0 327 1 3 0 1 5 7 2] + [ 0 4 1 5 0 357 2 0 0 0 1 7] + [ 0 1 10 2 1 0 334 1 0 1 0 2] + [ 0 5 3 1 0 0 1 351 0 0 0 2] + [ 1 9 0 2 2 3 0 0 332 9 0 5] + [ 0 0 0 1 34 0 1 1 8 317 5 6] + [ 1 2 1 0 11 4 0 0 0 2 328 1] + [ 0 2 2 6 1 6 0 0 1 0 3 351]] +Pred/Tot: 93/ 100 Accuracy: 93.00% +Pred/Tot: 185/ 200 Accuracy: 92.50% +Pred/Tot: 276/ 300 Accuracy: 92.00% +Pred/Tot: 367/ 400 Accuracy: 91.75% +Pred/Tot: 457/ 500 Accuracy: 91.40% +Pred/Tot: 549/ 600 Accuracy: 91.50% +Pred/Tot: 644/ 700 Accuracy: 92.00% +Pred/Tot: 737/ 800 Accuracy: 92.12% +Pred/Tot: 831/ 900 Accuracy: 92.33% +Pred/Tot: 925/1000 Accuracy: 92.50% +Pred/Tot: 1014/1100 Accuracy: 92.18% +Pred/Tot: 1110/1200 Accuracy: 92.50% +Pred/Tot: 1199/1300 Accuracy: 92.23% +Pred/Tot: 1292/1400 Accuracy: 92.29% +Pred/Tot: 1384/1500 Accuracy: 92.27% +Pred/Tot: 1478/1600 Accuracy: 92.38% +Pred/Tot: 1573/1700 Accuracy: 92.53% +Pred/Tot: 1668/1800 Accuracy: 92.67% +Pred/Tot: 1762/1900 Accuracy: 92.74% +Pred/Tot: 1852/2000 Accuracy: 92.60% +Pred/Tot: 1946/2100 Accuracy: 92.67% +Pred/Tot: 2042/2200 Accuracy: 92.82% +Pred/Tot: 2135/2300 Accuracy: 92.83% +Pred/Tot: 2226/2400 Accuracy: 92.75% +Pred/Tot: 2317/2500 Accuracy: 92.68% +Pred/Tot: 2409/2600 Accuracy: 92.65% +Pred/Tot: 2502/2700 Accuracy: 92.67% +Pred/Tot: 2595/2800 Accuracy: 92.68% +Pred/Tot: 2684/2900 Accuracy: 92.55% +Pred/Tot: 2776/3000 Accuracy: 92.53% +Pred/Tot: 2868/3100 Accuracy: 92.52% +Pred/Tot: 2963/3200 Accuracy: 92.59% +Pred/Tot: 3060/3300 Accuracy: 92.73% +Pred/Tot: 3154/3400 Accuracy: 92.76% +Pred/Tot: 3246/3500 Accuracy: 92.74% +Pred/Tot: 3343/3600 Accuracy: 92.86% +Pred/Tot: 3434/3700 Accuracy: 92.81% +Pred/Tot: 3524/3800 Accuracy: 92.74% +Pred/Tot: 3621/3900 Accuracy: 92.85% +Pred/Tot: 3714/4000 Accuracy: 92.85% +Pred/Tot: 3807/4100 Accuracy: 92.85% +Pred/Tot: 3903/4200 Accuracy: 92.93% +Pred/Tot: 3996/4300 Accuracy: 92.93% +Pred/Tot: 4085/4400 Accuracy: 92.84% +Pred/Tot: 4178/4500 Accuracy: 92.84% +Pred/Tot: 4268/4600 Accuracy: 92.78% +Pred/Tot: 4358/4700 Accuracy: 92.72% +Pred/Tot: 4452/4800 Accuracy: 92.75% + +FINAL TESTING ACCURACY: +Pred/Tot: 4534/4889 Accuracy: 92.74% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 346 2 7 3 10 7 10 6 1 7 9] + [ 0 6 406 2 1 0 2 0 0 0 0 2] + [ 0 3 4 373 1 11 0 0 0 0 0 13] + [ 0 4 0 0 399 3 1 1 2 3 5 7] + [ 0 5 3 10 0 378 1 0 0 0 3 6] + [ 0 3 11 2 1 0 389 4 0 0 2 0] + [ 0 11 0 0 0 1 3 378 0 0 1 2] + [ 0 8 0 0 4 6 0 0 362 8 0 8] + [ 0 3 0 2 37 1 1 0 10 331 2 15] + [ 0 3 2 0 3 6 0 1 0 0 390 6] + [ 0 6 1 13 2 4 1 0 0 0 1 374]] diff --git a/accuracy_log/log_test_small_hp_power.txt b/accuracy_log/log_test_small_hp_power.txt new file mode 100644 index 0000000..1feba50 --- /dev/null +++ b/accuracy_log/log_test_small_hp_power.txt @@ -0,0 +1,1457 @@ +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_s_quant_power.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 1 --use_high_prec 1 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=1 MEDIUM=0 LARGE=0 WITH_MFCC=1 USE_POWER=1 USE_HIGH_PREC=1 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +rm -f BUILD_MODEL_8BIT_EMUL/GenTile +rm -f -rf BUILD_MODEL_8BIT_EMUL +rm -f BUILD_MODEL_8BIT_EMUL/*.dat +rm -rf /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_8BIT_EMUL +cp model/KWS_ds_cnn_s_quant_power.tflite BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quant.tflite +echo "GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL" +GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL +sed -e "s|MODEL_SRC|KWS_ds_cnn_s_quantModel.c|g" -e "s|TENSORS_DIR|BUILD_MODEL_8BIT_EMUL/tensors|g" -e "s|MODEL_BUILD|BUILD_MODEL_8BIT_EMUL|g" -e "s|GRAPH_DUMP||g" -e "s|LARGE_OPT||g" \ + model/nntool_script_params > model/nntool_script +nntool -s model/nntool_script BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +load_quantization - was: False +now: True +open - opening graph file BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quant.tflite load_quantizaion = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +debug - was: False +now: True +adjust_order - adding transposes to correct tensor order for AT kernels +set_aliases - looking for aliased edges +eliminate_transposes - eliminating unnecessary transposes +eliminate_transposes - search for transposes +eliminate_transposes - ++ Starting up from DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - looking up at input_1[0] transpose [2, 0, 1] +eliminate_transposes - accepted input_1 - input without fixed order - transpose input [2, 0, 1] +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_0_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_1 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_2[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_2 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_2[0] +eliminate_transposes - looking down at CONV_2D_0_2_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_3 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_2[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_4[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_4 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_4[0] +eliminate_transposes - looking down at CONV_2D_0_4_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_5 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_4[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_6[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_6 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_6[0] +eliminate_transposes - looking down at CONV_2D_0_6_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_7 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_6[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_8[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_8 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_8[0] +eliminate_transposes - looking down at CONV_2D_0_8_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at AVERAGE_POOL_2D_0_9[0] transpose [1, 2, 0] +eliminate_transposes - accepted AVERAGE_POOL_2D_0_9 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_8[0] +eliminate_transposes - ++ Starting down from AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - looking down at FULLY_CONNECTED_0_10[0] transpose [1, 2, 0] +eliminate_transposes - accepted FULLY_CONNECTED_0_10 - linear layer reorder input +eliminate_transposes - ++ Found results for AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - eliminate transposes +eliminate_transposes_actions - Start Action (up): DEPTHWISE_CONV_2D_0_0 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 delete transpose in[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - input_1 input dims with (2, 0, 1) +eliminate_transposes_actions - End Action (up): input_1 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_0 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_2 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_2 +eliminate_transposes_actions - CONV_2D_0_2 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_2 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_4 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_4 +eliminate_transposes_actions - CONV_2D_0_4 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_4 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_6 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_6 +eliminate_transposes_actions - CONV_2D_0_6 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_6 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_8 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_8 +eliminate_transposes_actions - CONV_2D_0_8 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_8 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose in[0] +eliminate_transposes_actions - End Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - Start Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose out[0] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - reorder linear layer FULLY_CONNECTED_0_10 in with shape 1x1x64 transposed (2, 0, 1) +eliminate_transposes_actions - End Action (down): FULLY_CONNECTED_0_10 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - search for transposes +eliminate_transposes - no transposes to eliminate found +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - no further transpose sequences found +set_aliases - looking for aliased edges +nngraph - adjusted order +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start remove_relus +matcher - fusions - start remove_noops +matcher - fusions - start fuse_external_bias_sq8 +matcher - fusions - start fuse_pad +matcher - fusions - start match_duplicate_operations +matcher - fusions - start gather_to_split +matcher - fusions - start slice_to_split +matcher - fusions - start unused_concats +matcher - fusions - start find_missing_quantization +matcher - fusions - start rnn_reverse +matcher - fusions - start rnn_unpack +matcher - fusions - start match_far_hsigmoid +matcher - fusions - start match_close_hsigmoid +matcher - fusions - start expand_transposes +matcher - fusions - start move_pooling_scale8 +matcher - fusions - start move_activations_scale8 +matcher - fusions - start fuse_gap_convs +matcher - fusions - start match_conv_active_pool +matcher - fusions - start match_conv_pool_active +matcher - fusions - start match_conv_active +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_0,DEPTHWISE_CONV_2D_0_0_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_1,DEPTHWISE_CONV_2D_0_1_activation +match_gap_conv - fusing nodes CONV_2D_0_2,CONV_2D_0_2_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_3,DEPTHWISE_CONV_2D_0_3_activation +match_gap_conv - fusing nodes CONV_2D_0_4,CONV_2D_0_4_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_5,DEPTHWISE_CONV_2D_0_5_activation +match_gap_conv - fusing nodes CONV_2D_0_6,CONV_2D_0_6_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_7,DEPTHWISE_CONV_2D_0_7_activation +match_gap_conv - fusing nodes CONV_2D_0_8,CONV_2D_0_8_activation +matcher - fusions - match_conv_active modified graph +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start match_conv_pool +matcher - fusions - start fuse_gap_linear +matcher - fusions - start fuse_op_activation_scale8 +matcher - fusions - start propagate_softmax_sym_qrec +equalize_sym_mult_concats - propagating scale up from node SOFTMAX_0_11 to node FULLY_CONNECTED_0_10 +matcher - fusions - start equalize_sm_concats +matcher - fusions - start filter_bigger_than_input +matcher - fusions - start insert_copies +matcher - fusions - start propagate_up_rnn_in_qs +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| Step | Step name | Operation | Input Dims | Output Dims | Inputs | Active | Params | Ops | Params | Hints | +| | | | (cxhxw) | (cxhxw) | | size | size | | | | ++======+=============================+=========================+============+=============+========+========+========+=========+============================+======================+ +| 0 | input_1 | input | 1x49x10 | 1x49x10 | | 490 | 0 | | I 1x49x10 FIXED_ORDER=0 | in: hxwxc out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 1 | DEPTHWISE_CONV_2D_0_0_fusio | conv_fusion_conv_active | 1x49x10 | 64x25x5 | 0 | 8490 | 2624 | 320.00K | F 64x1x10x4 S 2x2 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 1 M 1 P 4x5x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 2 | DEPTHWISE_CONV_2D_0_1_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 1 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 3 | CONV_2D_0_2_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 2 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 4 | DEPTHWISE_CONV_2D_0_3_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 3 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 5 | CONV_2D_0_4_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 4 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 6 | DEPTHWISE_CONV_2D_0_5_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 5 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 7 | CONV_2D_0_6_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 6 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 8 | DEPTHWISE_CONV_2D_0_7_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 7 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 9 | CONV_2D_0_8_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 8 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 10 | AVERAGE_POOL_2D_0_9 | average_pool | 64x25x5 | 64x1x1 | 9 | 8064 | 0 | 8.06K | T average F 25x5 S 2x2 P | in: cxhxw out: cxhxw | +| | | | | | | | | | 0x0x0x0 zero | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 11 | FULLY_CONNECTED_0_10 | linear | 64x1x1 | 12 | 10 | 76 | 780 | 768 | F 12x64x1x1 | in: cx0x1 out: c | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 12 | SOFTMAX_0_11 | softmax | 12 | 12 | 11 | 24 | 0 | 24 | Beta 0.0 Axis None | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 13 | output_1 | output | 12 | 12 | 12 | 12 | 0 | | O 12 FIXED_ORDER=0 | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | 16000 | 20336 | 2.66M | | | +| | Max active/Total params | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | | 36336 | 2.66M | | | +| | Max mem usage | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| Step | Name | In | Out | Weights | Bias | Mulbias | Calc | Acc | ++======+==================+==================+=================+=================+==========+==========+=======+=======+ +| 0 | input_1 | -247.97>chan | Q32.0 | Q32.0 | +| | D_0_0 | 27124<246.03 | 32769<13.88 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 1 | DEPTHWISE_CONV_2 | -13.99>chan | Q32.0 | Q32.0 | +| | D_0_1 | 2769<13.88 | 43035<15.80 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 2 | DEPTHWISE_CONV_2 | -15.93>chan | Q32.0 | Q32.0 | +| | | 3035<15.80 | 84013<13.44 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 3 | CONV_2D_0_2_acti | -13.55>chan | Q32.0 | Q32.0 | +| | D_0_3 | 4013<13.44 | 16162<13.86 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 4 | DEPTHWISE_CONV_2 | -13.97>chan | Q32.0 | Q32.0 | +| | | 6162<13.86 | 70251<10.12 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 5 | CONV_2D_0_4_acti | -10.20>chan | Q32.0 | Q32.0 | +| | D_0_5 | 0251<10.12 | 10755<12.21 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 6 | DEPTHWISE_CONV_2 | -12.30>chan | Q32.0 | Q32.0 | +| | | 0755<12.21 | 1055<9.72 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 7 | CONV_2D_0_6_acti | -9.79>chan | Q32.0 | Q32.0 | +| | D_0_7 | 055<9.72 | 75360<14.70 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 8 | DEPTHWISE_CONV_2 | -14.82>chan | Q32.0 | Q32.0 | +| | | 5360<14.70 | 14717<11.07 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 9 | CONV_2D_0_8_acti | -11.15>chan | Q32.0 | Q32.0 | +| | 0_10 | 4717<11.07 | 00000<29.32 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 12 | SOFTMAX_0_11 | -29.55 W: 10, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 328000 + +==== Process Tiling For User Kernel: S1_Conv2d_64x1x10x4_Relu ======================= +S1_Conv2d_64x1x10x4_Relu Partition[0] Size = 39409 (Min: 200, Max: 55313), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S1_Conv2d_64x1x10x4_Relu, Total Raw Memory: 43448 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S1_Conv2d_64x1x10x4_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 43448, Reusable Memory: 5288, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S2_Conv2d_64x1x3x3_Relu ======================= +S2_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S2_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S2_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S2_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S2_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S3_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S3_Conv2d_64x64x1x1_Relu ======================= +S3_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S3_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S3_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S3_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S3_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S3_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S4_Conv2d_64x1x3x3_Relu ======================= +S4_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S4_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S4_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S4_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S5_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S5_Conv2d_64x64x1x1_Relu ======================= +S5_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S5_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S5_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S5_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S5_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S5_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S6_Conv2d_64x1x3x3_Relu ======================= +S6_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S6_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S6_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S6_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S6_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S7_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S7_Conv2d_64x64x1x1_Relu ======================= +S7_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S7_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S7_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S7_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S7_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S7_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S8_Conv2d_64x1x3x3_Relu ======================= +S8_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S8_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S8_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S8_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S8_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S9_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S9_Conv2d_64x64x1x1_Relu ======================= +S9_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S9_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S9_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S9_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S9_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S9_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 25, Pad:[0,0] => Ho: 1 +OverlapP: 23 +TileCons: 2 +UsedIn : [5 x 25] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 8000 + +==== Process Tiling For User Kernel: S10_AveragePool_25x5 ======================= +S10_AveragePool_25x5 Partition[0] Size = 16277 (Min: 250, Max: 16173), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S10_AveragePool_25x5, Total Raw Memory: 8076 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S10_AveragePool_25x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 8076, Reusable Memory: 40660, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S11_Linear_12x64x1x1, Linear: InDim: 64, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S11_Linear_12x64x1x1 ======================= +S11_Linear_12x64x1x1 Partition[0] Size = 1805 (Min: 0, Max: 1973), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S11_Linear_12x64x1x1, Total Raw Memory: 928 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S11_Linear_12x64x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 928, Reusable Memory: 47808, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S12_SoftMax ======================= + S12_SoftMax Partition[0] Size = 57 (Min: 8, Max: 81), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S12_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. + S12_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S11_Output[ In] Adding Edge From S11_Linear_12x64x1x1 To S12_SoftMax New + Symbol: S10_Output[ In] Adding Edge From S10_AveragePool_25x5 To S11_Linear_12x64x1x1 New + Symbol: S9_Output[ In] Adding Edge From S9_Conv2d_64x64x1x1_Relu To S10_AveragePool_25x5 New + Symbol: S8_Output[ In] Adding Edge From S8_Conv2d_64x1x3x3_Relu To S9_Conv2d_64x64x1x1_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_64x64x1x1_Relu To S8_Conv2d_64x1x3x3_Relu New + Symbol: S6_Output[ In] Adding Edge From S6_Conv2d_64x1x3x3_Relu To S7_Conv2d_64x64x1x1_Relu New + Symbol: S5_Output[ In] Adding Edge From S5_Conv2d_64x64x1x1_Relu To S6_Conv2d_64x1x3x3_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_64x1x3x3_Relu To S5_Conv2d_64x64x1x1_Relu New + Symbol: S3_Output[ In] Adding Edge From S3_Conv2d_64x64x1x1_Relu To S4_Conv2d_64x1x3x3_Relu New + Symbol: S2_Output[ In] Adding Edge From S2_Conv2d_64x1x3x3_Relu To S3_Conv2d_64x64x1x1_Relu New + Symbol: S1_Output[ In] Adding Edge From S1_Conv2d_64x1x10x4_Relu To S2_Conv2d_64x1x3x3_Relu New + Symbol: Output_1[Undef] Adding Edge From S12_SoftMax To __GraphExit__ New + Symbol: S12_Infos[ In] Adding Edge From __GraphEntry__ To S12_SoftMax New + Symbol: S11_Mul_shift[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 New + Symbol: S11_Mul_scale[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S11_Biases[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S11_Weights[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S11_Infos[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_AveragePool_25x5 New + Symbol: S9_Mul_shift[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu New + Symbol: S9_Mul_scale[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S9_Biases[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S9_Weights[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S9_Infos[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S8_Mul_shift[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu New + Symbol: S8_Mul_scale[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S8_Biases[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S8_Weights[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S8_Infos[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu New + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Biases[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Weights[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S6_Mul_shift[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu New + Symbol: S6_Mul_scale[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S6_Biases[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S6_Weights[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S6_Infos[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S5_Mul_shift[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu New + Symbol: S5_Mul_scale[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S5_Biases[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S5_Weights[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S5_Infos[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu New + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Biases[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Weights[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S3_Mul_shift[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu New + Symbol: S3_Mul_scale[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S3_Biases[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S3_Weights[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S3_Infos[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S2_Mul_shift[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu New + Symbol: S2_Mul_scale[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S2_Biases[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S2_Weights[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S2_Infos[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S1_Mul_shift[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu New + Symbol: S1_Mul_scale[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S1_Biases[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S1_Weights[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S1_Infos[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S0_Input_1[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 24000 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 49685 => Alloc: OK + +[FULL] Remapping [24000 .. 49684] to [0 .. 25684] Align compensation: 3 +[PART] Remapping [0 .. 23999] to [25688 .. 49687] Align compensation: 0 +[PART] Remapping [49685 .. 349999] to [49688 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_s_quantCNN is sucessfull, L2: 49685 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => S0_Input_1 + (null) => S1_Infos + (null) => S1_Weights + (null) => S1_Biases + (null) => S1_Mul_scale + (null) => S1_Mul_shift + (null) => S2_Infos + (null) => S2_Weights + (null) => S2_Biases + (null) => S2_Mul_scale + (null) => S2_Mul_shift + (null) => S3_Infos + (null) => S3_Weights + (null) => S3_Biases + (null) => S3_Mul_scale + (null) => S3_Mul_shift + (null) => S4_Infos + (null) => S4_Weights + (null) => S4_Biases + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S5_Infos + (null) => S5_Weights + (null) => S5_Biases + (null) => S5_Mul_scale + (null) => S5_Mul_shift + (null) => S6_Infos + (null) => S6_Weights + (null) => S6_Biases + (null) => S6_Mul_scale + (null) => S6_Mul_shift + (null) => S7_Infos + (null) => S7_Weights + (null) => S7_Biases + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S8_Infos + (null) => S8_Weights + (null) => S8_Biases + (null) => S8_Mul_scale + (null) => S8_Mul_shift + (null) => S9_Infos + (null) => S9_Weights + (null) => S9_Biases + (null) => S9_Mul_scale + (null) => S9_Mul_shift + (null) => S10_Infos + (null) => S11_Infos + (null) => S11_Weights + (null) => S11_Biases + (null) => S11_Mul_scale + (null) => S11_Mul_shift + (null) => S12_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 3 4 5 6 7 8 9 1 10 11 12 + + Living Dynamic Symbols: [S0_Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 8: UKer S1_Conv2d_64x1x10x4_Relu, Operations: 328000 + I Buff In => S0_Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 490, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S1_Weights --L2-- Size: 2560, L3_Move: 0, L2_Move: 2560, TileOverhead: 1.000000, L2Buff: 0, Addr: 876 +CI Buff Bias => S1_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 492 + O Buff Out => S1_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 3436 +CI Buff Scale => S1_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 748 +CI Buff ScaleN => S1_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 812 +CI Buff Infos => S1_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43436 + Kernel Memory : L3: 0, L2: 11443 + Kernel Total Memory: 11443, L3 moves: 0, L2 moves: 11443, Move overhead: 1.000000 + Kernel Operations : 328000 [KernelOper/GraphOper: 12.126680%], Move/Operation ratio: [L3: 0.000000, L2: 0.034887] + Successors: 2 + + Living Dynamic Symbols: [S0_Input_1] [S1_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S2_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S1_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S2_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S2_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S2_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S2_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S2_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S2_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 3 + + Living Dynamic Symbols: [S1_Output] [S2_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S3_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S2_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S3_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S3_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S3_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S3_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S3_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S3_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 4 + + Living Dynamic Symbols: [S2_Output] [S3_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S4_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S3_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S4_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S4_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S4_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 5 + + Living Dynamic Symbols: [S3_Output] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S5_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S5_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S5_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S5_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S5_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S5_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S5_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 6 + + Living Dynamic Symbols: [S4_Output] [S5_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S6_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S5_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S6_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S6_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S6_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S6_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S6_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S6_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 7 + + Living Dynamic Symbols: [S5_Output] [S6_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S7_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S6_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S7_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S7_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S7_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 8 + + Living Dynamic Symbols: [S6_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S8_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S8_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S8_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S8_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S8_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S8_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S8_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 9 + + Living Dynamic Symbols: [S7_Output] [S8_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S9_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S8_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S9_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S9_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S9_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S9_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S9_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S9_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 10 + + Living Dynamic Symbols: [S8_Output] [S9_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S10_AveragePool_25x5, Operations: 8000 + I Buff In => S9_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S10_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 8000 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 8064 + Kernel Memory : L3: 0, L2: 8073 + Kernel Total Memory: 8073, L3 moves: 0, L2 moves: 8073, Move overhead: 1.000000 + Kernel Operations : 8000 [KernelOper/GraphOper: 0.295773%], Move/Operation ratio: [L3: 0.000000, L2: 1.009125] + Successors: 11 + + Living Dynamic Symbols: [S9_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S11_Linear_12x64x1x1, Operations: 768 + I Buff In => S10_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S11_Weights --L2-- Size: 768, L3_Move: 0, L2_Move: 768, TileOverhead: 1.000000, L2Buff: 0, Addr: 64 +CI Buff Bias => S11_Biases --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 832 + O Buff Out => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 880 +CI Buff Scale => S11_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 892 +CI Buff ScaleN => S11_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 904 +CI Buff Infos => S11_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 916 + Kernel Memory : L3: 0, L2: 925 + Kernel Total Memory: 925, L3 moves: 0, L2 moves: 925, Move overhead: 1.000000 + Kernel Operations : 768 [KernelOper/GraphOper: 0.028394%], Move/Operation ratio: [L3: 0.000000, L2: 1.204427] + Successors: 12 + + Living Dynamic Symbols: [S10_Output] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S12_SoftMax, Operations: 12 + I Buff In => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S12_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000444%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 20489 + Graph nodes min global memory: L3: 0, L2: 20492 + Graph sum of kernel arguments size: 170318, L3 moves: 0, L2 moves: 170318, Move overhead: 1.000000 + Graph total operations: 2704780 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 170318, Total L3_Move: 0, Total L2_Move: 170318, Tiling Overhead Average: 1.000000 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 16000 + Const 0 4492 + Total 0 20492 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + S0_Input_1 Externally allocated + S1_Infos INSTALL: HyperFlash[ 0: 13]@ 25520 LOAD: L2[ 0: 13]@ 25520 EXEC: L2[ 0: 13]@ 25520 , Size: 9 + S1_Weights INSTALL: HyperFlash[ 0: 13]@ 16384 LOAD: L2[ 0: 13]@ 16384 EXEC: L2[ 0: 13]@ 16384 , Size: 2560 + S1_Biases INSTALL: HyperFlash[ 0: 13]@ 22016 LOAD: L2[ 0: 13]@ 22016 EXEC: L2[ 0: 13]@ 22016 , Size: 256 + S1_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24320 LOAD: L2[ 0: 13]@ 24320 EXEC: L2[ 0: 13]@ 24320 , Size: 64 + S1_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24384 LOAD: L2[ 0: 13]@ 24384 EXEC: L2[ 0: 13]@ 24384 , Size: 64 + S2_Infos INSTALL: HyperFlash[ 0: 13]@ 25532 LOAD: L2[ 0: 13]@ 25532 EXEC: L2[ 0: 13]@ 25532 , Size: 9 + S2_Weights INSTALL: HyperFlash[ 0: 13]@ 19712 LOAD: L2[ 0: 13]@ 19712 EXEC: L2[ 0: 13]@ 19712 , Size: 576 + S2_Biases INSTALL: HyperFlash[ 0: 13]@ 22272 LOAD: L2[ 0: 13]@ 22272 EXEC: L2[ 0: 13]@ 22272 , Size: 256 + S2_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24448 LOAD: L2[ 0: 13]@ 24448 EXEC: L2[ 0: 13]@ 24448 , Size: 64 + S2_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24512 LOAD: L2[ 0: 13]@ 24512 EXEC: L2[ 0: 13]@ 24512 , Size: 64 + S3_Infos INSTALL: HyperFlash[ 0: 13]@ 25544 LOAD: L2[ 0: 13]@ 25544 EXEC: L2[ 0: 13]@ 25544 , Size: 9 + S3_Weights INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 4096 + S3_Biases INSTALL: HyperFlash[ 0: 13]@ 22528 LOAD: L2[ 0: 13]@ 22528 EXEC: L2[ 0: 13]@ 22528 , Size: 256 + S3_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24576 LOAD: L2[ 0: 13]@ 24576 EXEC: L2[ 0: 13]@ 24576 , Size: 64 + S3_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24640 LOAD: L2[ 0: 13]@ 24640 EXEC: L2[ 0: 13]@ 24640 , Size: 64 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 25556 LOAD: L2[ 0: 13]@ 25556 EXEC: L2[ 0: 13]@ 25556 , Size: 9 + S4_Weights INSTALL: HyperFlash[ 0: 13]@ 20288 LOAD: L2[ 0: 13]@ 20288 EXEC: L2[ 0: 13]@ 20288 , Size: 576 + S4_Biases INSTALL: HyperFlash[ 0: 13]@ 22784 LOAD: L2[ 0: 13]@ 22784 EXEC: L2[ 0: 13]@ 22784 , Size: 256 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24704 LOAD: L2[ 0: 13]@ 24704 EXEC: L2[ 0: 13]@ 24704 , Size: 64 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24768 LOAD: L2[ 0: 13]@ 24768 EXEC: L2[ 0: 13]@ 24768 , Size: 64 + S5_Infos INSTALL: HyperFlash[ 0: 13]@ 25568 LOAD: L2[ 0: 13]@ 25568 EXEC: L2[ 0: 13]@ 25568 , Size: 9 + S5_Weights INSTALL: HyperFlash[ 0: 13]@ 4096 LOAD: L2[ 0: 13]@ 4096 EXEC: L2[ 0: 13]@ 4096 , Size: 4096 + S5_Biases INSTALL: HyperFlash[ 0: 13]@ 23040 LOAD: L2[ 0: 13]@ 23040 EXEC: L2[ 0: 13]@ 23040 , Size: 256 + S5_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24832 LOAD: L2[ 0: 13]@ 24832 EXEC: L2[ 0: 13]@ 24832 , Size: 64 + S5_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24896 LOAD: L2[ 0: 13]@ 24896 EXEC: L2[ 0: 13]@ 24896 , Size: 64 + S6_Infos INSTALL: HyperFlash[ 0: 13]@ 25580 LOAD: L2[ 0: 13]@ 25580 EXEC: L2[ 0: 13]@ 25580 , Size: 9 + S6_Weights INSTALL: HyperFlash[ 0: 13]@ 20864 LOAD: L2[ 0: 13]@ 20864 EXEC: L2[ 0: 13]@ 20864 , Size: 576 + S6_Biases INSTALL: HyperFlash[ 0: 13]@ 23296 LOAD: L2[ 0: 13]@ 23296 EXEC: L2[ 0: 13]@ 23296 , Size: 256 + S6_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24960 LOAD: L2[ 0: 13]@ 24960 EXEC: L2[ 0: 13]@ 24960 , Size: 64 + S6_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25024 LOAD: L2[ 0: 13]@ 25024 EXEC: L2[ 0: 13]@ 25024 , Size: 64 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 25592 LOAD: L2[ 0: 13]@ 25592 EXEC: L2[ 0: 13]@ 25592 , Size: 9 + S7_Weights INSTALL: HyperFlash[ 0: 13]@ 8192 LOAD: L2[ 0: 13]@ 8192 EXEC: L2[ 0: 13]@ 8192 , Size: 4096 + S7_Biases INSTALL: HyperFlash[ 0: 13]@ 23552 LOAD: L2[ 0: 13]@ 23552 EXEC: L2[ 0: 13]@ 23552 , Size: 256 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25088 LOAD: L2[ 0: 13]@ 25088 EXEC: L2[ 0: 13]@ 25088 , Size: 64 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25152 LOAD: L2[ 0: 13]@ 25152 EXEC: L2[ 0: 13]@ 25152 , Size: 64 + S8_Infos INSTALL: HyperFlash[ 0: 13]@ 25604 LOAD: L2[ 0: 13]@ 25604 EXEC: L2[ 0: 13]@ 25604 , Size: 9 + S8_Weights INSTALL: HyperFlash[ 0: 13]@ 21440 LOAD: L2[ 0: 13]@ 21440 EXEC: L2[ 0: 13]@ 21440 , Size: 576 + S8_Biases INSTALL: HyperFlash[ 0: 13]@ 23808 LOAD: L2[ 0: 13]@ 23808 EXEC: L2[ 0: 13]@ 23808 , Size: 256 + S8_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25216 LOAD: L2[ 0: 13]@ 25216 EXEC: L2[ 0: 13]@ 25216 , Size: 64 + S8_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25280 LOAD: L2[ 0: 13]@ 25280 EXEC: L2[ 0: 13]@ 25280 , Size: 64 + S9_Infos INSTALL: HyperFlash[ 0: 13]@ 25616 LOAD: L2[ 0: 13]@ 25616 EXEC: L2[ 0: 13]@ 25616 , Size: 9 + S9_Weights INSTALL: HyperFlash[ 0: 13]@ 12288 LOAD: L2[ 0: 13]@ 12288 EXEC: L2[ 0: 13]@ 12288 , Size: 4096 + S9_Biases INSTALL: HyperFlash[ 0: 13]@ 24064 LOAD: L2[ 0: 13]@ 24064 EXEC: L2[ 0: 13]@ 24064 , Size: 256 + S9_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25344 LOAD: L2[ 0: 13]@ 25344 EXEC: L2[ 0: 13]@ 25344 , Size: 64 + S9_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25408 LOAD: L2[ 0: 13]@ 25408 EXEC: L2[ 0: 13]@ 25408 , Size: 64 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 25628 LOAD: L2[ 0: 13]@ 25628 EXEC: L2[ 0: 13]@ 25628 , Size: 9 + S11_Infos INSTALL: HyperFlash[ 0: 13]@ 25640 LOAD: L2[ 0: 13]@ 25640 EXEC: L2[ 0: 13]@ 25640 , Size: 9 + S11_Weights INSTALL: HyperFlash[ 0: 13]@ 18944 LOAD: L2[ 0: 13]@ 18944 EXEC: L2[ 0: 13]@ 18944 , Size: 768 + S11_Biases INSTALL: HyperFlash[ 0: 13]@ 25472 LOAD: L2[ 0: 13]@ 25472 EXEC: L2[ 0: 13]@ 25472 , Size: 48 + S11_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25652 LOAD: L2[ 0: 13]@ 25652 EXEC: L2[ 0: 13]@ 25652 , Size: 12 + S11_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25664 LOAD: L2[ 0: 13]@ 25664 EXEC: L2[ 0: 13]@ 25664 , Size: 12 + S12_Infos INSTALL: HyperFlash[ 0: 13]@ 25676 LOAD: L2[ 0: 13]@ 25676 EXEC: L2[ 0: 13]@ 25676 , Size: 9 + Output_1 Externally allocated + S1_Output EXEC: L2[ 1: 2]@ 33688 , Size: 8000 + S2_Output EXEC: L2[ 2: 3]@ 25688 , Size: 8000 + S3_Output EXEC: L2[ 3: 4]@ 33688 , Size: 8000 + S4_Output EXEC: L2[ 4: 5]@ 25688 , Size: 8000 + S5_Output EXEC: L2[ 5: 6]@ 33688 , Size: 8000 + S6_Output EXEC: L2[ 6: 7]@ 25688 , Size: 8000 + S7_Output EXEC: L2[ 7: 8]@ 33688 , Size: 8000 + S8_Output EXEC: L2[ 8: 9]@ 41688 , Size: 8000 + S9_Output EXEC: L2[ 9: 10]@ 25688 , Size: 8000 + S10_Output EXEC: L2[ 10: 11]@ 33688 , Size: 64 + S11_Output EXEC: L2[ 11: 12]@ 25688 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S1_Conv2d_64x1x10x4_Relu +Generating Code For User Kernel: S2_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S3_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S4_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S5_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S6_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S7_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S8_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S9_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S10_AveragePool_25x5 +Generating Code For User Kernel: S11_Linear_12x64x1x1 +Generating Code For User Kernel: S12_SoftMax +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Weights.tensor: 2560 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Weights.tensor: 768 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Biases.tensor: 48 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S12_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_s_quant_L3_Flash_Const.dat (size 25688) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 43448 +L2 Memory size (Bytes) : Given: 350000, Used: 49685 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 170318 Bytes +Sum of all Kernels arguments size : 170318 Bytes +Tiling Bandwith overhead : 1.000000 Move/KerArgSize +Sum of baseline bandwidth : 4248224 Bytes +Percentage of baseline BW for L2 : 4.00916 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 2704780 Operations +Total amount of flash coefficients : 25688 Bytes + +Basic kernels library : nntool_extra_kernels.h + : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_s_quant.h +Output Directory : BUILD_MODEL_8BIT_EMUL + +The following files have been generated: + KWS_ds_cnn_s_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_s_quantKernels.h Header file for the generated C code + KWS_ds_cnn_s_quant_L3_Flash_Const.dat Flash content for Graph constants +mkdir /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +python3 /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/GenLUT.py --fft_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/LUT.def --mfcc_bf_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCC_FB.def \ + --sample_rate 16000 --frame_size 640 --frame_step 320 --n_frame 49 \ + --n_fft 1024 --n_dct 40 --mfcc_bank_cnt 40 --fmin 20 --fmax 4000 --mfcc_bank_cnt 40 --preempfactor 0.0 \ + --use_tf_mfcc --save_params_header MFCC_params_SMALL.h --use_power --use_high_prec +(1024,) +Filter 0: Start: 2 Stop: 5 Base: 0 Items: 4 +Filter 1: Start: 4 Stop: 8 Base: 4 Items: 5 +Filter 2: Start: 6 Stop: 10 Base: 9 Items: 5 +Filter 3: Start: 9 Stop: 13 Base: 14 Items: 5 +Filter 4: Start: 11 Stop: 15 Base: 19 Items: 5 +Filter 5: Start: 14 Stop: 18 Base: 24 Items: 5 +Filter 6: Start: 16 Stop: 21 Base: 29 Items: 6 +Filter 7: Start: 19 Stop: 24 Base: 35 Items: 6 +Filter 8: Start: 22 Stop: 28 Base: 41 Items: 7 +Filter 9: Start: 25 Stop: 31 Base: 48 Items: 7 +Filter 10: Start: 29 Stop: 34 Base: 55 Items: 6 +Filter 11: Start: 32 Stop: 38 Base: 61 Items: 7 +Filter 12: Start: 35 Stop: 42 Base: 68 Items: 8 +Filter 13: Start: 39 Stop: 46 Base: 76 Items: 8 +Filter 14: Start: 43 Stop: 51 Base: 84 Items: 9 +Filter 15: Start: 47 Stop: 55 Base: 93 Items: 9 +Filter 16: Start: 52 Stop: 60 Base: 102 Items: 9 +Filter 17: Start: 56 Stop: 65 Base: 111 Items: 10 +Filter 18: Start: 61 Stop: 70 Base: 121 Items: 10 +Filter 19: Start: 66 Stop: 75 Base: 131 Items: 10 +Filter 20: Start: 71 Stop: 81 Base: 141 Items: 11 +Filter 21: Start: 76 Stop: 87 Base: 152 Items: 12 +Filter 22: Start: 82 Stop: 93 Base: 164 Items: 12 +Filter 23: Start: 88 Stop: 99 Base: 176 Items: 12 +Filter 24: Start: 94 Stop: 106 Base: 188 Items: 13 +Filter 25: Start: 100 Stop: 113 Base: 201 Items: 14 +Filter 26: Start: 107 Stop: 121 Base: 215 Items: 15 +Filter 27: Start: 114 Stop: 128 Base: 230 Items: 15 +Filter 28: Start: 122 Stop: 137 Base: 245 Items: 16 +Filter 29: Start: 129 Stop: 145 Base: 261 Items: 17 +Filter 30: Start: 138 Stop: 154 Base: 278 Items: 17 +Filter 31: Start: 146 Stop: 163 Base: 295 Items: 18 +Filter 32: Start: 155 Stop: 173 Base: 313 Items: 19 +Filter 33: Start: 164 Stop: 183 Base: 332 Items: 20 +Filter 34: Start: 174 Stop: 194 Base: 352 Items: 21 +Filter 35: Start: 184 Stop: 205 Base: 373 Items: 22 +Filter 36: Start: 195 Stop: 217 Base: 395 Items: 23 +Filter 37: Start: 206 Stop: 229 Base: 418 Items: 24 +Filter 38: Start: 218 Stop: 242 Base: 442 Items: 25 +Filter 39: Start: 230 Stop: 255 Base: 467 Items: 26 +gcc -g -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -I. -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Emulation MFCCmodel.c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccGenerator.c /home/marco-gwt/GWT/AutotilerV2/install/lib/libtile.a -lm -DSMALL +/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -f BUILD_MODEL_8BIT_EMUL --L1 48736 --L2 350000 --L3 6388608 +MFCC_COEF_DYN = 10 +FFT_BITS = 10 + +==== Process Tiling For User Kernel: MFCC ======================= + MFCC Partition[0] Size = 33086 (Min: 0, Max: 33814), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes + MFCC, TiledSpace: Tile0 Iteration Count: 1 + In : Ratio: 0.000000, Size: 2560, Total: 2560, Move: 62720 (Decl x 1.960000) L2 +* Out : Ratio: 0.000000, Size: 3920, Total: 6480, Move: 3920 (Decl x 1.000000) L2 +* InOut1 : Ratio: 6.400000, Size: 8192, Total: 14672, Move: 0 (Decl x 0.000000) L2 +* Out_fft : Ratio: 6.400000, Size: 8192, Total: 22864, Move: 0 (Decl x 0.000000) L2 +* shift_fft : Ratio: 3.200000, Size: 1024, Total: 23888, Move: 0 (Decl x 0.000000) L2 +* InOut2 : Ratio: 0.125000, Size: 160, Total: 24048, Move: 0 (Decl x 0.000000) L2 +* Shift : Ratio: 0.003125, Size: 4, Total: 24052, Move: 0 (Decl x 0.000000) L2 +* WinTable : Ratio: 3.200000, Size: 2048, Total: 26100, Move: 2048 (Decl x 1.000000) L2 +* Twiddles_fft : Ratio: 6.400000, Size: 4096, Total: 30196, Move: 4096 (Decl x 1.000000) L2 +* SwapTable_fft : Ratio: 3.200000, Size: 2048, Total: 32244, Move: 2048 (Decl x 1.000000) L2 +*MFCC_FilterBank : Ratio: 0.125000, Size: 320, Total: 32564, Move: 320 (Decl x 1.000000) L2 +* MFCC_Coeffs : Ratio: 1.543750, Size: 988, Total: 33552, Move: 988 (Decl x 1.000000) L2 +* shift_bf : Ratio: 0.125000, Size: 40, Total: 33592, Move: 0 (Decl x 0.000000) L2 +* DCT_Coeff : Ratio: 5.000000, Size: 3200, Total: 36792, Move: 3200 (Decl x 1.000000) L2 + MFCC - IterSpace: Tile0 - L1 Memory: 36792, L2Move: 79340, L3Move: 0, Tiling Overhead: 1.631839 + MFCC Iteration for Tiled Space: 1 + MFCC For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 36792, Reusable Memory: 11944, Used L2 Memory: 0 +================================================================================================= + + +Generating Code For User Kernel: MFCC + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 36792 +L2 Memory size (Bytes) : Given: 350000, Used: 0 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for all user kernels : 0 Bytes +L2 Memory bandwidth for all user kerneks : 79340 Bytes +Sum of all Kernels arguments size : 48620 Bytes +Tiling Bandwith overhead : 1.631839 Move/KerArgSize +Percentage of baseline BW for L2 : inf % +Percentage of baseline BW for L3 : -nan % +Sum of all Kernels operations : 0 Operations + +Basic kernels library : MfccBasicKernels.h +Output Directory : /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL + +The following files have been generated: + MFCCKernels.c Generated C code for the user kernels and the user kernels groups + MFCCKernels.h Header file for the generated C code +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DSMALL -DWITH_MFCC -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': True} +Pred/Tot: 90/ 100 Accuracy: 90.00% +Pred/Tot: 186/ 200 Accuracy: 93.00% +Pred/Tot: 277/ 300 Accuracy: 92.33% +Pred/Tot: 366/ 400 Accuracy: 91.50% +Pred/Tot: 459/ 500 Accuracy: 91.80% +Pred/Tot: 548/ 600 Accuracy: 91.33% +Pred/Tot: 638/ 700 Accuracy: 91.14% +Pred/Tot: 730/ 800 Accuracy: 91.25% +Pred/Tot: 822/ 900 Accuracy: 91.33% +Pred/Tot: 917/1000 Accuracy: 91.70% +Pred/Tot: 1011/1100 Accuracy: 91.91% +Pred/Tot: 1102/1200 Accuracy: 91.83% +Pred/Tot: 1189/1300 Accuracy: 91.46% +Pred/Tot: 1278/1400 Accuracy: 91.29% +Pred/Tot: 1366/1500 Accuracy: 91.07% +Pred/Tot: 1463/1600 Accuracy: 91.44% +Pred/Tot: 1557/1700 Accuracy: 91.59% +Pred/Tot: 1654/1800 Accuracy: 91.89% +Pred/Tot: 1746/1900 Accuracy: 91.89% +Pred/Tot: 1839/2000 Accuracy: 91.95% +Pred/Tot: 1933/2100 Accuracy: 92.05% +Pred/Tot: 2029/2200 Accuracy: 92.23% +Pred/Tot: 2124/2300 Accuracy: 92.35% +Pred/Tot: 2215/2400 Accuracy: 92.29% +Pred/Tot: 2303/2500 Accuracy: 92.12% +Pred/Tot: 2394/2600 Accuracy: 92.08% +Pred/Tot: 2490/2700 Accuracy: 92.22% +Pred/Tot: 2585/2800 Accuracy: 92.32% +Pred/Tot: 2675/2900 Accuracy: 92.24% +Pred/Tot: 2765/3000 Accuracy: 92.17% +Pred/Tot: 2860/3100 Accuracy: 92.26% +Pred/Tot: 2952/3200 Accuracy: 92.25% +Pred/Tot: 3049/3300 Accuracy: 92.39% +Pred/Tot: 3137/3400 Accuracy: 92.26% +Pred/Tot: 3231/3500 Accuracy: 92.31% +Pred/Tot: 3324/3600 Accuracy: 92.33% +Pred/Tot: 3414/3700 Accuracy: 92.27% +Pred/Tot: 3504/3800 Accuracy: 92.21% +Pred/Tot: 3598/3900 Accuracy: 92.26% +Pred/Tot: 3690/4000 Accuracy: 92.25% +Pred/Tot: 3783/4100 Accuracy: 92.27% +Pred/Tot: 3876/4200 Accuracy: 92.29% +Pred/Tot: 3968/4300 Accuracy: 92.28% +Pred/Tot: 4064/4400 Accuracy: 92.36% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4103/4444 Accuracy: 92.33% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 1 312 1 6 2 5 8 11 10 1 7 7] + [ 0 5 380 3 0 0 5 1 0 0 0 3] + [ 0 12 4 367 1 5 2 3 0 0 2 10] + [ 0 4 1 0 319 3 1 1 0 8 9 4] + [ 0 6 0 9 0 350 1 0 0 0 3 8] + [ 0 10 11 0 0 0 322 6 0 0 1 2] + [ 0 8 1 0 0 0 2 350 0 0 1 1] + [ 1 7 0 0 7 2 0 1 337 6 0 2] + [ 0 3 0 0 23 0 0 0 13 324 7 3] + [ 2 5 0 0 8 0 1 0 1 1 331 1] + [ 0 10 1 7 1 4 0 0 2 2 5 340]] +Pred/Tot: 90/ 100 Accuracy: 90.00% +Pred/Tot: 185/ 200 Accuracy: 92.50% +Pred/Tot: 280/ 300 Accuracy: 93.33% +Pred/Tot: 375/ 400 Accuracy: 93.75% +Pred/Tot: 466/ 500 Accuracy: 93.20% +Pred/Tot: 559/ 600 Accuracy: 93.17% +Pred/Tot: 654/ 700 Accuracy: 93.43% +Pred/Tot: 750/ 800 Accuracy: 93.75% +Pred/Tot: 844/ 900 Accuracy: 93.78% +Pred/Tot: 938/1000 Accuracy: 93.80% +Pred/Tot: 1027/1100 Accuracy: 93.36% +Pred/Tot: 1121/1200 Accuracy: 93.42% +Pred/Tot: 1213/1300 Accuracy: 93.31% +Pred/Tot: 1308/1400 Accuracy: 93.43% +Pred/Tot: 1399/1500 Accuracy: 93.27% +Pred/Tot: 1495/1600 Accuracy: 93.44% +Pred/Tot: 1590/1700 Accuracy: 93.53% +Pred/Tot: 1678/1800 Accuracy: 93.22% +Pred/Tot: 1771/1900 Accuracy: 93.21% +Pred/Tot: 1864/2000 Accuracy: 93.20% +Pred/Tot: 1958/2100 Accuracy: 93.24% +Pred/Tot: 2054/2200 Accuracy: 93.36% +Pred/Tot: 2146/2300 Accuracy: 93.30% +Pred/Tot: 2241/2400 Accuracy: 93.38% +Pred/Tot: 2331/2500 Accuracy: 93.24% +Pred/Tot: 2422/2600 Accuracy: 93.15% +Pred/Tot: 2515/2700 Accuracy: 93.15% +Pred/Tot: 2609/2800 Accuracy: 93.18% +Pred/Tot: 2703/2900 Accuracy: 93.21% +Pred/Tot: 2794/3000 Accuracy: 93.13% +Pred/Tot: 2889/3100 Accuracy: 93.19% +Pred/Tot: 2983/3200 Accuracy: 93.22% +Pred/Tot: 3081/3300 Accuracy: 93.36% +Pred/Tot: 3176/3400 Accuracy: 93.41% +Pred/Tot: 3269/3500 Accuracy: 93.40% +Pred/Tot: 3367/3600 Accuracy: 93.53% +Pred/Tot: 3459/3700 Accuracy: 93.49% +Pred/Tot: 3553/3800 Accuracy: 93.50% +Pred/Tot: 3648/3900 Accuracy: 93.54% +Pred/Tot: 3742/4000 Accuracy: 93.55% +Pred/Tot: 3835/4100 Accuracy: 93.54% +Pred/Tot: 3931/4200 Accuracy: 93.60% +Pred/Tot: 4026/4300 Accuracy: 93.63% +Pred/Tot: 4119/4400 Accuracy: 93.61% +Pred/Tot: 4210/4500 Accuracy: 93.56% +Pred/Tot: 4301/4600 Accuracy: 93.50% +Pred/Tot: 4392/4700 Accuracy: 93.45% +Pred/Tot: 4489/4800 Accuracy: 93.52% + +FINAL TESTING ACCURACY: +Pred/Tot: 4571/4889 Accuracy: 93.50% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 358 0 3 3 6 4 11 8 1 8 6] + [ 0 14 397 2 0 0 5 0 0 0 0 1] + [ 0 5 3 386 0 3 1 0 0 0 0 7] + [ 0 8 0 0 390 0 1 0 3 12 6 5] + [ 0 7 1 11 1 375 1 0 0 0 0 10] + [ 0 3 9 2 2 0 388 7 0 0 1 0] + [ 0 10 0 0 1 1 4 378 0 0 1 1] + [ 1 8 0 0 5 3 0 1 368 4 2 4] + [ 1 11 0 1 10 0 3 0 9 359 0 8] + [ 0 3 0 0 4 2 0 0 0 0 395 7] + [ 0 4 0 16 1 5 3 2 0 1 1 369]] diff --git a/accuracy_log/log_test_small_hp_power_v2.txt b/accuracy_log/log_test_small_hp_power_v2.txt new file mode 100644 index 0000000..2205b86 --- /dev/null +++ b/accuracy_log/log_test_small_hp_power_v2.txt @@ -0,0 +1,1780 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_s_quant_power.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 1 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=1 MEDIUM=0 LARGE=0 WITH_MFCC=1 USE_POWER=1 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_SQ8_EMUL +cp model/KWS_ds_cnn_s_quant_power.tflite BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quant.tflite +echo "GENERATING NNTOOL STATE FILE" +GENERATING NNTOOL STATE FILE +nntool -s model/nntool_script BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +open - opening graph file BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quant.tflite load_quantization = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +unified_quantizer - forwards SOFTMAX_0_11 in: -29.55<(i8-0.00)*0.23083353<29.32 out: None stop [] fusion False +unified_quantizer - handler SoftmaxTanHMult selected for SoftMaxParameters(SOFTMAX_0_11) +unified_quantizer - forwards in edge 0 does not match was -29.55<(i8-0.00)*0.23083353<29.32 need -32.00<(i8-0.00)*0.25000000<31.75 forced +unified_quantizer - backwards FULLY_CONNECTED_0_10 in: -11.15<(i8-0.00)*0.08714711<11.07,chan<(i8-0.00)*chan W: 10, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 328000 + +==== Process Tiling For User Kernel: S4_Conv2d_64x1x10x4_Relu ======================= +S4_Conv2d_64x1x10x4_Relu Partition[0] Size = 39393 (Min: 200, Max: 55185), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=25 + In Dim: 58, TileOverlap: 8, Ratio: 2.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S4_Conv2d_64x1x10x4_Relu, Total Raw Memory: 43448 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: In, Size: 492, Base1: 0, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Bias, Size: 256, Base1: 492, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Scale, Size: 64, Base1: 748, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: ScaleN, Size: 64, Base1: 812, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Filter, Size: 2560, Base1: 876, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Out, Size: 8000, Base1: 3436, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: ConvOut, Size: 32000, Base1: 11436, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Infos, Size: 12, Base1: 43436, Base2: 0 +S4_Conv2d_64x1x10x4_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 43448, Reusable Memory: 5288, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S7_Conv2d_64x1x3x3_Relu ======================= +S7_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S7_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S7_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S10_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S10_Conv2d_64x64x1x1_Relu ======================= +S10_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S10_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S10_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S10_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S10_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S10_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S13_Conv2d_64x1x3x3_Relu ======================= +S13_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S13_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S13_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S13_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S13_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S16_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S16_Conv2d_64x64x1x1_Relu ======================= +S16_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S16_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S16_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S16_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S16_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S16_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S19_Conv2d_64x1x3x3_Relu ======================= +S19_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S19_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S19_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S19_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S19_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S22_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S22_Conv2d_64x64x1x1_Relu ======================= +S22_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S22_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S22_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S22_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S22_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S22_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S25_Conv2d_64x1x3x3_Relu ======================= +S25_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S25_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S25_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S25_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S25_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S28_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S28_Conv2d_64x64x1x1_Relu ======================= +S28_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S28_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S28_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S28_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S28_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S28_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 25, Pad:[0,0] => Ho: 1 +OverlapP: 23 +TileCons: 2 +UsedIn : [5 x 25] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 8000 + +==== Process Tiling For User Kernel: S29_AveragePool_25x5 ======================= +S29_AveragePool_25x5 Partition[0] Size = 16271 (Min: 250, Max: 16155), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=1 + In Dim: 25, TileOverlap: 23, Ratio: 2.000000 + Out Dim: 1, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S29_AveragePool_25x5, Total Raw Memory: 8076 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S29_AveragePool_25x5, Arg: In, Size: 8000, Base1: 0, Base2: 0 +Ker: S29_AveragePool_25x5, Arg: Out, Size: 64, Base1: 8000, Base2: 0 +Ker: S29_AveragePool_25x5, Arg: Infos, Size: 12, Base1: 8064, Base2: 0 +S29_AveragePool_25x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 8076, Reusable Memory: 40660, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S32_Linear_12x64x1x1, Linear: InDim: 64, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S32_Linear_12x64x1x1 ======================= +S32_Linear_12x64x1x1 Partition[0] Size = 1791 (Min: 0, Max: 1875), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=1 + +Kernel: S32_Linear_12x64x1x1, Total Raw Memory: 928 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S32_Linear_12x64x1x1, Arg: In, Size: 64, Base1: 0, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Filter, Size: 768, Base1: 64, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Bias, Size: 48, Base1: 832, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Out, Size: 12, Base1: 880, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Scale, Size: 12, Base1: 892, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: ScaleN, Size: 12, Base1: 904, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Infos, Size: 12, Base1: 916, Base2: 0 +S32_Linear_12x64x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 928, Reusable Memory: 47808, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S33_SoftMax ======================= + S33_SoftMax Partition[0] Size = 51 (Min: 8, Max: 63), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=12 + In Dim: 12, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 12, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S33_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S33_SoftMax, Arg: In, Size: 12, Base1: 0, Base2: 0 +Ker: S33_SoftMax, Arg: Out, Size: 24, Base1: 12, Base2: 0 +Ker: S33_SoftMax, Arg: Infos, Size: 12, Base1: 36, Base2: 0 + S33_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S32_Output[ In] Adding Edge From S32_Linear_12x64x1x1 To S33_SoftMax New + Symbol: S29_Output[ In] Adding Edge From S29_AveragePool_25x5 To S32_Linear_12x64x1x1 New + Symbol: S28_Output[ In] Adding Edge From S28_Conv2d_64x64x1x1_Relu To S29_AveragePool_25x5 New + Symbol: S25_Output[ In] Adding Edge From S25_Conv2d_64x1x3x3_Relu To S28_Conv2d_64x64x1x1_Relu New + Symbol: S22_Output[ In] Adding Edge From S22_Conv2d_64x64x1x1_Relu To S25_Conv2d_64x1x3x3_Relu New + Symbol: S19_Output[ In] Adding Edge From S19_Conv2d_64x1x3x3_Relu To S22_Conv2d_64x64x1x1_Relu New + Symbol: S16_Output[ In] Adding Edge From S16_Conv2d_64x64x1x1_Relu To S19_Conv2d_64x1x3x3_Relu New + Symbol: S13_Output[ In] Adding Edge From S13_Conv2d_64x1x3x3_Relu To S16_Conv2d_64x64x1x1_Relu New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_64x64x1x1_Relu To S13_Conv2d_64x1x3x3_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_64x1x3x3_Relu To S10_Conv2d_64x64x1x1_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_64x1x10x4_Relu To S7_Conv2d_64x1x3x3_Relu New + Symbol: Output_1[ Out] Adding Edge From S33_SoftMax To __GraphExit__ New + Symbol: S33_Infos[ In] Adding Edge From __GraphEntry__ To S33_SoftMax New + Symbol: S32_Infos[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 New + Symbol: S32_Mul_shift[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: S32_Mul_scale[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: Dscnnfc1matmul_bias[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: Dscnnfc1weights_quantfakequant[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: S29_Infos[ In] Adding Edge From __GraphEntry__ To S29_AveragePool_25x5 New + Symbol: S28_Infos[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu New + Symbol: S28_Mul_shift[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: S28_Mul_scale[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: S25_Infos[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu New + Symbol: S25_Mul_shift[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: S25_Mul_scale[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: S22_Infos[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu New + Symbol: S22_Mul_shift[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: S22_Mul_scale[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: S19_Infos[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu New + Symbol: S19_Mul_shift[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: S19_Mul_scale[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: S16_Infos[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu New + Symbol: S16_Mul_shift[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: S16_Mul_scale[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu New + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu New + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu New + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: Dscnnconv_1conv2d_fold_bias[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: Dscnnconv_1weights_quantfakequ[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: Input_1[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 24000 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 49685 => Alloc: OK + +[FULL] Remapping [24000 .. 49684] to [0 .. 25684] Align compensation: 3 +[PART] Remapping [0 .. 23999] to [25688 .. 49687] Align compensation: 0 +[PART] Remapping [49685 .. 349999] to [49688 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_s_quantCNN is sucessfull, L2: 49685 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => Input_1 + (null) => Dscnnconv_1weights_quantfakequ + (null) => Dscnnconv_1conv2d_fold_bias + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S4_Infos + (null) => Dscnnconv_ds_1dw_convweights_q + (null) => Dscnnconv_ds_1dw_convdepthwise + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S7_Infos + (null) => Dscnnconv_ds_1pw_convweights_q + (null) => Dscnnconv_ds_1pw_convconv2d_fo + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S10_Infos + (null) => Dscnnconv_ds_2dw_convweights_q + (null) => Dscnnconv_ds_2dw_convdepthwise + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S13_Infos + (null) => Dscnnconv_ds_2pw_convweights_q + (null) => Dscnnconv_ds_2pw_convconv2d_fo + (null) => S16_Mul_scale + (null) => S16_Mul_shift + (null) => S16_Infos + (null) => Dscnnconv_ds_3dw_convweights_q + (null) => Dscnnconv_ds_3dw_convdepthwise + (null) => S19_Mul_scale + (null) => S19_Mul_shift + (null) => S19_Infos + (null) => Dscnnconv_ds_3pw_convweights_q + (null) => Dscnnconv_ds_3pw_convconv2d_fo + (null) => S22_Mul_scale + (null) => S22_Mul_shift + (null) => S22_Infos + (null) => Dscnnconv_ds_4dw_convweights_q + (null) => Dscnnconv_ds_4dw_convdepthwise + (null) => S25_Mul_scale + (null) => S25_Mul_shift + (null) => S25_Infos + (null) => Dscnnconv_ds_4pw_convweights_q + (null) => Dscnnconv_ds_4pw_convconv2d_fo + (null) => S28_Mul_scale + (null) => S28_Mul_shift + (null) => S28_Infos + (null) => S29_Infos + (null) => Dscnnfc1weights_quantfakequant + (null) => Dscnnfc1matmul_bias + (null) => S32_Mul_scale + (null) => S32_Mul_shift + (null) => S32_Infos + (null) => S33_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 3 4 5 6 7 8 9 1 10 11 12 + + Living Dynamic Symbols: [Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 8: UKer S4_Conv2d_64x1x10x4_Relu, Operations: 328000 + I Buff In => Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 490, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_1weights_quantfakequ --L2-- Size: 2560, L3_Move: 0, L2_Move: 2560, TileOverhead: 1.000000, L2Buff: 0, Addr: 876 +CI Buff Bias => Dscnnconv_1conv2d_fold_bias --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 492 + O Buff Out => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 3436 +CI Buff Scale => S4_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 748 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 812 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43436 + Kernel Memory : L3: 0, L2: 11443 + Kernel Total Memory: 11443, L3 moves: 0, L2 moves: 11443, Move overhead: 1.000000 + Kernel Operations : 328000 [KernelOper/GraphOper: 12.126680%], Move/Operation ratio: [L3: 0.000000, L2: 0.034887] + Successors: 2 + + Living Dynamic Symbols: [Input_1] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S7_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_1dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_1dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S7_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 3 + + Living Dynamic Symbols: [S4_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S10_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_1pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_1pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S10_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S10_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 4 + + Living Dynamic Symbols: [S7_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S13_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S10_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_2dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_2dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S13_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S13_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 5 + + Living Dynamic Symbols: [S10_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S16_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S13_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_2pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_2pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S16_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S16_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S16_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S16_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 6 + + Living Dynamic Symbols: [S13_Output] [S16_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S19_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S16_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_3dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_3dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S19_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S19_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S19_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S19_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 7 + + Living Dynamic Symbols: [S16_Output] [S19_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S22_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S19_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_3pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_3pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S22_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S22_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S22_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S22_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 8 + + Living Dynamic Symbols: [S19_Output] [S22_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S25_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S22_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_4dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_4dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S25_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S25_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S25_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S25_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 9 + + Living Dynamic Symbols: [S22_Output] [S25_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S28_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S25_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_4pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_4pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S28_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S28_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S28_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S28_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 10 + + Living Dynamic Symbols: [S25_Output] [S28_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S29_AveragePool_25x5, Operations: 8000 + I Buff In => S28_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S29_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 8000 +CI Buff Infos => S29_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 8064 + Kernel Memory : L3: 0, L2: 8073 + Kernel Total Memory: 8073, L3 moves: 0, L2 moves: 8073, Move overhead: 1.000000 + Kernel Operations : 8000 [KernelOper/GraphOper: 0.295773%], Move/Operation ratio: [L3: 0.000000, L2: 1.009125] + Successors: 11 + + Living Dynamic Symbols: [S28_Output] [S29_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S32_Linear_12x64x1x1, Operations: 768 + I Buff In => S29_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnfc1weights_quantfakequant --L2-- Size: 768, L3_Move: 0, L2_Move: 768, TileOverhead: 1.000000, L2Buff: 0, Addr: 64 +CI Buff Bias => Dscnnfc1matmul_bias --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 832 + O Buff Out => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 880 +CI Buff Scale => S32_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 892 +CI Buff ScaleN => S32_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 904 +CI Buff Infos => S32_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 916 + Kernel Memory : L3: 0, L2: 925 + Kernel Total Memory: 925, L3 moves: 0, L2 moves: 925, Move overhead: 1.000000 + Kernel Operations : 768 [KernelOper/GraphOper: 0.028394%], Move/Operation ratio: [L3: 0.000000, L2: 1.204427] + Successors: 12 + + Living Dynamic Symbols: [S29_Output] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S33_SoftMax, Operations: 12 + I Buff In => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S33_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000444%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 20489 + Graph nodes min global memory: L3: 0, L2: 20492 + Graph sum of kernel arguments size: 170318, L3 moves: 0, L2 moves: 170318, Move overhead: 1.000000 + Graph total operations: 2704780 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 170318, Total L3_Move: 0, Total L2_Move: 170318, Tiling Overhead Average: 1.000000 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 16000 + Const 0 4492 + Total 0 20492 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + Input_1 Externally allocated + Dscnnconv_1weights_quantfakequ INSTALL: HyperFlash[ 0: 13]@ 16384 LOAD: L2[ 0: 13]@ 16384 EXEC: L2[ 0: 13]@ 16384 , Size: 2560 + Dscnnconv_1conv2d_fold_bias INSTALL: HyperFlash[ 0: 13]@ 22016 LOAD: L2[ 0: 13]@ 22016 EXEC: L2[ 0: 13]@ 22016 , Size: 256 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24320 LOAD: L2[ 0: 13]@ 24320 EXEC: L2[ 0: 13]@ 24320 , Size: 64 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24384 LOAD: L2[ 0: 13]@ 24384 EXEC: L2[ 0: 13]@ 24384 , Size: 64 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 25520 LOAD: L2[ 0: 13]@ 25520 EXEC: L2[ 0: 13]@ 25520 , Size: 9 + Dscnnconv_ds_1dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 19712 LOAD: L2[ 0: 13]@ 19712 EXEC: L2[ 0: 13]@ 19712 , Size: 576 + Dscnnconv_ds_1dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 22272 LOAD: L2[ 0: 13]@ 22272 EXEC: L2[ 0: 13]@ 22272 , Size: 256 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24448 LOAD: L2[ 0: 13]@ 24448 EXEC: L2[ 0: 13]@ 24448 , Size: 64 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24512 LOAD: L2[ 0: 13]@ 24512 EXEC: L2[ 0: 13]@ 24512 , Size: 64 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 25532 LOAD: L2[ 0: 13]@ 25532 EXEC: L2[ 0: 13]@ 25532 , Size: 9 + Dscnnconv_ds_1pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 4096 + Dscnnconv_ds_1pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 22528 LOAD: L2[ 0: 13]@ 22528 EXEC: L2[ 0: 13]@ 22528 , Size: 256 + S10_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24576 LOAD: L2[ 0: 13]@ 24576 EXEC: L2[ 0: 13]@ 24576 , Size: 64 + S10_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24640 LOAD: L2[ 0: 13]@ 24640 EXEC: L2[ 0: 13]@ 24640 , Size: 64 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 25544 LOAD: L2[ 0: 13]@ 25544 EXEC: L2[ 0: 13]@ 25544 , Size: 9 + Dscnnconv_ds_2dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 20288 LOAD: L2[ 0: 13]@ 20288 EXEC: L2[ 0: 13]@ 20288 , Size: 576 + Dscnnconv_ds_2dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 22784 LOAD: L2[ 0: 13]@ 22784 EXEC: L2[ 0: 13]@ 22784 , Size: 256 + S13_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24704 LOAD: L2[ 0: 13]@ 24704 EXEC: L2[ 0: 13]@ 24704 , Size: 64 + S13_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24768 LOAD: L2[ 0: 13]@ 24768 EXEC: L2[ 0: 13]@ 24768 , Size: 64 + S13_Infos INSTALL: HyperFlash[ 0: 13]@ 25556 LOAD: L2[ 0: 13]@ 25556 EXEC: L2[ 0: 13]@ 25556 , Size: 9 + Dscnnconv_ds_2pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 4096 LOAD: L2[ 0: 13]@ 4096 EXEC: L2[ 0: 13]@ 4096 , Size: 4096 + Dscnnconv_ds_2pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 23040 LOAD: L2[ 0: 13]@ 23040 EXEC: L2[ 0: 13]@ 23040 , Size: 256 + S16_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24832 LOAD: L2[ 0: 13]@ 24832 EXEC: L2[ 0: 13]@ 24832 , Size: 64 + S16_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24896 LOAD: L2[ 0: 13]@ 24896 EXEC: L2[ 0: 13]@ 24896 , Size: 64 + S16_Infos INSTALL: HyperFlash[ 0: 13]@ 25568 LOAD: L2[ 0: 13]@ 25568 EXEC: L2[ 0: 13]@ 25568 , Size: 9 + Dscnnconv_ds_3dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 20864 LOAD: L2[ 0: 13]@ 20864 EXEC: L2[ 0: 13]@ 20864 , Size: 576 + Dscnnconv_ds_3dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 23296 LOAD: L2[ 0: 13]@ 23296 EXEC: L2[ 0: 13]@ 23296 , Size: 256 + S19_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24960 LOAD: L2[ 0: 13]@ 24960 EXEC: L2[ 0: 13]@ 24960 , Size: 64 + S19_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25024 LOAD: L2[ 0: 13]@ 25024 EXEC: L2[ 0: 13]@ 25024 , Size: 64 + S19_Infos INSTALL: HyperFlash[ 0: 13]@ 25580 LOAD: L2[ 0: 13]@ 25580 EXEC: L2[ 0: 13]@ 25580 , Size: 9 + Dscnnconv_ds_3pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 8192 LOAD: L2[ 0: 13]@ 8192 EXEC: L2[ 0: 13]@ 8192 , Size: 4096 + Dscnnconv_ds_3pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 23552 LOAD: L2[ 0: 13]@ 23552 EXEC: L2[ 0: 13]@ 23552 , Size: 256 + S22_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25088 LOAD: L2[ 0: 13]@ 25088 EXEC: L2[ 0: 13]@ 25088 , Size: 64 + S22_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25152 LOAD: L2[ 0: 13]@ 25152 EXEC: L2[ 0: 13]@ 25152 , Size: 64 + S22_Infos INSTALL: HyperFlash[ 0: 13]@ 25592 LOAD: L2[ 0: 13]@ 25592 EXEC: L2[ 0: 13]@ 25592 , Size: 9 + Dscnnconv_ds_4dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 21440 LOAD: L2[ 0: 13]@ 21440 EXEC: L2[ 0: 13]@ 21440 , Size: 576 + Dscnnconv_ds_4dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 23808 LOAD: L2[ 0: 13]@ 23808 EXEC: L2[ 0: 13]@ 23808 , Size: 256 + S25_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25216 LOAD: L2[ 0: 13]@ 25216 EXEC: L2[ 0: 13]@ 25216 , Size: 64 + S25_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25280 LOAD: L2[ 0: 13]@ 25280 EXEC: L2[ 0: 13]@ 25280 , Size: 64 + S25_Infos INSTALL: HyperFlash[ 0: 13]@ 25604 LOAD: L2[ 0: 13]@ 25604 EXEC: L2[ 0: 13]@ 25604 , Size: 9 + Dscnnconv_ds_4pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 12288 LOAD: L2[ 0: 13]@ 12288 EXEC: L2[ 0: 13]@ 12288 , Size: 4096 + Dscnnconv_ds_4pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 24064 LOAD: L2[ 0: 13]@ 24064 EXEC: L2[ 0: 13]@ 24064 , Size: 256 + S28_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25344 LOAD: L2[ 0: 13]@ 25344 EXEC: L2[ 0: 13]@ 25344 , Size: 64 + S28_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25408 LOAD: L2[ 0: 13]@ 25408 EXEC: L2[ 0: 13]@ 25408 , Size: 64 + S28_Infos INSTALL: HyperFlash[ 0: 13]@ 25616 LOAD: L2[ 0: 13]@ 25616 EXEC: L2[ 0: 13]@ 25616 , Size: 9 + S29_Infos INSTALL: HyperFlash[ 0: 13]@ 25628 LOAD: L2[ 0: 13]@ 25628 EXEC: L2[ 0: 13]@ 25628 , Size: 9 + Dscnnfc1weights_quantfakequant INSTALL: HyperFlash[ 0: 13]@ 18944 LOAD: L2[ 0: 13]@ 18944 EXEC: L2[ 0: 13]@ 18944 , Size: 768 + Dscnnfc1matmul_bias INSTALL: HyperFlash[ 0: 13]@ 25472 LOAD: L2[ 0: 13]@ 25472 EXEC: L2[ 0: 13]@ 25472 , Size: 48 + S32_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25640 LOAD: L2[ 0: 13]@ 25640 EXEC: L2[ 0: 13]@ 25640 , Size: 12 + S32_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25652 LOAD: L2[ 0: 13]@ 25652 EXEC: L2[ 0: 13]@ 25652 , Size: 12 + S32_Infos INSTALL: HyperFlash[ 0: 13]@ 25664 LOAD: L2[ 0: 13]@ 25664 EXEC: L2[ 0: 13]@ 25664 , Size: 9 + S33_Infos INSTALL: HyperFlash[ 0: 13]@ 25676 LOAD: L2[ 0: 13]@ 25676 EXEC: L2[ 0: 13]@ 25676 , Size: 9 + Output_1 Externally allocated + S4_Output EXEC: L2[ 1: 2]@ 33688 , Size: 8000 + S7_Output EXEC: L2[ 2: 3]@ 25688 , Size: 8000 + S10_Output EXEC: L2[ 3: 4]@ 33688 , Size: 8000 + S13_Output EXEC: L2[ 4: 5]@ 25688 , Size: 8000 + S16_Output EXEC: L2[ 5: 6]@ 33688 , Size: 8000 + S19_Output EXEC: L2[ 6: 7]@ 25688 , Size: 8000 + S22_Output EXEC: L2[ 7: 8]@ 33688 , Size: 8000 + S25_Output EXEC: L2[ 8: 9]@ 41688 , Size: 8000 + S28_Output EXEC: L2[ 9: 10]@ 25688 , Size: 8000 + S29_Output EXEC: L2[ 10: 11]@ 33688 , Size: 64 + S32_Output EXEC: L2[ 11: 12]@ 25688 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S4_Conv2d_64x1x10x4_Relu +Generating Code For User Kernel: S7_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S10_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S13_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S16_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S19_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S22_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S25_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S28_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S29_AveragePool_25x5 +Generating Code For User Kernel: S32_Linear_12x64x1x1 +Generating Code For User Kernel: S33_SoftMax +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1weights_quantfakequ.tensor: 2560 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1conv2d_fold_bias.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S29_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1weights_quantfakequant.tensor: 768 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1matmul_bias.tensor: 12 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S33_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_s_quant_L3_Flash_Const.dat (size 25688) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 43448 +L2 Memory size (Bytes) : Given: 350000, Used: 49685 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 170318 Bytes +Sum of all Kernels arguments size : 170318 Bytes +Tiling Bandwith overhead : 1.000000 Move/KerArgSize +Sum of baseline bandwidth : 4248224 Bytes +Percentage of baseline BW for L2 : 4.00916 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 2704780 Operations +Total amount of flash coefficients : 25688 Bytes + +Basic kernels library : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_s_quant.h +Output Directory : BUILD_MODEL_SQ8_EMUL + +The following files have been generated: + KWS_ds_cnn_s_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_s_quantKernels.h Header file for the generated C code + KWS_ds_cnn_s_quant_L3_Flash_Const.dat Flash content for Graph constants +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': True} +Pred/Tot: 91/ 100 Accuracy: 91.00% +Pred/Tot: 188/ 200 Accuracy: 94.00% +Pred/Tot: 279/ 300 Accuracy: 93.00% +Pred/Tot: 369/ 400 Accuracy: 92.25% +Pred/Tot: 462/ 500 Accuracy: 92.40% +Pred/Tot: 552/ 600 Accuracy: 92.00% +Pred/Tot: 643/ 700 Accuracy: 91.86% +Pred/Tot: 735/ 800 Accuracy: 91.88% +Pred/Tot: 824/ 900 Accuracy: 91.56% +Pred/Tot: 917/1000 Accuracy: 91.70% +Pred/Tot: 1008/1100 Accuracy: 91.64% +Pred/Tot: 1098/1200 Accuracy: 91.50% +Pred/Tot: 1189/1300 Accuracy: 91.46% +Pred/Tot: 1280/1400 Accuracy: 91.43% +Pred/Tot: 1369/1500 Accuracy: 91.27% +Pred/Tot: 1466/1600 Accuracy: 91.62% +Pred/Tot: 1559/1700 Accuracy: 91.71% +Pred/Tot: 1656/1800 Accuracy: 92.00% +Pred/Tot: 1748/1900 Accuracy: 92.00% +Pred/Tot: 1840/2000 Accuracy: 92.00% +Pred/Tot: 1933/2100 Accuracy: 92.05% +Pred/Tot: 2028/2200 Accuracy: 92.18% +Pred/Tot: 2124/2300 Accuracy: 92.35% +Pred/Tot: 2216/2400 Accuracy: 92.33% +Pred/Tot: 2302/2500 Accuracy: 92.08% +Pred/Tot: 2392/2600 Accuracy: 92.00% +Pred/Tot: 2487/2700 Accuracy: 92.11% +Pred/Tot: 2582/2800 Accuracy: 92.21% +Pred/Tot: 2672/2900 Accuracy: 92.14% +Pred/Tot: 2764/3000 Accuracy: 92.13% +Pred/Tot: 2859/3100 Accuracy: 92.23% +Pred/Tot: 2951/3200 Accuracy: 92.22% +Pred/Tot: 3049/3300 Accuracy: 92.39% +Pred/Tot: 3139/3400 Accuracy: 92.32% +Pred/Tot: 3232/3500 Accuracy: 92.34% +Pred/Tot: 3327/3600 Accuracy: 92.42% +Pred/Tot: 3417/3700 Accuracy: 92.35% +Pred/Tot: 3510/3800 Accuracy: 92.37% +Pred/Tot: 3603/3900 Accuracy: 92.38% +Pred/Tot: 3694/4000 Accuracy: 92.35% +Pred/Tot: 3787/4100 Accuracy: 92.37% +Pred/Tot: 3879/4200 Accuracy: 92.36% +Pred/Tot: 3973/4300 Accuracy: 92.40% +Pred/Tot: 4070/4400 Accuracy: 92.50% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4109/4444 Accuracy: 92.46% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 1 314 1 6 0 5 6 12 9 0 9 8] + [ 0 8 378 3 0 0 4 2 0 0 0 2] + [ 0 10 5 364 0 3 5 3 0 0 2 14] + [ 0 4 1 0 322 3 1 1 0 9 6 3] + [ 0 7 2 8 0 352 0 0 0 0 2 6] + [ 0 10 10 0 0 1 323 8 0 0 0 0] + [ 0 10 0 0 0 0 2 350 0 0 1 0] + [ 1 5 0 0 5 1 1 2 343 4 0 1] + [ 0 2 1 0 20 0 0 0 13 326 6 5] + [ 2 1 1 0 8 1 1 0 1 1 331 3] + [ 0 14 1 9 2 3 1 0 2 1 4 335]] +Pred/Tot: 90/ 100 Accuracy: 90.00% +Pred/Tot: 183/ 200 Accuracy: 91.50% +Pred/Tot: 278/ 300 Accuracy: 92.67% +Pred/Tot: 371/ 400 Accuracy: 92.75% +Pred/Tot: 460/ 500 Accuracy: 92.00% +Pred/Tot: 550/ 600 Accuracy: 91.67% +Pred/Tot: 646/ 700 Accuracy: 92.29% +Pred/Tot: 741/ 800 Accuracy: 92.62% +Pred/Tot: 834/ 900 Accuracy: 92.67% +Pred/Tot: 929/1000 Accuracy: 92.90% +Pred/Tot: 1018/1100 Accuracy: 92.55% +Pred/Tot: 1116/1200 Accuracy: 93.00% +Pred/Tot: 1206/1300 Accuracy: 92.77% +Pred/Tot: 1301/1400 Accuracy: 92.93% +Pred/Tot: 1393/1500 Accuracy: 92.87% +Pred/Tot: 1490/1600 Accuracy: 93.12% +Pred/Tot: 1582/1700 Accuracy: 93.06% +Pred/Tot: 1669/1800 Accuracy: 92.72% +Pred/Tot: 1763/1900 Accuracy: 92.79% +Pred/Tot: 1856/2000 Accuracy: 92.80% +Pred/Tot: 1947/2100 Accuracy: 92.71% +Pred/Tot: 2044/2200 Accuracy: 92.91% +Pred/Tot: 2135/2300 Accuracy: 92.83% +Pred/Tot: 2231/2400 Accuracy: 92.96% +Pred/Tot: 2321/2500 Accuracy: 92.84% +Pred/Tot: 2411/2600 Accuracy: 92.73% +Pred/Tot: 2503/2700 Accuracy: 92.70% +Pred/Tot: 2594/2800 Accuracy: 92.64% +Pred/Tot: 2688/2900 Accuracy: 92.69% +Pred/Tot: 2780/3000 Accuracy: 92.67% +Pred/Tot: 2876/3100 Accuracy: 92.77% +Pred/Tot: 2970/3200 Accuracy: 92.81% +Pred/Tot: 3069/3300 Accuracy: 93.00% +Pred/Tot: 3163/3400 Accuracy: 93.03% +Pred/Tot: 3255/3500 Accuracy: 93.00% +Pred/Tot: 3353/3600 Accuracy: 93.14% +Pred/Tot: 3444/3700 Accuracy: 93.08% +Pred/Tot: 3538/3800 Accuracy: 93.11% +Pred/Tot: 3633/3900 Accuracy: 93.15% +Pred/Tot: 3725/4000 Accuracy: 93.12% +Pred/Tot: 3818/4100 Accuracy: 93.12% +Pred/Tot: 3914/4200 Accuracy: 93.19% +Pred/Tot: 4010/4300 Accuracy: 93.26% +Pred/Tot: 4103/4400 Accuracy: 93.25% +Pred/Tot: 4195/4500 Accuracy: 93.22% +Pred/Tot: 4286/4600 Accuracy: 93.17% +Pred/Tot: 4378/4700 Accuracy: 93.15% +Pred/Tot: 4475/4800 Accuracy: 93.23% + +FINAL TESTING ACCURACY: +Pred/Tot: 4557/4889 Accuracy: 93.21% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 358 1 4 3 5 5 13 4 1 7 7] + [ 1 11 395 2 0 0 8 0 0 0 0 2] + [ 0 6 2 384 0 2 2 0 0 0 0 9] + [ 0 8 0 0 383 3 1 1 5 13 8 3] + [ 0 10 3 9 1 372 0 1 0 0 2 8] + [ 0 5 7 0 2 0 392 5 0 0 1 0] + [ 0 12 0 0 1 1 3 377 0 0 1 1] + [ 0 10 1 0 2 3 0 0 369 5 3 3] + [ 1 10 0 1 11 0 3 0 11 355 0 10] + [ 0 3 0 0 5 2 0 0 0 0 396 5] + [ 0 9 2 12 1 6 1 0 0 0 3 368]] diff --git a/accuracy_log/log_test_small_hp_spectr.txt b/accuracy_log/log_test_small_hp_spectr.txt new file mode 100644 index 0000000..6c5bfba --- /dev/null +++ b/accuracy_log/log_test_small_hp_spectr.txt @@ -0,0 +1,1457 @@ +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_s_quant.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 0 --use_high_prec 1 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=1 MEDIUM=0 LARGE=0 WITH_MFCC=1 USE_POWER=0 USE_HIGH_PREC=1 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +rm -f BUILD_MODEL_8BIT_EMUL/GenTile +rm -f -rf BUILD_MODEL_8BIT_EMUL +rm -f BUILD_MODEL_8BIT_EMUL/*.dat +rm -rf /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_8BIT_EMUL +cp model/KWS_ds_cnn_s_quant.tflite BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quant.tflite +echo "GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL" +GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL +sed -e "s|MODEL_SRC|KWS_ds_cnn_s_quantModel.c|g" -e "s|TENSORS_DIR|BUILD_MODEL_8BIT_EMUL/tensors|g" -e "s|MODEL_BUILD|BUILD_MODEL_8BIT_EMUL|g" -e "s|GRAPH_DUMP||g" -e "s|LARGE_OPT||g" \ + model/nntool_script_params > model/nntool_script +nntool -s model/nntool_script BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +load_quantization - was: False +now: True +open - opening graph file BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quant.tflite load_quantizaion = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +debug - was: False +now: True +adjust_order - adding transposes to correct tensor order for AT kernels +set_aliases - looking for aliased edges +eliminate_transposes - eliminating unnecessary transposes +eliminate_transposes - search for transposes +eliminate_transposes - ++ Starting up from DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - looking up at input_1[0] transpose [2, 0, 1] +eliminate_transposes - accepted input_1 - input without fixed order - transpose input [2, 0, 1] +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_0_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_1 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_2[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_2 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_2[0] +eliminate_transposes - looking down at CONV_2D_0_2_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_3 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_2[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_4[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_4 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_4[0] +eliminate_transposes - looking down at CONV_2D_0_4_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_5 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_4[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_6[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_6 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_6[0] +eliminate_transposes - looking down at CONV_2D_0_6_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_7 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_6[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_8[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_8 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_8[0] +eliminate_transposes - looking down at CONV_2D_0_8_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at AVERAGE_POOL_2D_0_9[0] transpose [1, 2, 0] +eliminate_transposes - accepted AVERAGE_POOL_2D_0_9 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_8[0] +eliminate_transposes - ++ Starting down from AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - looking down at FULLY_CONNECTED_0_10[0] transpose [1, 2, 0] +eliminate_transposes - accepted FULLY_CONNECTED_0_10 - linear layer reorder input +eliminate_transposes - ++ Found results for AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - eliminate transposes +eliminate_transposes_actions - Start Action (up): DEPTHWISE_CONV_2D_0_0 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 delete transpose in[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - input_1 input dims with (2, 0, 1) +eliminate_transposes_actions - End Action (up): input_1 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_0 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_2 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_2 +eliminate_transposes_actions - CONV_2D_0_2 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_2 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_4 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_4 +eliminate_transposes_actions - CONV_2D_0_4 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_4 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_6 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_6 +eliminate_transposes_actions - CONV_2D_0_6 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_6 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_8 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_8 +eliminate_transposes_actions - CONV_2D_0_8 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_8 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose in[0] +eliminate_transposes_actions - End Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - Start Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose out[0] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - reorder linear layer FULLY_CONNECTED_0_10 in with shape 1x1x64 transposed (2, 0, 1) +eliminate_transposes_actions - End Action (down): FULLY_CONNECTED_0_10 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - search for transposes +eliminate_transposes - no transposes to eliminate found +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - no further transpose sequences found +set_aliases - looking for aliased edges +nngraph - adjusted order +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start remove_relus +matcher - fusions - start remove_noops +matcher - fusions - start fuse_external_bias_sq8 +matcher - fusions - start fuse_pad +matcher - fusions - start match_duplicate_operations +matcher - fusions - start gather_to_split +matcher - fusions - start slice_to_split +matcher - fusions - start unused_concats +matcher - fusions - start find_missing_quantization +matcher - fusions - start rnn_reverse +matcher - fusions - start rnn_unpack +matcher - fusions - start match_far_hsigmoid +matcher - fusions - start match_close_hsigmoid +matcher - fusions - start expand_transposes +matcher - fusions - start move_pooling_scale8 +matcher - fusions - start move_activations_scale8 +matcher - fusions - start fuse_gap_convs +matcher - fusions - start match_conv_active_pool +matcher - fusions - start match_conv_pool_active +matcher - fusions - start match_conv_active +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_0,DEPTHWISE_CONV_2D_0_0_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_1,DEPTHWISE_CONV_2D_0_1_activation +match_gap_conv - fusing nodes CONV_2D_0_2,CONV_2D_0_2_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_3,DEPTHWISE_CONV_2D_0_3_activation +match_gap_conv - fusing nodes CONV_2D_0_4,CONV_2D_0_4_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_5,DEPTHWISE_CONV_2D_0_5_activation +match_gap_conv - fusing nodes CONV_2D_0_6,CONV_2D_0_6_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_7,DEPTHWISE_CONV_2D_0_7_activation +match_gap_conv - fusing nodes CONV_2D_0_8,CONV_2D_0_8_activation +matcher - fusions - match_conv_active modified graph +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start match_conv_pool +matcher - fusions - start fuse_gap_linear +matcher - fusions - start fuse_op_activation_scale8 +matcher - fusions - start propagate_softmax_sym_qrec +equalize_sym_mult_concats - propagating scale up from node SOFTMAX_0_11 to node FULLY_CONNECTED_0_10 +matcher - fusions - start equalize_sm_concats +matcher - fusions - start filter_bigger_than_input +matcher - fusions - start insert_copies +matcher - fusions - start propagate_up_rnn_in_qs +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| Step | Step name | Operation | Input Dims | Output Dims | Inputs | Active | Params | Ops | Params | Hints | +| | | | (cxhxw) | (cxhxw) | | size | size | | | | ++======+=============================+=========================+============+=============+========+========+========+=========+============================+======================+ +| 0 | input_1 | input | 1x49x10 | 1x49x10 | | 490 | 0 | | I 1x49x10 FIXED_ORDER=0 | in: hxwxc out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 1 | DEPTHWISE_CONV_2D_0_0_fusio | conv_fusion_conv_active | 1x49x10 | 64x25x5 | 0 | 8490 | 2624 | 320.00K | F 64x1x10x4 S 2x2 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 1 M 1 P 4x5x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 2 | DEPTHWISE_CONV_2D_0_1_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 1 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 3 | CONV_2D_0_2_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 2 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 4 | DEPTHWISE_CONV_2D_0_3_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 3 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 5 | CONV_2D_0_4_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 4 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 6 | DEPTHWISE_CONV_2D_0_5_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 5 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 7 | CONV_2D_0_6_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 6 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 8 | DEPTHWISE_CONV_2D_0_7_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 7 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 9 | CONV_2D_0_8_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 8 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 10 | AVERAGE_POOL_2D_0_9 | average_pool | 64x25x5 | 64x1x1 | 9 | 8064 | 0 | 8.06K | T average F 25x5 S 2x2 P | in: cxhxw out: cxhxw | +| | | | | | | | | | 0x0x0x0 zero | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 11 | FULLY_CONNECTED_0_10 | linear | 64x1x1 | 12 | 10 | 76 | 780 | 768 | F 12x64x1x1 | in: cx0x1 out: c | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 12 | SOFTMAX_0_11 | softmax | 12 | 12 | 11 | 24 | 0 | 24 | Beta 0.0 Axis None | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 13 | output_1 | output | 12 | 12 | 12 | 12 | 0 | | O 12 FIXED_ORDER=0 | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | 16000 | 20336 | 2.66M | | | +| | Max active/Total params | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | | 36336 | 2.66M | | | +| | Max mem usage | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| Step | Name | In | Out | Weights | Bias | Mulbias | Calc | Acc | ++======+==================+==================+=================+=================+==========+==========+=======+=======+ +| 0 | input_1 | -247.97>chan | Q32.0 | Q32.0 | +| | D_0_0 | 27124<246.03 | 37761<30.02 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 1 | DEPTHWISE_CONV_2 | -30.26>chan | Q32.0 | Q32.0 | +| | D_0_1 | 7761<30.02 | 31869<31.54 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 2 | DEPTHWISE_CONV_2 | -31.78>chan | Q32.0 | Q32.0 | +| | | 1869<31.54 | 76032<27.15 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 3 | CONV_2D_0_2_acti | -27.36>chan | Q32.0 | Q32.0 | +| | D_0_3 | 6032<27.15 | 53918<26.99 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 4 | DEPTHWISE_CONV_2 | -27.21>chan | Q32.0 | Q32.0 | +| | | 3918<26.99 | 48160<21.91 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 5 | CONV_2D_0_4_acti | -22.08>chan | Q32.0 | Q32.0 | +| | D_0_5 | 8160<21.91 | 48363<22.03 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 6 | DEPTHWISE_CONV_2 | -22.21>chan | Q32.0 | Q32.0 | +| | | 8363<22.03 | 22380<16.54 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 7 | CONV_2D_0_6_acti | -16.67>chan | Q32.0 | Q32.0 | +| | D_0_7 | 2380<16.54 | 42478<15.17 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 8 | DEPTHWISE_CONV_2 | -15.29>chan | Q32.0 | Q32.0 | +| | | 2478<15.17 | 86798<11.29 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 9 | CONV_2D_0_8_acti | -11.38>chan | Q32.0 | Q32.0 | +| | 0_10 | 6798<11.29 | 00000<12.09 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 12 | SOFTMAX_0_11 | -12.19 W: 10, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 328000 + +==== Process Tiling For User Kernel: S1_Conv2d_64x1x10x4_Relu ======================= +S1_Conv2d_64x1x10x4_Relu Partition[0] Size = 39409 (Min: 200, Max: 55313), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S1_Conv2d_64x1x10x4_Relu, Total Raw Memory: 43448 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S1_Conv2d_64x1x10x4_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 43448, Reusable Memory: 5288, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S2_Conv2d_64x1x3x3_Relu ======================= +S2_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S2_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S2_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S2_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S2_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S3_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S3_Conv2d_64x64x1x1_Relu ======================= +S3_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S3_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S3_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S3_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S3_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S3_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S4_Conv2d_64x1x3x3_Relu ======================= +S4_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S4_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S4_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S4_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S5_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S5_Conv2d_64x64x1x1_Relu ======================= +S5_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S5_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S5_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S5_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S5_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S5_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S6_Conv2d_64x1x3x3_Relu ======================= +S6_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S6_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S6_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S6_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S6_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S7_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S7_Conv2d_64x64x1x1_Relu ======================= +S7_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S7_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S7_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S7_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S7_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S7_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S8_Conv2d_64x1x3x3_Relu ======================= +S8_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S8_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S8_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S8_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S8_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S9_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S9_Conv2d_64x64x1x1_Relu ======================= +S9_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S9_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S9_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S9_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S9_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S9_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 25, Pad:[0,0] => Ho: 1 +OverlapP: 23 +TileCons: 2 +UsedIn : [5 x 25] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 8000 + +==== Process Tiling For User Kernel: S10_AveragePool_25x5 ======================= +S10_AveragePool_25x5 Partition[0] Size = 16277 (Min: 250, Max: 16173), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S10_AveragePool_25x5, Total Raw Memory: 8076 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S10_AveragePool_25x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 8076, Reusable Memory: 40660, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S11_Linear_12x64x1x1, Linear: InDim: 64, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S11_Linear_12x64x1x1 ======================= +S11_Linear_12x64x1x1 Partition[0] Size = 1805 (Min: 0, Max: 1973), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S11_Linear_12x64x1x1, Total Raw Memory: 928 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S11_Linear_12x64x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 928, Reusable Memory: 47808, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S12_SoftMax ======================= + S12_SoftMax Partition[0] Size = 57 (Min: 8, Max: 81), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S12_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. + S12_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S11_Output[ In] Adding Edge From S11_Linear_12x64x1x1 To S12_SoftMax New + Symbol: S10_Output[ In] Adding Edge From S10_AveragePool_25x5 To S11_Linear_12x64x1x1 New + Symbol: S9_Output[ In] Adding Edge From S9_Conv2d_64x64x1x1_Relu To S10_AveragePool_25x5 New + Symbol: S8_Output[ In] Adding Edge From S8_Conv2d_64x1x3x3_Relu To S9_Conv2d_64x64x1x1_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_64x64x1x1_Relu To S8_Conv2d_64x1x3x3_Relu New + Symbol: S6_Output[ In] Adding Edge From S6_Conv2d_64x1x3x3_Relu To S7_Conv2d_64x64x1x1_Relu New + Symbol: S5_Output[ In] Adding Edge From S5_Conv2d_64x64x1x1_Relu To S6_Conv2d_64x1x3x3_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_64x1x3x3_Relu To S5_Conv2d_64x64x1x1_Relu New + Symbol: S3_Output[ In] Adding Edge From S3_Conv2d_64x64x1x1_Relu To S4_Conv2d_64x1x3x3_Relu New + Symbol: S2_Output[ In] Adding Edge From S2_Conv2d_64x1x3x3_Relu To S3_Conv2d_64x64x1x1_Relu New + Symbol: S1_Output[ In] Adding Edge From S1_Conv2d_64x1x10x4_Relu To S2_Conv2d_64x1x3x3_Relu New + Symbol: Output_1[Undef] Adding Edge From S12_SoftMax To __GraphExit__ New + Symbol: S12_Infos[ In] Adding Edge From __GraphEntry__ To S12_SoftMax New + Symbol: S11_Mul_shift[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 New + Symbol: S11_Mul_scale[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S11_Biases[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S11_Weights[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S11_Infos[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_AveragePool_25x5 New + Symbol: S9_Mul_shift[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu New + Symbol: S9_Mul_scale[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S9_Biases[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S9_Weights[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S9_Infos[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S8_Mul_shift[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu New + Symbol: S8_Mul_scale[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S8_Biases[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S8_Weights[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S8_Infos[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu New + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Biases[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Weights[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S6_Mul_shift[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu New + Symbol: S6_Mul_scale[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S6_Biases[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S6_Weights[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S6_Infos[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S5_Mul_shift[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu New + Symbol: S5_Mul_scale[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S5_Biases[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S5_Weights[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S5_Infos[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu New + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Biases[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Weights[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S3_Mul_shift[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu New + Symbol: S3_Mul_scale[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S3_Biases[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S3_Weights[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S3_Infos[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S2_Mul_shift[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu New + Symbol: S2_Mul_scale[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S2_Biases[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S2_Weights[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S2_Infos[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S1_Mul_shift[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu New + Symbol: S1_Mul_scale[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S1_Biases[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S1_Weights[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S1_Infos[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S0_Input_1[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 24000 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 49685 => Alloc: OK + +[FULL] Remapping [24000 .. 49684] to [0 .. 25684] Align compensation: 3 +[PART] Remapping [0 .. 23999] to [25688 .. 49687] Align compensation: 0 +[PART] Remapping [49685 .. 349999] to [49688 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_s_quantCNN is sucessfull, L2: 49685 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => S0_Input_1 + (null) => S1_Infos + (null) => S1_Weights + (null) => S1_Biases + (null) => S1_Mul_scale + (null) => S1_Mul_shift + (null) => S2_Infos + (null) => S2_Weights + (null) => S2_Biases + (null) => S2_Mul_scale + (null) => S2_Mul_shift + (null) => S3_Infos + (null) => S3_Weights + (null) => S3_Biases + (null) => S3_Mul_scale + (null) => S3_Mul_shift + (null) => S4_Infos + (null) => S4_Weights + (null) => S4_Biases + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S5_Infos + (null) => S5_Weights + (null) => S5_Biases + (null) => S5_Mul_scale + (null) => S5_Mul_shift + (null) => S6_Infos + (null) => S6_Weights + (null) => S6_Biases + (null) => S6_Mul_scale + (null) => S6_Mul_shift + (null) => S7_Infos + (null) => S7_Weights + (null) => S7_Biases + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S8_Infos + (null) => S8_Weights + (null) => S8_Biases + (null) => S8_Mul_scale + (null) => S8_Mul_shift + (null) => S9_Infos + (null) => S9_Weights + (null) => S9_Biases + (null) => S9_Mul_scale + (null) => S9_Mul_shift + (null) => S10_Infos + (null) => S11_Infos + (null) => S11_Weights + (null) => S11_Biases + (null) => S11_Mul_scale + (null) => S11_Mul_shift + (null) => S12_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 3 4 5 6 7 8 9 1 10 11 12 + + Living Dynamic Symbols: [S0_Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 8: UKer S1_Conv2d_64x1x10x4_Relu, Operations: 328000 + I Buff In => S0_Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 490, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S1_Weights --L2-- Size: 2560, L3_Move: 0, L2_Move: 2560, TileOverhead: 1.000000, L2Buff: 0, Addr: 876 +CI Buff Bias => S1_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 492 + O Buff Out => S1_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 3436 +CI Buff Scale => S1_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 748 +CI Buff ScaleN => S1_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 812 +CI Buff Infos => S1_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43436 + Kernel Memory : L3: 0, L2: 11443 + Kernel Total Memory: 11443, L3 moves: 0, L2 moves: 11443, Move overhead: 1.000000 + Kernel Operations : 328000 [KernelOper/GraphOper: 12.126680%], Move/Operation ratio: [L3: 0.000000, L2: 0.034887] + Successors: 2 + + Living Dynamic Symbols: [S0_Input_1] [S1_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S2_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S1_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S2_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S2_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S2_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S2_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S2_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S2_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 3 + + Living Dynamic Symbols: [S1_Output] [S2_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S3_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S2_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S3_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S3_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S3_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S3_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S3_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S3_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 4 + + Living Dynamic Symbols: [S2_Output] [S3_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S4_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S3_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S4_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S4_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S4_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 5 + + Living Dynamic Symbols: [S3_Output] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S5_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S5_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S5_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S5_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S5_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S5_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S5_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 6 + + Living Dynamic Symbols: [S4_Output] [S5_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S6_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S5_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S6_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S6_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S6_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S6_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S6_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S6_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 7 + + Living Dynamic Symbols: [S5_Output] [S6_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S7_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S6_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S7_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S7_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S7_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 8 + + Living Dynamic Symbols: [S6_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S8_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S8_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S8_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S8_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S8_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S8_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S8_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 9 + + Living Dynamic Symbols: [S7_Output] [S8_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S9_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S8_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S9_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S9_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S9_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S9_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S9_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S9_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 10 + + Living Dynamic Symbols: [S8_Output] [S9_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S10_AveragePool_25x5, Operations: 8000 + I Buff In => S9_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S10_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 8000 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 8064 + Kernel Memory : L3: 0, L2: 8073 + Kernel Total Memory: 8073, L3 moves: 0, L2 moves: 8073, Move overhead: 1.000000 + Kernel Operations : 8000 [KernelOper/GraphOper: 0.295773%], Move/Operation ratio: [L3: 0.000000, L2: 1.009125] + Successors: 11 + + Living Dynamic Symbols: [S9_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S11_Linear_12x64x1x1, Operations: 768 + I Buff In => S10_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S11_Weights --L2-- Size: 768, L3_Move: 0, L2_Move: 768, TileOverhead: 1.000000, L2Buff: 0, Addr: 64 +CI Buff Bias => S11_Biases --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 832 + O Buff Out => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 880 +CI Buff Scale => S11_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 892 +CI Buff ScaleN => S11_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 904 +CI Buff Infos => S11_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 916 + Kernel Memory : L3: 0, L2: 925 + Kernel Total Memory: 925, L3 moves: 0, L2 moves: 925, Move overhead: 1.000000 + Kernel Operations : 768 [KernelOper/GraphOper: 0.028394%], Move/Operation ratio: [L3: 0.000000, L2: 1.204427] + Successors: 12 + + Living Dynamic Symbols: [S10_Output] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S12_SoftMax, Operations: 12 + I Buff In => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S12_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000444%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 20489 + Graph nodes min global memory: L3: 0, L2: 20492 + Graph sum of kernel arguments size: 170318, L3 moves: 0, L2 moves: 170318, Move overhead: 1.000000 + Graph total operations: 2704780 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 170318, Total L3_Move: 0, Total L2_Move: 170318, Tiling Overhead Average: 1.000000 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 16000 + Const 0 4492 + Total 0 20492 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + S0_Input_1 Externally allocated + S1_Infos INSTALL: HyperFlash[ 0: 13]@ 25520 LOAD: L2[ 0: 13]@ 25520 EXEC: L2[ 0: 13]@ 25520 , Size: 9 + S1_Weights INSTALL: HyperFlash[ 0: 13]@ 16384 LOAD: L2[ 0: 13]@ 16384 EXEC: L2[ 0: 13]@ 16384 , Size: 2560 + S1_Biases INSTALL: HyperFlash[ 0: 13]@ 22016 LOAD: L2[ 0: 13]@ 22016 EXEC: L2[ 0: 13]@ 22016 , Size: 256 + S1_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24320 LOAD: L2[ 0: 13]@ 24320 EXEC: L2[ 0: 13]@ 24320 , Size: 64 + S1_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24384 LOAD: L2[ 0: 13]@ 24384 EXEC: L2[ 0: 13]@ 24384 , Size: 64 + S2_Infos INSTALL: HyperFlash[ 0: 13]@ 25532 LOAD: L2[ 0: 13]@ 25532 EXEC: L2[ 0: 13]@ 25532 , Size: 9 + S2_Weights INSTALL: HyperFlash[ 0: 13]@ 19712 LOAD: L2[ 0: 13]@ 19712 EXEC: L2[ 0: 13]@ 19712 , Size: 576 + S2_Biases INSTALL: HyperFlash[ 0: 13]@ 22272 LOAD: L2[ 0: 13]@ 22272 EXEC: L2[ 0: 13]@ 22272 , Size: 256 + S2_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24448 LOAD: L2[ 0: 13]@ 24448 EXEC: L2[ 0: 13]@ 24448 , Size: 64 + S2_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24512 LOAD: L2[ 0: 13]@ 24512 EXEC: L2[ 0: 13]@ 24512 , Size: 64 + S3_Infos INSTALL: HyperFlash[ 0: 13]@ 25544 LOAD: L2[ 0: 13]@ 25544 EXEC: L2[ 0: 13]@ 25544 , Size: 9 + S3_Weights INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 4096 + S3_Biases INSTALL: HyperFlash[ 0: 13]@ 22528 LOAD: L2[ 0: 13]@ 22528 EXEC: L2[ 0: 13]@ 22528 , Size: 256 + S3_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24576 LOAD: L2[ 0: 13]@ 24576 EXEC: L2[ 0: 13]@ 24576 , Size: 64 + S3_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24640 LOAD: L2[ 0: 13]@ 24640 EXEC: L2[ 0: 13]@ 24640 , Size: 64 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 25556 LOAD: L2[ 0: 13]@ 25556 EXEC: L2[ 0: 13]@ 25556 , Size: 9 + S4_Weights INSTALL: HyperFlash[ 0: 13]@ 20288 LOAD: L2[ 0: 13]@ 20288 EXEC: L2[ 0: 13]@ 20288 , Size: 576 + S4_Biases INSTALL: HyperFlash[ 0: 13]@ 22784 LOAD: L2[ 0: 13]@ 22784 EXEC: L2[ 0: 13]@ 22784 , Size: 256 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24704 LOAD: L2[ 0: 13]@ 24704 EXEC: L2[ 0: 13]@ 24704 , Size: 64 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24768 LOAD: L2[ 0: 13]@ 24768 EXEC: L2[ 0: 13]@ 24768 , Size: 64 + S5_Infos INSTALL: HyperFlash[ 0: 13]@ 25568 LOAD: L2[ 0: 13]@ 25568 EXEC: L2[ 0: 13]@ 25568 , Size: 9 + S5_Weights INSTALL: HyperFlash[ 0: 13]@ 4096 LOAD: L2[ 0: 13]@ 4096 EXEC: L2[ 0: 13]@ 4096 , Size: 4096 + S5_Biases INSTALL: HyperFlash[ 0: 13]@ 23040 LOAD: L2[ 0: 13]@ 23040 EXEC: L2[ 0: 13]@ 23040 , Size: 256 + S5_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24832 LOAD: L2[ 0: 13]@ 24832 EXEC: L2[ 0: 13]@ 24832 , Size: 64 + S5_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24896 LOAD: L2[ 0: 13]@ 24896 EXEC: L2[ 0: 13]@ 24896 , Size: 64 + S6_Infos INSTALL: HyperFlash[ 0: 13]@ 25580 LOAD: L2[ 0: 13]@ 25580 EXEC: L2[ 0: 13]@ 25580 , Size: 9 + S6_Weights INSTALL: HyperFlash[ 0: 13]@ 20864 LOAD: L2[ 0: 13]@ 20864 EXEC: L2[ 0: 13]@ 20864 , Size: 576 + S6_Biases INSTALL: HyperFlash[ 0: 13]@ 23296 LOAD: L2[ 0: 13]@ 23296 EXEC: L2[ 0: 13]@ 23296 , Size: 256 + S6_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24960 LOAD: L2[ 0: 13]@ 24960 EXEC: L2[ 0: 13]@ 24960 , Size: 64 + S6_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25024 LOAD: L2[ 0: 13]@ 25024 EXEC: L2[ 0: 13]@ 25024 , Size: 64 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 25592 LOAD: L2[ 0: 13]@ 25592 EXEC: L2[ 0: 13]@ 25592 , Size: 9 + S7_Weights INSTALL: HyperFlash[ 0: 13]@ 8192 LOAD: L2[ 0: 13]@ 8192 EXEC: L2[ 0: 13]@ 8192 , Size: 4096 + S7_Biases INSTALL: HyperFlash[ 0: 13]@ 23552 LOAD: L2[ 0: 13]@ 23552 EXEC: L2[ 0: 13]@ 23552 , Size: 256 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25088 LOAD: L2[ 0: 13]@ 25088 EXEC: L2[ 0: 13]@ 25088 , Size: 64 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25152 LOAD: L2[ 0: 13]@ 25152 EXEC: L2[ 0: 13]@ 25152 , Size: 64 + S8_Infos INSTALL: HyperFlash[ 0: 13]@ 25604 LOAD: L2[ 0: 13]@ 25604 EXEC: L2[ 0: 13]@ 25604 , Size: 9 + S8_Weights INSTALL: HyperFlash[ 0: 13]@ 21440 LOAD: L2[ 0: 13]@ 21440 EXEC: L2[ 0: 13]@ 21440 , Size: 576 + S8_Biases INSTALL: HyperFlash[ 0: 13]@ 23808 LOAD: L2[ 0: 13]@ 23808 EXEC: L2[ 0: 13]@ 23808 , Size: 256 + S8_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25216 LOAD: L2[ 0: 13]@ 25216 EXEC: L2[ 0: 13]@ 25216 , Size: 64 + S8_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25280 LOAD: L2[ 0: 13]@ 25280 EXEC: L2[ 0: 13]@ 25280 , Size: 64 + S9_Infos INSTALL: HyperFlash[ 0: 13]@ 25616 LOAD: L2[ 0: 13]@ 25616 EXEC: L2[ 0: 13]@ 25616 , Size: 9 + S9_Weights INSTALL: HyperFlash[ 0: 13]@ 12288 LOAD: L2[ 0: 13]@ 12288 EXEC: L2[ 0: 13]@ 12288 , Size: 4096 + S9_Biases INSTALL: HyperFlash[ 0: 13]@ 24064 LOAD: L2[ 0: 13]@ 24064 EXEC: L2[ 0: 13]@ 24064 , Size: 256 + S9_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25344 LOAD: L2[ 0: 13]@ 25344 EXEC: L2[ 0: 13]@ 25344 , Size: 64 + S9_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25408 LOAD: L2[ 0: 13]@ 25408 EXEC: L2[ 0: 13]@ 25408 , Size: 64 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 25628 LOAD: L2[ 0: 13]@ 25628 EXEC: L2[ 0: 13]@ 25628 , Size: 9 + S11_Infos INSTALL: HyperFlash[ 0: 13]@ 25640 LOAD: L2[ 0: 13]@ 25640 EXEC: L2[ 0: 13]@ 25640 , Size: 9 + S11_Weights INSTALL: HyperFlash[ 0: 13]@ 18944 LOAD: L2[ 0: 13]@ 18944 EXEC: L2[ 0: 13]@ 18944 , Size: 768 + S11_Biases INSTALL: HyperFlash[ 0: 13]@ 25472 LOAD: L2[ 0: 13]@ 25472 EXEC: L2[ 0: 13]@ 25472 , Size: 48 + S11_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25652 LOAD: L2[ 0: 13]@ 25652 EXEC: L2[ 0: 13]@ 25652 , Size: 12 + S11_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25664 LOAD: L2[ 0: 13]@ 25664 EXEC: L2[ 0: 13]@ 25664 , Size: 12 + S12_Infos INSTALL: HyperFlash[ 0: 13]@ 25676 LOAD: L2[ 0: 13]@ 25676 EXEC: L2[ 0: 13]@ 25676 , Size: 9 + Output_1 Externally allocated + S1_Output EXEC: L2[ 1: 2]@ 33688 , Size: 8000 + S2_Output EXEC: L2[ 2: 3]@ 25688 , Size: 8000 + S3_Output EXEC: L2[ 3: 4]@ 33688 , Size: 8000 + S4_Output EXEC: L2[ 4: 5]@ 25688 , Size: 8000 + S5_Output EXEC: L2[ 5: 6]@ 33688 , Size: 8000 + S6_Output EXEC: L2[ 6: 7]@ 25688 , Size: 8000 + S7_Output EXEC: L2[ 7: 8]@ 33688 , Size: 8000 + S8_Output EXEC: L2[ 8: 9]@ 41688 , Size: 8000 + S9_Output EXEC: L2[ 9: 10]@ 25688 , Size: 8000 + S10_Output EXEC: L2[ 10: 11]@ 33688 , Size: 64 + S11_Output EXEC: L2[ 11: 12]@ 25688 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S1_Conv2d_64x1x10x4_Relu +Generating Code For User Kernel: S2_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S3_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S4_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S5_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S6_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S7_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S8_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S9_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S10_AveragePool_25x5 +Generating Code For User Kernel: S11_Linear_12x64x1x1 +Generating Code For User Kernel: S12_SoftMax +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Weights.tensor: 2560 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Weights.tensor: 768 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Biases.tensor: 48 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S12_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_s_quant_L3_Flash_Const.dat (size 25688) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 43448 +L2 Memory size (Bytes) : Given: 350000, Used: 49685 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 170318 Bytes +Sum of all Kernels arguments size : 170318 Bytes +Tiling Bandwith overhead : 1.000000 Move/KerArgSize +Sum of baseline bandwidth : 4248224 Bytes +Percentage of baseline BW for L2 : 4.00916 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 2704780 Operations +Total amount of flash coefficients : 25688 Bytes + +Basic kernels library : nntool_extra_kernels.h + : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_s_quant.h +Output Directory : BUILD_MODEL_8BIT_EMUL + +The following files have been generated: + KWS_ds_cnn_s_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_s_quantKernels.h Header file for the generated C code + KWS_ds_cnn_s_quant_L3_Flash_Const.dat Flash content for Graph constants +mkdir /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +python3 /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/GenLUT.py --fft_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/LUT.def --mfcc_bf_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCC_FB.def \ + --sample_rate 16000 --frame_size 640 --frame_step 320 --n_frame 49 \ + --n_fft 1024 --n_dct 40 --mfcc_bank_cnt 40 --fmin 20 --fmax 4000 --mfcc_bank_cnt 40 --preempfactor 0.0 \ + --use_tf_mfcc --save_params_header MFCC_params_SMALL.h --use_high_prec +(1024,) +Filter 0: Start: 2 Stop: 5 Base: 0 Items: 4 +Filter 1: Start: 4 Stop: 8 Base: 4 Items: 5 +Filter 2: Start: 6 Stop: 10 Base: 9 Items: 5 +Filter 3: Start: 9 Stop: 13 Base: 14 Items: 5 +Filter 4: Start: 11 Stop: 15 Base: 19 Items: 5 +Filter 5: Start: 14 Stop: 18 Base: 24 Items: 5 +Filter 6: Start: 16 Stop: 21 Base: 29 Items: 6 +Filter 7: Start: 19 Stop: 24 Base: 35 Items: 6 +Filter 8: Start: 22 Stop: 28 Base: 41 Items: 7 +Filter 9: Start: 25 Stop: 31 Base: 48 Items: 7 +Filter 10: Start: 29 Stop: 34 Base: 55 Items: 6 +Filter 11: Start: 32 Stop: 38 Base: 61 Items: 7 +Filter 12: Start: 35 Stop: 42 Base: 68 Items: 8 +Filter 13: Start: 39 Stop: 46 Base: 76 Items: 8 +Filter 14: Start: 43 Stop: 51 Base: 84 Items: 9 +Filter 15: Start: 47 Stop: 55 Base: 93 Items: 9 +Filter 16: Start: 52 Stop: 60 Base: 102 Items: 9 +Filter 17: Start: 56 Stop: 65 Base: 111 Items: 10 +Filter 18: Start: 61 Stop: 70 Base: 121 Items: 10 +Filter 19: Start: 66 Stop: 75 Base: 131 Items: 10 +Filter 20: Start: 71 Stop: 81 Base: 141 Items: 11 +Filter 21: Start: 76 Stop: 87 Base: 152 Items: 12 +Filter 22: Start: 82 Stop: 93 Base: 164 Items: 12 +Filter 23: Start: 88 Stop: 99 Base: 176 Items: 12 +Filter 24: Start: 94 Stop: 106 Base: 188 Items: 13 +Filter 25: Start: 100 Stop: 113 Base: 201 Items: 14 +Filter 26: Start: 107 Stop: 121 Base: 215 Items: 15 +Filter 27: Start: 114 Stop: 128 Base: 230 Items: 15 +Filter 28: Start: 122 Stop: 137 Base: 245 Items: 16 +Filter 29: Start: 129 Stop: 145 Base: 261 Items: 17 +Filter 30: Start: 138 Stop: 154 Base: 278 Items: 17 +Filter 31: Start: 146 Stop: 163 Base: 295 Items: 18 +Filter 32: Start: 155 Stop: 173 Base: 313 Items: 19 +Filter 33: Start: 164 Stop: 183 Base: 332 Items: 20 +Filter 34: Start: 174 Stop: 194 Base: 352 Items: 21 +Filter 35: Start: 184 Stop: 205 Base: 373 Items: 22 +Filter 36: Start: 195 Stop: 217 Base: 395 Items: 23 +Filter 37: Start: 206 Stop: 229 Base: 418 Items: 24 +Filter 38: Start: 218 Stop: 242 Base: 442 Items: 25 +Filter 39: Start: 230 Stop: 255 Base: 467 Items: 26 +gcc -g -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -I. -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Emulation MFCCmodel.c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccGenerator.c /home/marco-gwt/GWT/AutotilerV2/install/lib/libtile.a -lm -DSMALL +/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -f BUILD_MODEL_8BIT_EMUL --L1 48736 --L2 350000 --L3 6388608 +MFCC_COEF_DYN = 10 +FFT_BITS = 10 + +==== Process Tiling For User Kernel: MFCC ======================= + MFCC Partition[0] Size = 33086 (Min: 0, Max: 33814), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes + MFCC, TiledSpace: Tile0 Iteration Count: 1 + In : Ratio: 0.000000, Size: 2560, Total: 2560, Move: 62720 (Decl x 1.960000) L2 +* Out : Ratio: 0.000000, Size: 3920, Total: 6480, Move: 3920 (Decl x 1.000000) L2 +* InOut1 : Ratio: 6.400000, Size: 8192, Total: 14672, Move: 0 (Decl x 0.000000) L2 +* Out_fft : Ratio: 6.400000, Size: 8192, Total: 22864, Move: 0 (Decl x 0.000000) L2 +* shift_fft : Ratio: 3.200000, Size: 1024, Total: 23888, Move: 0 (Decl x 0.000000) L2 +* InOut2 : Ratio: 0.125000, Size: 160, Total: 24048, Move: 0 (Decl x 0.000000) L2 +* Shift : Ratio: 0.003125, Size: 4, Total: 24052, Move: 0 (Decl x 0.000000) L2 +* WinTable : Ratio: 3.200000, Size: 2048, Total: 26100, Move: 2048 (Decl x 1.000000) L2 +* Twiddles_fft : Ratio: 6.400000, Size: 4096, Total: 30196, Move: 4096 (Decl x 1.000000) L2 +* SwapTable_fft : Ratio: 3.200000, Size: 2048, Total: 32244, Move: 2048 (Decl x 1.000000) L2 +*MFCC_FilterBank : Ratio: 0.125000, Size: 320, Total: 32564, Move: 320 (Decl x 1.000000) L2 +* MFCC_Coeffs : Ratio: 1.543750, Size: 988, Total: 33552, Move: 988 (Decl x 1.000000) L2 +* shift_bf : Ratio: 0.125000, Size: 40, Total: 33592, Move: 0 (Decl x 0.000000) L2 +* DCT_Coeff : Ratio: 5.000000, Size: 3200, Total: 36792, Move: 3200 (Decl x 1.000000) L2 + MFCC - IterSpace: Tile0 - L1 Memory: 36792, L2Move: 79340, L3Move: 0, Tiling Overhead: 1.631839 + MFCC Iteration for Tiled Space: 1 + MFCC For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 36792, Reusable Memory: 11944, Used L2 Memory: 0 +================================================================================================= + + +Generating Code For User Kernel: MFCC + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 36792 +L2 Memory size (Bytes) : Given: 350000, Used: 0 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for all user kernels : 0 Bytes +L2 Memory bandwidth for all user kerneks : 79340 Bytes +Sum of all Kernels arguments size : 48620 Bytes +Tiling Bandwith overhead : 1.631839 Move/KerArgSize +Percentage of baseline BW for L2 : inf % +Percentage of baseline BW for L3 : -nan % +Sum of all Kernels operations : 0 Operations + +Basic kernels library : MfccBasicKernels.h +Output Directory : /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL + +The following files have been generated: + MFCCKernels.c Generated C code for the user kernels and the user kernels groups + MFCCKernels.h Header file for the generated C code +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DSMALL -DWITH_MFCC -DUSE_ABS -DHIGH_PREC_FFT -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': False} +Pred/Tot: 93/ 100 Accuracy: 93.00% +Pred/Tot: 186/ 200 Accuracy: 93.00% +Pred/Tot: 275/ 300 Accuracy: 91.67% +Pred/Tot: 356/ 400 Accuracy: 89.00% +Pred/Tot: 449/ 500 Accuracy: 89.80% +Pred/Tot: 535/ 600 Accuracy: 89.17% +Pred/Tot: 629/ 700 Accuracy: 89.86% +Pred/Tot: 714/ 800 Accuracy: 89.25% +Pred/Tot: 800/ 900 Accuracy: 88.89% +Pred/Tot: 894/1000 Accuracy: 89.40% +Pred/Tot: 983/1100 Accuracy: 89.36% +Pred/Tot: 1072/1200 Accuracy: 89.33% +Pred/Tot: 1159/1300 Accuracy: 89.15% +Pred/Tot: 1252/1400 Accuracy: 89.43% +Pred/Tot: 1334/1500 Accuracy: 88.93% +Pred/Tot: 1427/1600 Accuracy: 89.19% +Pred/Tot: 1517/1700 Accuracy: 89.24% +Pred/Tot: 1607/1800 Accuracy: 89.28% +Pred/Tot: 1699/1900 Accuracy: 89.42% +Pred/Tot: 1787/2000 Accuracy: 89.35% +Pred/Tot: 1876/2100 Accuracy: 89.33% +Pred/Tot: 1973/2200 Accuracy: 89.68% +Pred/Tot: 2061/2300 Accuracy: 89.61% +Pred/Tot: 2146/2400 Accuracy: 89.42% +Pred/Tot: 2231/2500 Accuracy: 89.24% +Pred/Tot: 2319/2600 Accuracy: 89.19% +Pred/Tot: 2407/2700 Accuracy: 89.15% +Pred/Tot: 2495/2800 Accuracy: 89.11% +Pred/Tot: 2578/2900 Accuracy: 88.90% +Pred/Tot: 2664/3000 Accuracy: 88.80% +Pred/Tot: 2757/3100 Accuracy: 88.94% +Pred/Tot: 2846/3200 Accuracy: 88.94% +Pred/Tot: 2940/3300 Accuracy: 89.09% +Pred/Tot: 3023/3400 Accuracy: 88.91% +Pred/Tot: 3109/3500 Accuracy: 88.83% +Pred/Tot: 3202/3600 Accuracy: 88.94% +Pred/Tot: 3283/3700 Accuracy: 88.73% +Pred/Tot: 3375/3800 Accuracy: 88.82% +Pred/Tot: 3464/3900 Accuracy: 88.82% +Pred/Tot: 3553/4000 Accuracy: 88.83% +Pred/Tot: 3645/4100 Accuracy: 88.90% +Pred/Tot: 3735/4200 Accuracy: 88.93% +Pred/Tot: 3824/4300 Accuracy: 88.93% +Pred/Tot: 3918/4400 Accuracy: 89.05% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 3957/4444 Accuracy: 89.04% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 0 271 5 6 6 16 9 16 21 0 9 12] + [ 2 7 352 8 0 4 22 0 0 0 0 2] + [ 2 17 0 356 2 9 1 1 2 0 1 15] + [ 0 5 0 0 316 2 2 3 0 13 7 2] + [ 1 6 1 14 0 338 4 0 1 0 3 9] + [ 0 11 5 2 1 1 326 3 0 1 2 0] + [ 0 11 1 0 0 0 3 341 0 1 0 6] + [ 2 12 1 0 7 1 0 0 328 10 1 1] + [ 0 5 0 0 27 1 0 1 7 323 4 5] + [ 3 4 1 1 12 4 1 0 1 1 318 4] + [ 2 12 1 16 3 12 1 1 3 1 3 317]] +Pred/Tot: 86/ 100 Accuracy: 86.00% +Pred/Tot: 175/ 200 Accuracy: 87.50% +Pred/Tot: 266/ 300 Accuracy: 88.67% +Pred/Tot: 355/ 400 Accuracy: 88.75% +Pred/Tot: 446/ 500 Accuracy: 89.20% +Pred/Tot: 537/ 600 Accuracy: 89.50% +Pred/Tot: 630/ 700 Accuracy: 90.00% +Pred/Tot: 722/ 800 Accuracy: 90.25% +Pred/Tot: 813/ 900 Accuracy: 90.33% +Pred/Tot: 905/1000 Accuracy: 90.50% +Pred/Tot: 993/1100 Accuracy: 90.27% +Pred/Tot: 1085/1200 Accuracy: 90.42% +Pred/Tot: 1173/1300 Accuracy: 90.23% +Pred/Tot: 1262/1400 Accuracy: 90.14% +Pred/Tot: 1348/1500 Accuracy: 89.87% +Pred/Tot: 1442/1600 Accuracy: 90.12% +Pred/Tot: 1531/1700 Accuracy: 90.06% +Pred/Tot: 1620/1800 Accuracy: 90.00% +Pred/Tot: 1704/1900 Accuracy: 89.68% +Pred/Tot: 1790/2000 Accuracy: 89.50% +Pred/Tot: 1883/2100 Accuracy: 89.67% +Pred/Tot: 1980/2200 Accuracy: 90.00% +Pred/Tot: 2069/2300 Accuracy: 89.96% +Pred/Tot: 2155/2400 Accuracy: 89.79% +Pred/Tot: 2247/2500 Accuracy: 89.88% +Pred/Tot: 2336/2600 Accuracy: 89.85% +Pred/Tot: 2424/2700 Accuracy: 89.78% +Pred/Tot: 2510/2800 Accuracy: 89.64% +Pred/Tot: 2601/2900 Accuracy: 89.69% +Pred/Tot: 2684/3000 Accuracy: 89.47% +Pred/Tot: 2774/3100 Accuracy: 89.48% +Pred/Tot: 2864/3200 Accuracy: 89.50% +Pred/Tot: 2959/3300 Accuracy: 89.67% +Pred/Tot: 3046/3400 Accuracy: 89.59% +Pred/Tot: 3134/3500 Accuracy: 89.54% +Pred/Tot: 3228/3600 Accuracy: 89.67% +Pred/Tot: 3317/3700 Accuracy: 89.65% +Pred/Tot: 3407/3800 Accuracy: 89.66% +Pred/Tot: 3495/3900 Accuracy: 89.62% +Pred/Tot: 3580/4000 Accuracy: 89.50% +Pred/Tot: 3668/4100 Accuracy: 89.46% +Pred/Tot: 3762/4200 Accuracy: 89.57% +Pred/Tot: 3854/4300 Accuracy: 89.63% +Pred/Tot: 3940/4400 Accuracy: 89.55% +Pred/Tot: 4030/4500 Accuracy: 89.56% +Pred/Tot: 4114/4600 Accuracy: 89.43% +Pred/Tot: 4205/4700 Accuracy: 89.47% +Pred/Tot: 4297/4800 Accuracy: 89.52% + +FINAL TESTING ACCURACY: +Pred/Tot: 4376/4889 Accuracy: 89.51% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 2 323 2 6 7 10 10 13 12 2 6 15] + [ 1 11 380 7 0 0 15 1 0 1 0 3] + [ 0 6 2 361 0 18 6 1 0 0 1 10] + [ 2 11 0 0 385 0 4 0 3 7 8 5] + [ 2 17 2 15 0 347 4 0 4 1 2 12] + [ 2 5 9 3 1 0 383 8 0 1 0 0] + [ 1 15 0 0 1 1 4 370 1 0 3 0] + [ 2 13 2 0 5 5 0 0 354 13 0 2] + [ 2 9 0 0 32 1 4 2 6 332 3 11] + [ 2 5 0 0 8 3 0 0 1 2 385 5] + [ 4 12 0 20 2 11 1 1 0 0 3 348]] diff --git a/accuracy_log/log_test_small_hp_spectr_v2.txt b/accuracy_log/log_test_small_hp_spectr_v2.txt new file mode 100644 index 0000000..001d963 --- /dev/null +++ b/accuracy_log/log_test_small_hp_spectr_v2.txt @@ -0,0 +1,1780 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_s_quant.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 0 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=1 MEDIUM=0 LARGE=0 WITH_MFCC=1 USE_POWER=0 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_SQ8_EMUL +cp model/KWS_ds_cnn_s_quant.tflite BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quant.tflite +echo "GENERATING NNTOOL STATE FILE" +GENERATING NNTOOL STATE FILE +nntool -s model/nntool_script BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +open - opening graph file BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quant.tflite load_quantization = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +unified_quantizer - forwards SOFTMAX_0_11 in: -12.19<(i8-0.00)*0.09519558<12.09 out: None stop [] fusion False +unified_quantizer - handler SoftmaxTanHMult selected for SoftMaxParameters(SOFTMAX_0_11) +unified_quantizer - forwards in edge 0 does not match was -12.19<(i8-0.00)*0.09519558<12.09 need -16.00<(i8-0.00)*0.12500000<15.88 forced +unified_quantizer - backwards FULLY_CONNECTED_0_10 in: -11.38<(i8-0.00)*0.08886796<11.29,chan<(i8-0.00)*chan W: 10, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 328000 + +==== Process Tiling For User Kernel: S4_Conv2d_64x1x10x4_Relu ======================= +S4_Conv2d_64x1x10x4_Relu Partition[0] Size = 39393 (Min: 200, Max: 55185), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=25 + In Dim: 58, TileOverlap: 8, Ratio: 2.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S4_Conv2d_64x1x10x4_Relu, Total Raw Memory: 43448 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: In, Size: 492, Base1: 0, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Bias, Size: 256, Base1: 492, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Scale, Size: 64, Base1: 748, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: ScaleN, Size: 64, Base1: 812, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Filter, Size: 2560, Base1: 876, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Out, Size: 8000, Base1: 3436, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: ConvOut, Size: 32000, Base1: 11436, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Infos, Size: 12, Base1: 43436, Base2: 0 +S4_Conv2d_64x1x10x4_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 43448, Reusable Memory: 5288, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S7_Conv2d_64x1x3x3_Relu ======================= +S7_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S7_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S7_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S10_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S10_Conv2d_64x64x1x1_Relu ======================= +S10_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S10_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S10_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S10_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S10_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S10_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S13_Conv2d_64x1x3x3_Relu ======================= +S13_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S13_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S13_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S13_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S13_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S16_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S16_Conv2d_64x64x1x1_Relu ======================= +S16_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S16_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S16_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S16_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S16_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S16_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S19_Conv2d_64x1x3x3_Relu ======================= +S19_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S19_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S19_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S19_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S19_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S22_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S22_Conv2d_64x64x1x1_Relu ======================= +S22_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S22_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S22_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S22_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S22_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S22_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S25_Conv2d_64x1x3x3_Relu ======================= +S25_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S25_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S25_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S25_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S25_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S28_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S28_Conv2d_64x64x1x1_Relu ======================= +S28_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S28_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S28_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S28_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S28_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S28_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 25, Pad:[0,0] => Ho: 1 +OverlapP: 23 +TileCons: 2 +UsedIn : [5 x 25] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 8000 + +==== Process Tiling For User Kernel: S29_AveragePool_25x5 ======================= +S29_AveragePool_25x5 Partition[0] Size = 16271 (Min: 250, Max: 16155), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=1 + In Dim: 25, TileOverlap: 23, Ratio: 2.000000 + Out Dim: 1, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S29_AveragePool_25x5, Total Raw Memory: 8076 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S29_AveragePool_25x5, Arg: In, Size: 8000, Base1: 0, Base2: 0 +Ker: S29_AveragePool_25x5, Arg: Out, Size: 64, Base1: 8000, Base2: 0 +Ker: S29_AveragePool_25x5, Arg: Infos, Size: 12, Base1: 8064, Base2: 0 +S29_AveragePool_25x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 8076, Reusable Memory: 40660, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S32_Linear_12x64x1x1, Linear: InDim: 64, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S32_Linear_12x64x1x1 ======================= +S32_Linear_12x64x1x1 Partition[0] Size = 1791 (Min: 0, Max: 1875), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=1 + +Kernel: S32_Linear_12x64x1x1, Total Raw Memory: 928 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S32_Linear_12x64x1x1, Arg: In, Size: 64, Base1: 0, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Filter, Size: 768, Base1: 64, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Bias, Size: 48, Base1: 832, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Out, Size: 12, Base1: 880, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Scale, Size: 12, Base1: 892, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: ScaleN, Size: 12, Base1: 904, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Infos, Size: 12, Base1: 916, Base2: 0 +S32_Linear_12x64x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 928, Reusable Memory: 47808, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S33_SoftMax ======================= + S33_SoftMax Partition[0] Size = 51 (Min: 8, Max: 63), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=12 + In Dim: 12, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 12, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S33_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S33_SoftMax, Arg: In, Size: 12, Base1: 0, Base2: 0 +Ker: S33_SoftMax, Arg: Out, Size: 24, Base1: 12, Base2: 0 +Ker: S33_SoftMax, Arg: Infos, Size: 12, Base1: 36, Base2: 0 + S33_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S32_Output[ In] Adding Edge From S32_Linear_12x64x1x1 To S33_SoftMax New + Symbol: S29_Output[ In] Adding Edge From S29_AveragePool_25x5 To S32_Linear_12x64x1x1 New + Symbol: S28_Output[ In] Adding Edge From S28_Conv2d_64x64x1x1_Relu To S29_AveragePool_25x5 New + Symbol: S25_Output[ In] Adding Edge From S25_Conv2d_64x1x3x3_Relu To S28_Conv2d_64x64x1x1_Relu New + Symbol: S22_Output[ In] Adding Edge From S22_Conv2d_64x64x1x1_Relu To S25_Conv2d_64x1x3x3_Relu New + Symbol: S19_Output[ In] Adding Edge From S19_Conv2d_64x1x3x3_Relu To S22_Conv2d_64x64x1x1_Relu New + Symbol: S16_Output[ In] Adding Edge From S16_Conv2d_64x64x1x1_Relu To S19_Conv2d_64x1x3x3_Relu New + Symbol: S13_Output[ In] Adding Edge From S13_Conv2d_64x1x3x3_Relu To S16_Conv2d_64x64x1x1_Relu New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_64x64x1x1_Relu To S13_Conv2d_64x1x3x3_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_64x1x3x3_Relu To S10_Conv2d_64x64x1x1_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_64x1x10x4_Relu To S7_Conv2d_64x1x3x3_Relu New + Symbol: Output_1[ Out] Adding Edge From S33_SoftMax To __GraphExit__ New + Symbol: S33_Infos[ In] Adding Edge From __GraphEntry__ To S33_SoftMax New + Symbol: S32_Infos[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 New + Symbol: S32_Mul_shift[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: S32_Mul_scale[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: Dscnnfc1matmul_bias[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: Dscnnfc1weights_quantfakequant[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: S29_Infos[ In] Adding Edge From __GraphEntry__ To S29_AveragePool_25x5 New + Symbol: S28_Infos[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu New + Symbol: S28_Mul_shift[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: S28_Mul_scale[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: S25_Infos[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu New + Symbol: S25_Mul_shift[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: S25_Mul_scale[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: S22_Infos[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu New + Symbol: S22_Mul_shift[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: S22_Mul_scale[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: S19_Infos[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu New + Symbol: S19_Mul_shift[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: S19_Mul_scale[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: S16_Infos[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu New + Symbol: S16_Mul_shift[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: S16_Mul_scale[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu New + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu New + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu New + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: Dscnnconv_1conv2d_fold_bias[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: Dscnnconv_1weights_quantfakequ[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: Input_1[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 24000 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 49685 => Alloc: OK + +[FULL] Remapping [24000 .. 49684] to [0 .. 25684] Align compensation: 3 +[PART] Remapping [0 .. 23999] to [25688 .. 49687] Align compensation: 0 +[PART] Remapping [49685 .. 349999] to [49688 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_s_quantCNN is sucessfull, L2: 49685 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => Input_1 + (null) => Dscnnconv_1weights_quantfakequ + (null) => Dscnnconv_1conv2d_fold_bias + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S4_Infos + (null) => Dscnnconv_ds_1dw_convweights_q + (null) => Dscnnconv_ds_1dw_convdepthwise + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S7_Infos + (null) => Dscnnconv_ds_1pw_convweights_q + (null) => Dscnnconv_ds_1pw_convconv2d_fo + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S10_Infos + (null) => Dscnnconv_ds_2dw_convweights_q + (null) => Dscnnconv_ds_2dw_convdepthwise + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S13_Infos + (null) => Dscnnconv_ds_2pw_convweights_q + (null) => Dscnnconv_ds_2pw_convconv2d_fo + (null) => S16_Mul_scale + (null) => S16_Mul_shift + (null) => S16_Infos + (null) => Dscnnconv_ds_3dw_convweights_q + (null) => Dscnnconv_ds_3dw_convdepthwise + (null) => S19_Mul_scale + (null) => S19_Mul_shift + (null) => S19_Infos + (null) => Dscnnconv_ds_3pw_convweights_q + (null) => Dscnnconv_ds_3pw_convconv2d_fo + (null) => S22_Mul_scale + (null) => S22_Mul_shift + (null) => S22_Infos + (null) => Dscnnconv_ds_4dw_convweights_q + (null) => Dscnnconv_ds_4dw_convdepthwise + (null) => S25_Mul_scale + (null) => S25_Mul_shift + (null) => S25_Infos + (null) => Dscnnconv_ds_4pw_convweights_q + (null) => Dscnnconv_ds_4pw_convconv2d_fo + (null) => S28_Mul_scale + (null) => S28_Mul_shift + (null) => S28_Infos + (null) => S29_Infos + (null) => Dscnnfc1weights_quantfakequant + (null) => Dscnnfc1matmul_bias + (null) => S32_Mul_scale + (null) => S32_Mul_shift + (null) => S32_Infos + (null) => S33_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 3 4 5 6 7 8 9 1 10 11 12 + + Living Dynamic Symbols: [Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 8: UKer S4_Conv2d_64x1x10x4_Relu, Operations: 328000 + I Buff In => Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 490, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_1weights_quantfakequ --L2-- Size: 2560, L3_Move: 0, L2_Move: 2560, TileOverhead: 1.000000, L2Buff: 0, Addr: 876 +CI Buff Bias => Dscnnconv_1conv2d_fold_bias --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 492 + O Buff Out => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 3436 +CI Buff Scale => S4_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 748 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 812 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43436 + Kernel Memory : L3: 0, L2: 11443 + Kernel Total Memory: 11443, L3 moves: 0, L2 moves: 11443, Move overhead: 1.000000 + Kernel Operations : 328000 [KernelOper/GraphOper: 12.126680%], Move/Operation ratio: [L3: 0.000000, L2: 0.034887] + Successors: 2 + + Living Dynamic Symbols: [Input_1] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S7_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_1dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_1dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S7_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 3 + + Living Dynamic Symbols: [S4_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S10_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_1pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_1pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S10_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S10_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 4 + + Living Dynamic Symbols: [S7_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S13_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S10_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_2dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_2dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S13_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S13_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 5 + + Living Dynamic Symbols: [S10_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S16_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S13_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_2pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_2pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S16_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S16_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S16_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S16_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 6 + + Living Dynamic Symbols: [S13_Output] [S16_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S19_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S16_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_3dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_3dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S19_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S19_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S19_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S19_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 7 + + Living Dynamic Symbols: [S16_Output] [S19_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S22_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S19_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_3pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_3pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S22_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S22_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S22_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S22_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 8 + + Living Dynamic Symbols: [S19_Output] [S22_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S25_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S22_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_4dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_4dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S25_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S25_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S25_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S25_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 9 + + Living Dynamic Symbols: [S22_Output] [S25_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S28_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S25_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_4pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_4pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S28_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S28_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S28_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S28_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 10 + + Living Dynamic Symbols: [S25_Output] [S28_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S29_AveragePool_25x5, Operations: 8000 + I Buff In => S28_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S29_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 8000 +CI Buff Infos => S29_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 8064 + Kernel Memory : L3: 0, L2: 8073 + Kernel Total Memory: 8073, L3 moves: 0, L2 moves: 8073, Move overhead: 1.000000 + Kernel Operations : 8000 [KernelOper/GraphOper: 0.295773%], Move/Operation ratio: [L3: 0.000000, L2: 1.009125] + Successors: 11 + + Living Dynamic Symbols: [S28_Output] [S29_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S32_Linear_12x64x1x1, Operations: 768 + I Buff In => S29_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnfc1weights_quantfakequant --L2-- Size: 768, L3_Move: 0, L2_Move: 768, TileOverhead: 1.000000, L2Buff: 0, Addr: 64 +CI Buff Bias => Dscnnfc1matmul_bias --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 832 + O Buff Out => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 880 +CI Buff Scale => S32_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 892 +CI Buff ScaleN => S32_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 904 +CI Buff Infos => S32_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 916 + Kernel Memory : L3: 0, L2: 925 + Kernel Total Memory: 925, L3 moves: 0, L2 moves: 925, Move overhead: 1.000000 + Kernel Operations : 768 [KernelOper/GraphOper: 0.028394%], Move/Operation ratio: [L3: 0.000000, L2: 1.204427] + Successors: 12 + + Living Dynamic Symbols: [S29_Output] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S33_SoftMax, Operations: 12 + I Buff In => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S33_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000444%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 20489 + Graph nodes min global memory: L3: 0, L2: 20492 + Graph sum of kernel arguments size: 170318, L3 moves: 0, L2 moves: 170318, Move overhead: 1.000000 + Graph total operations: 2704780 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 170318, Total L3_Move: 0, Total L2_Move: 170318, Tiling Overhead Average: 1.000000 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 16000 + Const 0 4492 + Total 0 20492 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + Input_1 Externally allocated + Dscnnconv_1weights_quantfakequ INSTALL: HyperFlash[ 0: 13]@ 16384 LOAD: L2[ 0: 13]@ 16384 EXEC: L2[ 0: 13]@ 16384 , Size: 2560 + Dscnnconv_1conv2d_fold_bias INSTALL: HyperFlash[ 0: 13]@ 22016 LOAD: L2[ 0: 13]@ 22016 EXEC: L2[ 0: 13]@ 22016 , Size: 256 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24320 LOAD: L2[ 0: 13]@ 24320 EXEC: L2[ 0: 13]@ 24320 , Size: 64 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24384 LOAD: L2[ 0: 13]@ 24384 EXEC: L2[ 0: 13]@ 24384 , Size: 64 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 25520 LOAD: L2[ 0: 13]@ 25520 EXEC: L2[ 0: 13]@ 25520 , Size: 9 + Dscnnconv_ds_1dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 19712 LOAD: L2[ 0: 13]@ 19712 EXEC: L2[ 0: 13]@ 19712 , Size: 576 + Dscnnconv_ds_1dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 22272 LOAD: L2[ 0: 13]@ 22272 EXEC: L2[ 0: 13]@ 22272 , Size: 256 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24448 LOAD: L2[ 0: 13]@ 24448 EXEC: L2[ 0: 13]@ 24448 , Size: 64 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24512 LOAD: L2[ 0: 13]@ 24512 EXEC: L2[ 0: 13]@ 24512 , Size: 64 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 25532 LOAD: L2[ 0: 13]@ 25532 EXEC: L2[ 0: 13]@ 25532 , Size: 9 + Dscnnconv_ds_1pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 4096 + Dscnnconv_ds_1pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 22528 LOAD: L2[ 0: 13]@ 22528 EXEC: L2[ 0: 13]@ 22528 , Size: 256 + S10_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24576 LOAD: L2[ 0: 13]@ 24576 EXEC: L2[ 0: 13]@ 24576 , Size: 64 + S10_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24640 LOAD: L2[ 0: 13]@ 24640 EXEC: L2[ 0: 13]@ 24640 , Size: 64 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 25544 LOAD: L2[ 0: 13]@ 25544 EXEC: L2[ 0: 13]@ 25544 , Size: 9 + Dscnnconv_ds_2dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 20288 LOAD: L2[ 0: 13]@ 20288 EXEC: L2[ 0: 13]@ 20288 , Size: 576 + Dscnnconv_ds_2dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 22784 LOAD: L2[ 0: 13]@ 22784 EXEC: L2[ 0: 13]@ 22784 , Size: 256 + S13_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24704 LOAD: L2[ 0: 13]@ 24704 EXEC: L2[ 0: 13]@ 24704 , Size: 64 + S13_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24768 LOAD: L2[ 0: 13]@ 24768 EXEC: L2[ 0: 13]@ 24768 , Size: 64 + S13_Infos INSTALL: HyperFlash[ 0: 13]@ 25556 LOAD: L2[ 0: 13]@ 25556 EXEC: L2[ 0: 13]@ 25556 , Size: 9 + Dscnnconv_ds_2pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 4096 LOAD: L2[ 0: 13]@ 4096 EXEC: L2[ 0: 13]@ 4096 , Size: 4096 + Dscnnconv_ds_2pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 23040 LOAD: L2[ 0: 13]@ 23040 EXEC: L2[ 0: 13]@ 23040 , Size: 256 + S16_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24832 LOAD: L2[ 0: 13]@ 24832 EXEC: L2[ 0: 13]@ 24832 , Size: 64 + S16_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24896 LOAD: L2[ 0: 13]@ 24896 EXEC: L2[ 0: 13]@ 24896 , Size: 64 + S16_Infos INSTALL: HyperFlash[ 0: 13]@ 25568 LOAD: L2[ 0: 13]@ 25568 EXEC: L2[ 0: 13]@ 25568 , Size: 9 + Dscnnconv_ds_3dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 20864 LOAD: L2[ 0: 13]@ 20864 EXEC: L2[ 0: 13]@ 20864 , Size: 576 + Dscnnconv_ds_3dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 23296 LOAD: L2[ 0: 13]@ 23296 EXEC: L2[ 0: 13]@ 23296 , Size: 256 + S19_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24960 LOAD: L2[ 0: 13]@ 24960 EXEC: L2[ 0: 13]@ 24960 , Size: 64 + S19_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25024 LOAD: L2[ 0: 13]@ 25024 EXEC: L2[ 0: 13]@ 25024 , Size: 64 + S19_Infos INSTALL: HyperFlash[ 0: 13]@ 25580 LOAD: L2[ 0: 13]@ 25580 EXEC: L2[ 0: 13]@ 25580 , Size: 9 + Dscnnconv_ds_3pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 8192 LOAD: L2[ 0: 13]@ 8192 EXEC: L2[ 0: 13]@ 8192 , Size: 4096 + Dscnnconv_ds_3pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 23552 LOAD: L2[ 0: 13]@ 23552 EXEC: L2[ 0: 13]@ 23552 , Size: 256 + S22_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25088 LOAD: L2[ 0: 13]@ 25088 EXEC: L2[ 0: 13]@ 25088 , Size: 64 + S22_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25152 LOAD: L2[ 0: 13]@ 25152 EXEC: L2[ 0: 13]@ 25152 , Size: 64 + S22_Infos INSTALL: HyperFlash[ 0: 13]@ 25592 LOAD: L2[ 0: 13]@ 25592 EXEC: L2[ 0: 13]@ 25592 , Size: 9 + Dscnnconv_ds_4dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 21440 LOAD: L2[ 0: 13]@ 21440 EXEC: L2[ 0: 13]@ 21440 , Size: 576 + Dscnnconv_ds_4dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 23808 LOAD: L2[ 0: 13]@ 23808 EXEC: L2[ 0: 13]@ 23808 , Size: 256 + S25_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25216 LOAD: L2[ 0: 13]@ 25216 EXEC: L2[ 0: 13]@ 25216 , Size: 64 + S25_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25280 LOAD: L2[ 0: 13]@ 25280 EXEC: L2[ 0: 13]@ 25280 , Size: 64 + S25_Infos INSTALL: HyperFlash[ 0: 13]@ 25604 LOAD: L2[ 0: 13]@ 25604 EXEC: L2[ 0: 13]@ 25604 , Size: 9 + Dscnnconv_ds_4pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 12288 LOAD: L2[ 0: 13]@ 12288 EXEC: L2[ 0: 13]@ 12288 , Size: 4096 + Dscnnconv_ds_4pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 24064 LOAD: L2[ 0: 13]@ 24064 EXEC: L2[ 0: 13]@ 24064 , Size: 256 + S28_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25344 LOAD: L2[ 0: 13]@ 25344 EXEC: L2[ 0: 13]@ 25344 , Size: 64 + S28_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25408 LOAD: L2[ 0: 13]@ 25408 EXEC: L2[ 0: 13]@ 25408 , Size: 64 + S28_Infos INSTALL: HyperFlash[ 0: 13]@ 25616 LOAD: L2[ 0: 13]@ 25616 EXEC: L2[ 0: 13]@ 25616 , Size: 9 + S29_Infos INSTALL: HyperFlash[ 0: 13]@ 25628 LOAD: L2[ 0: 13]@ 25628 EXEC: L2[ 0: 13]@ 25628 , Size: 9 + Dscnnfc1weights_quantfakequant INSTALL: HyperFlash[ 0: 13]@ 18944 LOAD: L2[ 0: 13]@ 18944 EXEC: L2[ 0: 13]@ 18944 , Size: 768 + Dscnnfc1matmul_bias INSTALL: HyperFlash[ 0: 13]@ 25472 LOAD: L2[ 0: 13]@ 25472 EXEC: L2[ 0: 13]@ 25472 , Size: 48 + S32_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25640 LOAD: L2[ 0: 13]@ 25640 EXEC: L2[ 0: 13]@ 25640 , Size: 12 + S32_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25652 LOAD: L2[ 0: 13]@ 25652 EXEC: L2[ 0: 13]@ 25652 , Size: 12 + S32_Infos INSTALL: HyperFlash[ 0: 13]@ 25664 LOAD: L2[ 0: 13]@ 25664 EXEC: L2[ 0: 13]@ 25664 , Size: 9 + S33_Infos INSTALL: HyperFlash[ 0: 13]@ 25676 LOAD: L2[ 0: 13]@ 25676 EXEC: L2[ 0: 13]@ 25676 , Size: 9 + Output_1 Externally allocated + S4_Output EXEC: L2[ 1: 2]@ 33688 , Size: 8000 + S7_Output EXEC: L2[ 2: 3]@ 25688 , Size: 8000 + S10_Output EXEC: L2[ 3: 4]@ 33688 , Size: 8000 + S13_Output EXEC: L2[ 4: 5]@ 25688 , Size: 8000 + S16_Output EXEC: L2[ 5: 6]@ 33688 , Size: 8000 + S19_Output EXEC: L2[ 6: 7]@ 25688 , Size: 8000 + S22_Output EXEC: L2[ 7: 8]@ 33688 , Size: 8000 + S25_Output EXEC: L2[ 8: 9]@ 41688 , Size: 8000 + S28_Output EXEC: L2[ 9: 10]@ 25688 , Size: 8000 + S29_Output EXEC: L2[ 10: 11]@ 33688 , Size: 64 + S32_Output EXEC: L2[ 11: 12]@ 25688 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S4_Conv2d_64x1x10x4_Relu +Generating Code For User Kernel: S7_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S10_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S13_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S16_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S19_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S22_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S25_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S28_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S29_AveragePool_25x5 +Generating Code For User Kernel: S32_Linear_12x64x1x1 +Generating Code For User Kernel: S33_SoftMax +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1weights_quantfakequ.tensor: 2560 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1conv2d_fold_bias.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S29_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1weights_quantfakequant.tensor: 768 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1matmul_bias.tensor: 12 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S33_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_s_quant_L3_Flash_Const.dat (size 25688) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 43448 +L2 Memory size (Bytes) : Given: 350000, Used: 49685 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 170318 Bytes +Sum of all Kernels arguments size : 170318 Bytes +Tiling Bandwith overhead : 1.000000 Move/KerArgSize +Sum of baseline bandwidth : 4248224 Bytes +Percentage of baseline BW for L2 : 4.00916 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 2704780 Operations +Total amount of flash coefficients : 25688 Bytes + +Basic kernels library : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_s_quant.h +Output Directory : BUILD_MODEL_SQ8_EMUL + +The following files have been generated: + KWS_ds_cnn_s_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_s_quantKernels.h Header file for the generated C code + KWS_ds_cnn_s_quant_L3_Flash_Const.dat Flash content for Graph constants +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o +gcc -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DSMALL -DWITH_MFCC -g -O0 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': False} +Pred/Tot: 91/ 100 Accuracy: 91.00% +Pred/Tot: 185/ 200 Accuracy: 92.50% +Pred/Tot: 273/ 300 Accuracy: 91.00% +Pred/Tot: 353/ 400 Accuracy: 88.25% +Pred/Tot: 442/ 500 Accuracy: 88.40% +Pred/Tot: 526/ 600 Accuracy: 87.67% +Pred/Tot: 618/ 700 Accuracy: 88.29% +Pred/Tot: 704/ 800 Accuracy: 88.00% +Pred/Tot: 794/ 900 Accuracy: 88.22% +Pred/Tot: 888/1000 Accuracy: 88.80% +Pred/Tot: 979/1100 Accuracy: 89.00% +Pred/Tot: 1071/1200 Accuracy: 89.25% +Pred/Tot: 1158/1300 Accuracy: 89.08% +Pred/Tot: 1246/1400 Accuracy: 89.00% +Pred/Tot: 1329/1500 Accuracy: 88.60% +Pred/Tot: 1422/1600 Accuracy: 88.88% +Pred/Tot: 1510/1700 Accuracy: 88.82% +Pred/Tot: 1599/1800 Accuracy: 88.83% +Pred/Tot: 1689/1900 Accuracy: 88.89% +Pred/Tot: 1777/2000 Accuracy: 88.85% +Pred/Tot: 1866/2100 Accuracy: 88.86% +Pred/Tot: 1957/2200 Accuracy: 88.95% +Pred/Tot: 2046/2300 Accuracy: 88.96% +Pred/Tot: 2132/2400 Accuracy: 88.83% +Pred/Tot: 2217/2500 Accuracy: 88.68% +Pred/Tot: 2306/2600 Accuracy: 88.69% +Pred/Tot: 2397/2700 Accuracy: 88.78% +Pred/Tot: 2485/2800 Accuracy: 88.75% +Pred/Tot: 2573/2900 Accuracy: 88.72% +Pred/Tot: 2656/3000 Accuracy: 88.53% +Pred/Tot: 2748/3100 Accuracy: 88.65% +Pred/Tot: 2840/3200 Accuracy: 88.75% +Pred/Tot: 2933/3300 Accuracy: 88.88% +Pred/Tot: 3016/3400 Accuracy: 88.71% +Pred/Tot: 3107/3500 Accuracy: 88.77% +Pred/Tot: 3197/3600 Accuracy: 88.81% +Pred/Tot: 3282/3700 Accuracy: 88.70% +Pred/Tot: 3375/3800 Accuracy: 88.82% +Pred/Tot: 3465/3900 Accuracy: 88.85% +Pred/Tot: 3553/4000 Accuracy: 88.83% +Pred/Tot: 3642/4100 Accuracy: 88.83% +Pred/Tot: 3732/4200 Accuracy: 88.86% +Pred/Tot: 3818/4300 Accuracy: 88.79% +Pred/Tot: 3907/4400 Accuracy: 88.80% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 3947/4444 Accuracy: 88.82% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 2 277 7 4 3 12 10 24 11 2 5 14] + [ 0 6 371 7 0 3 7 1 0 0 1 1] + [ 1 15 4 325 2 17 4 1 0 0 1 36] + [ 0 7 1 0 317 3 3 1 2 5 7 4] + [ 1 8 0 5 0 347 2 0 1 0 4 9] + [ 0 10 8 1 2 2 317 11 0 0 0 1] + [ 0 7 0 0 0 0 5 347 0 0 0 4] + [ 2 16 0 0 5 0 0 0 326 11 1 2] + [ 0 3 0 1 35 1 0 1 16 306 4 6] + [ 2 6 0 0 14 5 4 0 0 2 315 2] + [ 2 18 1 6 2 10 0 0 2 0 3 328]] +Pred/Tot: 88/ 100 Accuracy: 88.00% +Pred/Tot: 180/ 200 Accuracy: 90.00% +Pred/Tot: 271/ 300 Accuracy: 90.33% +Pred/Tot: 359/ 400 Accuracy: 89.75% +Pred/Tot: 446/ 500 Accuracy: 89.20% +Pred/Tot: 538/ 600 Accuracy: 89.67% +Pred/Tot: 629/ 700 Accuracy: 89.86% +Pred/Tot: 722/ 800 Accuracy: 90.25% +Pred/Tot: 813/ 900 Accuracy: 90.33% +Pred/Tot: 904/1000 Accuracy: 90.40% +Pred/Tot: 991/1100 Accuracy: 90.09% +Pred/Tot: 1083/1200 Accuracy: 90.25% +Pred/Tot: 1166/1300 Accuracy: 89.69% +Pred/Tot: 1254/1400 Accuracy: 89.57% +Pred/Tot: 1341/1500 Accuracy: 89.40% +Pred/Tot: 1433/1600 Accuracy: 89.56% +Pred/Tot: 1520/1700 Accuracy: 89.41% +Pred/Tot: 1613/1800 Accuracy: 89.61% +Pred/Tot: 1700/1900 Accuracy: 89.47% +Pred/Tot: 1789/2000 Accuracy: 89.45% +Pred/Tot: 1880/2100 Accuracy: 89.52% +Pred/Tot: 1974/2200 Accuracy: 89.73% +Pred/Tot: 2063/2300 Accuracy: 89.70% +Pred/Tot: 2153/2400 Accuracy: 89.71% +Pred/Tot: 2243/2500 Accuracy: 89.72% +Pred/Tot: 2330/2600 Accuracy: 89.62% +Pred/Tot: 2420/2700 Accuracy: 89.63% +Pred/Tot: 2509/2800 Accuracy: 89.61% +Pred/Tot: 2596/2900 Accuracy: 89.52% +Pred/Tot: 2682/3000 Accuracy: 89.40% +Pred/Tot: 2772/3100 Accuracy: 89.42% +Pred/Tot: 2863/3200 Accuracy: 89.47% +Pred/Tot: 2956/3300 Accuracy: 89.58% +Pred/Tot: 3045/3400 Accuracy: 89.56% +Pred/Tot: 3131/3500 Accuracy: 89.46% +Pred/Tot: 3224/3600 Accuracy: 89.56% +Pred/Tot: 3307/3700 Accuracy: 89.38% +Pred/Tot: 3398/3800 Accuracy: 89.42% +Pred/Tot: 3491/3900 Accuracy: 89.51% +Pred/Tot: 3575/4000 Accuracy: 89.38% +Pred/Tot: 3665/4100 Accuracy: 89.39% +Pred/Tot: 3757/4200 Accuracy: 89.45% +Pred/Tot: 3844/4300 Accuracy: 89.40% +Pred/Tot: 3934/4400 Accuracy: 89.41% +Pred/Tot: 4021/4500 Accuracy: 89.36% +Pred/Tot: 4105/4600 Accuracy: 89.24% +Pred/Tot: 4192/4700 Accuracy: 89.19% +Pred/Tot: 4285/4800 Accuracy: 89.27% + +FINAL TESTING ACCURACY: +Pred/Tot: 4364/4889 Accuracy: 89.26% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 319 4 4 3 7 13 22 9 2 7 18] + [ 0 9 389 3 0 1 13 1 0 1 1 1] + [ 0 21 1 315 1 22 4 2 0 0 0 39] + [ 0 6 0 1 389 0 3 2 8 6 4 6] + [ 1 11 1 3 0 364 3 0 6 2 1 14] + [ 0 4 11 2 0 0 378 13 0 0 2 2] + [ 0 1 0 0 0 2 4 385 0 0 1 3] + [ 0 16 0 0 6 8 0 1 351 8 0 6] + [ 1 4 1 0 37 2 3 1 8 326 3 16] + [ 0 5 0 0 17 13 2 0 0 1 372 1] + [ 1 10 0 10 0 9 1 1 0 2 0 368]] diff --git a/accuracy_log/log_test_small_power.txt b/accuracy_log/log_test_small_power.txt new file mode 100644 index 0000000..f610abd --- /dev/null +++ b/accuracy_log/log_test_small_power.txt @@ -0,0 +1,1457 @@ +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_s_quant_power.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 1 --use_high_prec 0 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=1 MEDIUM=0 LARGE=0 WITH_MFCC=1 USE_POWER=1 USE_HIGH_PREC=0 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +rm -f BUILD_MODEL_8BIT_EMUL/GenTile +rm -f -rf BUILD_MODEL_8BIT_EMUL +rm -f BUILD_MODEL_8BIT_EMUL/*.dat +rm -rf /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_8BIT_EMUL +cp model/KWS_ds_cnn_s_quant_power.tflite BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quant.tflite +echo "GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL" +GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL +sed -e "s|MODEL_SRC|KWS_ds_cnn_s_quantModel.c|g" -e "s|TENSORS_DIR|BUILD_MODEL_8BIT_EMUL/tensors|g" -e "s|MODEL_BUILD|BUILD_MODEL_8BIT_EMUL|g" -e "s|GRAPH_DUMP||g" -e "s|LARGE_OPT||g" \ + model/nntool_script_params > model/nntool_script +nntool -s model/nntool_script BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +load_quantization - was: False +now: True +open - opening graph file BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quant.tflite load_quantizaion = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +debug - was: False +now: True +adjust_order - adding transposes to correct tensor order for AT kernels +set_aliases - looking for aliased edges +eliminate_transposes - eliminating unnecessary transposes +eliminate_transposes - search for transposes +eliminate_transposes - ++ Starting up from DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - looking up at input_1[0] transpose [2, 0, 1] +eliminate_transposes - accepted input_1 - input without fixed order - transpose input [2, 0, 1] +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_0_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_1 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_2[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_2 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_2[0] +eliminate_transposes - looking down at CONV_2D_0_2_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_3 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_2[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_4[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_4 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_4[0] +eliminate_transposes - looking down at CONV_2D_0_4_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_5 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_4[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_6[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_6 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_6[0] +eliminate_transposes - looking down at CONV_2D_0_6_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_7 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_6[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_8[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_8 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_8[0] +eliminate_transposes - looking down at CONV_2D_0_8_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at AVERAGE_POOL_2D_0_9[0] transpose [1, 2, 0] +eliminate_transposes - accepted AVERAGE_POOL_2D_0_9 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_8[0] +eliminate_transposes - ++ Starting down from AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - looking down at FULLY_CONNECTED_0_10[0] transpose [1, 2, 0] +eliminate_transposes - accepted FULLY_CONNECTED_0_10 - linear layer reorder input +eliminate_transposes - ++ Found results for AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - eliminate transposes +eliminate_transposes_actions - Start Action (up): DEPTHWISE_CONV_2D_0_0 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 delete transpose in[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - input_1 input dims with (2, 0, 1) +eliminate_transposes_actions - End Action (up): input_1 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_0 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_2 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_2 +eliminate_transposes_actions - CONV_2D_0_2 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_2 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_4 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_4 +eliminate_transposes_actions - CONV_2D_0_4 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_4 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_6 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_6 +eliminate_transposes_actions - CONV_2D_0_6 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_6 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_8 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_8 +eliminate_transposes_actions - CONV_2D_0_8 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_8 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose in[0] +eliminate_transposes_actions - End Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - Start Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose out[0] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - reorder linear layer FULLY_CONNECTED_0_10 in with shape 1x1x64 transposed (2, 0, 1) +eliminate_transposes_actions - End Action (down): FULLY_CONNECTED_0_10 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - search for transposes +eliminate_transposes - no transposes to eliminate found +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - no further transpose sequences found +set_aliases - looking for aliased edges +nngraph - adjusted order +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start remove_relus +matcher - fusions - start remove_noops +matcher - fusions - start fuse_external_bias_sq8 +matcher - fusions - start fuse_pad +matcher - fusions - start match_duplicate_operations +matcher - fusions - start gather_to_split +matcher - fusions - start slice_to_split +matcher - fusions - start unused_concats +matcher - fusions - start find_missing_quantization +matcher - fusions - start rnn_reverse +matcher - fusions - start rnn_unpack +matcher - fusions - start match_far_hsigmoid +matcher - fusions - start match_close_hsigmoid +matcher - fusions - start expand_transposes +matcher - fusions - start move_pooling_scale8 +matcher - fusions - start move_activations_scale8 +matcher - fusions - start fuse_gap_convs +matcher - fusions - start match_conv_active_pool +matcher - fusions - start match_conv_pool_active +matcher - fusions - start match_conv_active +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_0,DEPTHWISE_CONV_2D_0_0_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_1,DEPTHWISE_CONV_2D_0_1_activation +match_gap_conv - fusing nodes CONV_2D_0_2,CONV_2D_0_2_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_3,DEPTHWISE_CONV_2D_0_3_activation +match_gap_conv - fusing nodes CONV_2D_0_4,CONV_2D_0_4_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_5,DEPTHWISE_CONV_2D_0_5_activation +match_gap_conv - fusing nodes CONV_2D_0_6,CONV_2D_0_6_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_7,DEPTHWISE_CONV_2D_0_7_activation +match_gap_conv - fusing nodes CONV_2D_0_8,CONV_2D_0_8_activation +matcher - fusions - match_conv_active modified graph +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start match_conv_pool +matcher - fusions - start fuse_gap_linear +matcher - fusions - start fuse_op_activation_scale8 +matcher - fusions - start propagate_softmax_sym_qrec +equalize_sym_mult_concats - propagating scale up from node SOFTMAX_0_11 to node FULLY_CONNECTED_0_10 +matcher - fusions - start equalize_sm_concats +matcher - fusions - start filter_bigger_than_input +matcher - fusions - start insert_copies +matcher - fusions - start propagate_up_rnn_in_qs +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| Step | Step name | Operation | Input Dims | Output Dims | Inputs | Active | Params | Ops | Params | Hints | +| | | | (cxhxw) | (cxhxw) | | size | size | | | | ++======+=============================+=========================+============+=============+========+========+========+=========+============================+======================+ +| 0 | input_1 | input | 1x49x10 | 1x49x10 | | 490 | 0 | | I 1x49x10 FIXED_ORDER=0 | in: hxwxc out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 1 | DEPTHWISE_CONV_2D_0_0_fusio | conv_fusion_conv_active | 1x49x10 | 64x25x5 | 0 | 8490 | 2624 | 320.00K | F 64x1x10x4 S 2x2 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 1 M 1 P 4x5x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 2 | DEPTHWISE_CONV_2D_0_1_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 1 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 3 | CONV_2D_0_2_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 2 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 4 | DEPTHWISE_CONV_2D_0_3_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 3 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 5 | CONV_2D_0_4_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 4 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 6 | DEPTHWISE_CONV_2D_0_5_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 5 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 7 | CONV_2D_0_6_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 6 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 8 | DEPTHWISE_CONV_2D_0_7_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 7 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 9 | CONV_2D_0_8_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 8 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 10 | AVERAGE_POOL_2D_0_9 | average_pool | 64x25x5 | 64x1x1 | 9 | 8064 | 0 | 8.06K | T average F 25x5 S 2x2 P | in: cxhxw out: cxhxw | +| | | | | | | | | | 0x0x0x0 zero | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 11 | FULLY_CONNECTED_0_10 | linear | 64x1x1 | 12 | 10 | 76 | 780 | 768 | F 12x64x1x1 | in: cx0x1 out: c | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 12 | SOFTMAX_0_11 | softmax | 12 | 12 | 11 | 24 | 0 | 24 | Beta 0.0 Axis None | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 13 | output_1 | output | 12 | 12 | 12 | 12 | 0 | | O 12 FIXED_ORDER=0 | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | 16000 | 20336 | 2.66M | | | +| | Max active/Total params | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | | 36336 | 2.66M | | | +| | Max mem usage | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| Step | Name | In | Out | Weights | Bias | Mulbias | Calc | Acc | ++======+==================+==================+=================+=================+==========+==========+=======+=======+ +| 0 | input_1 | -247.97>chan | Q32.0 | Q32.0 | +| | D_0_0 | 27124<246.03 | 32769<13.88 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 1 | DEPTHWISE_CONV_2 | -13.99>chan | Q32.0 | Q32.0 | +| | D_0_1 | 2769<13.88 | 43035<15.80 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 2 | DEPTHWISE_CONV_2 | -15.93>chan | Q32.0 | Q32.0 | +| | | 3035<15.80 | 84013<13.44 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 3 | CONV_2D_0_2_acti | -13.55>chan | Q32.0 | Q32.0 | +| | D_0_3 | 4013<13.44 | 16162<13.86 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 4 | DEPTHWISE_CONV_2 | -13.97>chan | Q32.0 | Q32.0 | +| | | 6162<13.86 | 70251<10.12 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 5 | CONV_2D_0_4_acti | -10.20>chan | Q32.0 | Q32.0 | +| | D_0_5 | 0251<10.12 | 10755<12.21 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 6 | DEPTHWISE_CONV_2 | -12.30>chan | Q32.0 | Q32.0 | +| | | 0755<12.21 | 1055<9.72 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 7 | CONV_2D_0_6_acti | -9.79>chan | Q32.0 | Q32.0 | +| | D_0_7 | 055<9.72 | 75360<14.70 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 8 | DEPTHWISE_CONV_2 | -14.82>chan | Q32.0 | Q32.0 | +| | | 5360<14.70 | 14717<11.07 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 9 | CONV_2D_0_8_acti | -11.15>chan | Q32.0 | Q32.0 | +| | 0_10 | 4717<11.07 | 00000<29.32 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 12 | SOFTMAX_0_11 | -29.55 W: 10, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 328000 + +==== Process Tiling For User Kernel: S1_Conv2d_64x1x10x4_Relu ======================= +S1_Conv2d_64x1x10x4_Relu Partition[0] Size = 39409 (Min: 200, Max: 55313), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S1_Conv2d_64x1x10x4_Relu, Total Raw Memory: 43448 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S1_Conv2d_64x1x10x4_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 43448, Reusable Memory: 5288, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S2_Conv2d_64x1x3x3_Relu ======================= +S2_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S2_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S2_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S2_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S2_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S3_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S3_Conv2d_64x64x1x1_Relu ======================= +S3_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S3_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S3_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S3_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S3_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S3_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S4_Conv2d_64x1x3x3_Relu ======================= +S4_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S4_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S4_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S4_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S5_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S5_Conv2d_64x64x1x1_Relu ======================= +S5_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S5_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S5_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S5_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S5_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S5_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S6_Conv2d_64x1x3x3_Relu ======================= +S6_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S6_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S6_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S6_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S6_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S7_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S7_Conv2d_64x64x1x1_Relu ======================= +S7_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S7_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S7_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S7_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S7_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S7_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S8_Conv2d_64x1x3x3_Relu ======================= +S8_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S8_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S8_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S8_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S8_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S9_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S9_Conv2d_64x64x1x1_Relu ======================= +S9_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S9_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S9_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S9_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S9_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S9_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 25, Pad:[0,0] => Ho: 1 +OverlapP: 23 +TileCons: 2 +UsedIn : [5 x 25] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 8000 + +==== Process Tiling For User Kernel: S10_AveragePool_25x5 ======================= +S10_AveragePool_25x5 Partition[0] Size = 16277 (Min: 250, Max: 16173), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S10_AveragePool_25x5, Total Raw Memory: 8076 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S10_AveragePool_25x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 8076, Reusable Memory: 40660, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S11_Linear_12x64x1x1, Linear: InDim: 64, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S11_Linear_12x64x1x1 ======================= +S11_Linear_12x64x1x1 Partition[0] Size = 1805 (Min: 0, Max: 1973), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S11_Linear_12x64x1x1, Total Raw Memory: 928 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S11_Linear_12x64x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 928, Reusable Memory: 47808, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S12_SoftMax ======================= + S12_SoftMax Partition[0] Size = 57 (Min: 8, Max: 81), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S12_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. + S12_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S11_Output[ In] Adding Edge From S11_Linear_12x64x1x1 To S12_SoftMax New + Symbol: S10_Output[ In] Adding Edge From S10_AveragePool_25x5 To S11_Linear_12x64x1x1 New + Symbol: S9_Output[ In] Adding Edge From S9_Conv2d_64x64x1x1_Relu To S10_AveragePool_25x5 New + Symbol: S8_Output[ In] Adding Edge From S8_Conv2d_64x1x3x3_Relu To S9_Conv2d_64x64x1x1_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_64x64x1x1_Relu To S8_Conv2d_64x1x3x3_Relu New + Symbol: S6_Output[ In] Adding Edge From S6_Conv2d_64x1x3x3_Relu To S7_Conv2d_64x64x1x1_Relu New + Symbol: S5_Output[ In] Adding Edge From S5_Conv2d_64x64x1x1_Relu To S6_Conv2d_64x1x3x3_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_64x1x3x3_Relu To S5_Conv2d_64x64x1x1_Relu New + Symbol: S3_Output[ In] Adding Edge From S3_Conv2d_64x64x1x1_Relu To S4_Conv2d_64x1x3x3_Relu New + Symbol: S2_Output[ In] Adding Edge From S2_Conv2d_64x1x3x3_Relu To S3_Conv2d_64x64x1x1_Relu New + Symbol: S1_Output[ In] Adding Edge From S1_Conv2d_64x1x10x4_Relu To S2_Conv2d_64x1x3x3_Relu New + Symbol: Output_1[Undef] Adding Edge From S12_SoftMax To __GraphExit__ New + Symbol: S12_Infos[ In] Adding Edge From __GraphEntry__ To S12_SoftMax New + Symbol: S11_Mul_shift[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 New + Symbol: S11_Mul_scale[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S11_Biases[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S11_Weights[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S11_Infos[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_AveragePool_25x5 New + Symbol: S9_Mul_shift[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu New + Symbol: S9_Mul_scale[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S9_Biases[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S9_Weights[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S9_Infos[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S8_Mul_shift[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu New + Symbol: S8_Mul_scale[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S8_Biases[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S8_Weights[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S8_Infos[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu New + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Biases[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Weights[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S6_Mul_shift[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu New + Symbol: S6_Mul_scale[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S6_Biases[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S6_Weights[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S6_Infos[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S5_Mul_shift[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu New + Symbol: S5_Mul_scale[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S5_Biases[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S5_Weights[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S5_Infos[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu New + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Biases[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Weights[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S3_Mul_shift[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu New + Symbol: S3_Mul_scale[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S3_Biases[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S3_Weights[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S3_Infos[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S2_Mul_shift[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu New + Symbol: S2_Mul_scale[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S2_Biases[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S2_Weights[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S2_Infos[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S1_Mul_shift[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu New + Symbol: S1_Mul_scale[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S1_Biases[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S1_Weights[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S1_Infos[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S0_Input_1[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 24000 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 49685 => Alloc: OK + +[FULL] Remapping [24000 .. 49684] to [0 .. 25684] Align compensation: 3 +[PART] Remapping [0 .. 23999] to [25688 .. 49687] Align compensation: 0 +[PART] Remapping [49685 .. 349999] to [49688 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_s_quantCNN is sucessfull, L2: 49685 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => S0_Input_1 + (null) => S1_Infos + (null) => S1_Weights + (null) => S1_Biases + (null) => S1_Mul_scale + (null) => S1_Mul_shift + (null) => S2_Infos + (null) => S2_Weights + (null) => S2_Biases + (null) => S2_Mul_scale + (null) => S2_Mul_shift + (null) => S3_Infos + (null) => S3_Weights + (null) => S3_Biases + (null) => S3_Mul_scale + (null) => S3_Mul_shift + (null) => S4_Infos + (null) => S4_Weights + (null) => S4_Biases + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S5_Infos + (null) => S5_Weights + (null) => S5_Biases + (null) => S5_Mul_scale + (null) => S5_Mul_shift + (null) => S6_Infos + (null) => S6_Weights + (null) => S6_Biases + (null) => S6_Mul_scale + (null) => S6_Mul_shift + (null) => S7_Infos + (null) => S7_Weights + (null) => S7_Biases + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S8_Infos + (null) => S8_Weights + (null) => S8_Biases + (null) => S8_Mul_scale + (null) => S8_Mul_shift + (null) => S9_Infos + (null) => S9_Weights + (null) => S9_Biases + (null) => S9_Mul_scale + (null) => S9_Mul_shift + (null) => S10_Infos + (null) => S11_Infos + (null) => S11_Weights + (null) => S11_Biases + (null) => S11_Mul_scale + (null) => S11_Mul_shift + (null) => S12_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 3 4 5 6 7 8 9 1 10 11 12 + + Living Dynamic Symbols: [S0_Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 8: UKer S1_Conv2d_64x1x10x4_Relu, Operations: 328000 + I Buff In => S0_Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 490, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S1_Weights --L2-- Size: 2560, L3_Move: 0, L2_Move: 2560, TileOverhead: 1.000000, L2Buff: 0, Addr: 876 +CI Buff Bias => S1_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 492 + O Buff Out => S1_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 3436 +CI Buff Scale => S1_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 748 +CI Buff ScaleN => S1_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 812 +CI Buff Infos => S1_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43436 + Kernel Memory : L3: 0, L2: 11443 + Kernel Total Memory: 11443, L3 moves: 0, L2 moves: 11443, Move overhead: 1.000000 + Kernel Operations : 328000 [KernelOper/GraphOper: 12.126680%], Move/Operation ratio: [L3: 0.000000, L2: 0.034887] + Successors: 2 + + Living Dynamic Symbols: [S0_Input_1] [S1_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S2_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S1_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S2_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S2_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S2_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S2_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S2_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S2_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 3 + + Living Dynamic Symbols: [S1_Output] [S2_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S3_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S2_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S3_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S3_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S3_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S3_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S3_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S3_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 4 + + Living Dynamic Symbols: [S2_Output] [S3_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S4_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S3_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S4_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S4_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S4_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 5 + + Living Dynamic Symbols: [S3_Output] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S5_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S5_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S5_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S5_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S5_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S5_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S5_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 6 + + Living Dynamic Symbols: [S4_Output] [S5_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S6_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S5_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S6_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S6_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S6_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S6_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S6_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S6_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 7 + + Living Dynamic Symbols: [S5_Output] [S6_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S7_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S6_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S7_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S7_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S7_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 8 + + Living Dynamic Symbols: [S6_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S8_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S8_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S8_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S8_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S8_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S8_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S8_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 9 + + Living Dynamic Symbols: [S7_Output] [S8_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S9_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S8_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S9_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S9_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S9_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S9_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S9_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S9_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 10 + + Living Dynamic Symbols: [S8_Output] [S9_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S10_AveragePool_25x5, Operations: 8000 + I Buff In => S9_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S10_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 8000 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 8064 + Kernel Memory : L3: 0, L2: 8073 + Kernel Total Memory: 8073, L3 moves: 0, L2 moves: 8073, Move overhead: 1.000000 + Kernel Operations : 8000 [KernelOper/GraphOper: 0.295773%], Move/Operation ratio: [L3: 0.000000, L2: 1.009125] + Successors: 11 + + Living Dynamic Symbols: [S9_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S11_Linear_12x64x1x1, Operations: 768 + I Buff In => S10_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S11_Weights --L2-- Size: 768, L3_Move: 0, L2_Move: 768, TileOverhead: 1.000000, L2Buff: 0, Addr: 64 +CI Buff Bias => S11_Biases --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 832 + O Buff Out => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 880 +CI Buff Scale => S11_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 892 +CI Buff ScaleN => S11_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 904 +CI Buff Infos => S11_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 916 + Kernel Memory : L3: 0, L2: 925 + Kernel Total Memory: 925, L3 moves: 0, L2 moves: 925, Move overhead: 1.000000 + Kernel Operations : 768 [KernelOper/GraphOper: 0.028394%], Move/Operation ratio: [L3: 0.000000, L2: 1.204427] + Successors: 12 + + Living Dynamic Symbols: [S10_Output] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S12_SoftMax, Operations: 12 + I Buff In => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S12_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000444%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 20489 + Graph nodes min global memory: L3: 0, L2: 20492 + Graph sum of kernel arguments size: 170318, L3 moves: 0, L2 moves: 170318, Move overhead: 1.000000 + Graph total operations: 2704780 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 170318, Total L3_Move: 0, Total L2_Move: 170318, Tiling Overhead Average: 1.000000 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 16000 + Const 0 4492 + Total 0 20492 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + S0_Input_1 Externally allocated + S1_Infos INSTALL: HyperFlash[ 0: 13]@ 25520 LOAD: L2[ 0: 13]@ 25520 EXEC: L2[ 0: 13]@ 25520 , Size: 9 + S1_Weights INSTALL: HyperFlash[ 0: 13]@ 16384 LOAD: L2[ 0: 13]@ 16384 EXEC: L2[ 0: 13]@ 16384 , Size: 2560 + S1_Biases INSTALL: HyperFlash[ 0: 13]@ 22016 LOAD: L2[ 0: 13]@ 22016 EXEC: L2[ 0: 13]@ 22016 , Size: 256 + S1_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24320 LOAD: L2[ 0: 13]@ 24320 EXEC: L2[ 0: 13]@ 24320 , Size: 64 + S1_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24384 LOAD: L2[ 0: 13]@ 24384 EXEC: L2[ 0: 13]@ 24384 , Size: 64 + S2_Infos INSTALL: HyperFlash[ 0: 13]@ 25532 LOAD: L2[ 0: 13]@ 25532 EXEC: L2[ 0: 13]@ 25532 , Size: 9 + S2_Weights INSTALL: HyperFlash[ 0: 13]@ 19712 LOAD: L2[ 0: 13]@ 19712 EXEC: L2[ 0: 13]@ 19712 , Size: 576 + S2_Biases INSTALL: HyperFlash[ 0: 13]@ 22272 LOAD: L2[ 0: 13]@ 22272 EXEC: L2[ 0: 13]@ 22272 , Size: 256 + S2_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24448 LOAD: L2[ 0: 13]@ 24448 EXEC: L2[ 0: 13]@ 24448 , Size: 64 + S2_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24512 LOAD: L2[ 0: 13]@ 24512 EXEC: L2[ 0: 13]@ 24512 , Size: 64 + S3_Infos INSTALL: HyperFlash[ 0: 13]@ 25544 LOAD: L2[ 0: 13]@ 25544 EXEC: L2[ 0: 13]@ 25544 , Size: 9 + S3_Weights INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 4096 + S3_Biases INSTALL: HyperFlash[ 0: 13]@ 22528 LOAD: L2[ 0: 13]@ 22528 EXEC: L2[ 0: 13]@ 22528 , Size: 256 + S3_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24576 LOAD: L2[ 0: 13]@ 24576 EXEC: L2[ 0: 13]@ 24576 , Size: 64 + S3_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24640 LOAD: L2[ 0: 13]@ 24640 EXEC: L2[ 0: 13]@ 24640 , Size: 64 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 25556 LOAD: L2[ 0: 13]@ 25556 EXEC: L2[ 0: 13]@ 25556 , Size: 9 + S4_Weights INSTALL: HyperFlash[ 0: 13]@ 20288 LOAD: L2[ 0: 13]@ 20288 EXEC: L2[ 0: 13]@ 20288 , Size: 576 + S4_Biases INSTALL: HyperFlash[ 0: 13]@ 22784 LOAD: L2[ 0: 13]@ 22784 EXEC: L2[ 0: 13]@ 22784 , Size: 256 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24704 LOAD: L2[ 0: 13]@ 24704 EXEC: L2[ 0: 13]@ 24704 , Size: 64 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24768 LOAD: L2[ 0: 13]@ 24768 EXEC: L2[ 0: 13]@ 24768 , Size: 64 + S5_Infos INSTALL: HyperFlash[ 0: 13]@ 25568 LOAD: L2[ 0: 13]@ 25568 EXEC: L2[ 0: 13]@ 25568 , Size: 9 + S5_Weights INSTALL: HyperFlash[ 0: 13]@ 4096 LOAD: L2[ 0: 13]@ 4096 EXEC: L2[ 0: 13]@ 4096 , Size: 4096 + S5_Biases INSTALL: HyperFlash[ 0: 13]@ 23040 LOAD: L2[ 0: 13]@ 23040 EXEC: L2[ 0: 13]@ 23040 , Size: 256 + S5_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24832 LOAD: L2[ 0: 13]@ 24832 EXEC: L2[ 0: 13]@ 24832 , Size: 64 + S5_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24896 LOAD: L2[ 0: 13]@ 24896 EXEC: L2[ 0: 13]@ 24896 , Size: 64 + S6_Infos INSTALL: HyperFlash[ 0: 13]@ 25580 LOAD: L2[ 0: 13]@ 25580 EXEC: L2[ 0: 13]@ 25580 , Size: 9 + S6_Weights INSTALL: HyperFlash[ 0: 13]@ 20864 LOAD: L2[ 0: 13]@ 20864 EXEC: L2[ 0: 13]@ 20864 , Size: 576 + S6_Biases INSTALL: HyperFlash[ 0: 13]@ 23296 LOAD: L2[ 0: 13]@ 23296 EXEC: L2[ 0: 13]@ 23296 , Size: 256 + S6_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24960 LOAD: L2[ 0: 13]@ 24960 EXEC: L2[ 0: 13]@ 24960 , Size: 64 + S6_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25024 LOAD: L2[ 0: 13]@ 25024 EXEC: L2[ 0: 13]@ 25024 , Size: 64 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 25592 LOAD: L2[ 0: 13]@ 25592 EXEC: L2[ 0: 13]@ 25592 , Size: 9 + S7_Weights INSTALL: HyperFlash[ 0: 13]@ 8192 LOAD: L2[ 0: 13]@ 8192 EXEC: L2[ 0: 13]@ 8192 , Size: 4096 + S7_Biases INSTALL: HyperFlash[ 0: 13]@ 23552 LOAD: L2[ 0: 13]@ 23552 EXEC: L2[ 0: 13]@ 23552 , Size: 256 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25088 LOAD: L2[ 0: 13]@ 25088 EXEC: L2[ 0: 13]@ 25088 , Size: 64 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25152 LOAD: L2[ 0: 13]@ 25152 EXEC: L2[ 0: 13]@ 25152 , Size: 64 + S8_Infos INSTALL: HyperFlash[ 0: 13]@ 25604 LOAD: L2[ 0: 13]@ 25604 EXEC: L2[ 0: 13]@ 25604 , Size: 9 + S8_Weights INSTALL: HyperFlash[ 0: 13]@ 21440 LOAD: L2[ 0: 13]@ 21440 EXEC: L2[ 0: 13]@ 21440 , Size: 576 + S8_Biases INSTALL: HyperFlash[ 0: 13]@ 23808 LOAD: L2[ 0: 13]@ 23808 EXEC: L2[ 0: 13]@ 23808 , Size: 256 + S8_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25216 LOAD: L2[ 0: 13]@ 25216 EXEC: L2[ 0: 13]@ 25216 , Size: 64 + S8_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25280 LOAD: L2[ 0: 13]@ 25280 EXEC: L2[ 0: 13]@ 25280 , Size: 64 + S9_Infos INSTALL: HyperFlash[ 0: 13]@ 25616 LOAD: L2[ 0: 13]@ 25616 EXEC: L2[ 0: 13]@ 25616 , Size: 9 + S9_Weights INSTALL: HyperFlash[ 0: 13]@ 12288 LOAD: L2[ 0: 13]@ 12288 EXEC: L2[ 0: 13]@ 12288 , Size: 4096 + S9_Biases INSTALL: HyperFlash[ 0: 13]@ 24064 LOAD: L2[ 0: 13]@ 24064 EXEC: L2[ 0: 13]@ 24064 , Size: 256 + S9_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25344 LOAD: L2[ 0: 13]@ 25344 EXEC: L2[ 0: 13]@ 25344 , Size: 64 + S9_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25408 LOAD: L2[ 0: 13]@ 25408 EXEC: L2[ 0: 13]@ 25408 , Size: 64 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 25628 LOAD: L2[ 0: 13]@ 25628 EXEC: L2[ 0: 13]@ 25628 , Size: 9 + S11_Infos INSTALL: HyperFlash[ 0: 13]@ 25640 LOAD: L2[ 0: 13]@ 25640 EXEC: L2[ 0: 13]@ 25640 , Size: 9 + S11_Weights INSTALL: HyperFlash[ 0: 13]@ 18944 LOAD: L2[ 0: 13]@ 18944 EXEC: L2[ 0: 13]@ 18944 , Size: 768 + S11_Biases INSTALL: HyperFlash[ 0: 13]@ 25472 LOAD: L2[ 0: 13]@ 25472 EXEC: L2[ 0: 13]@ 25472 , Size: 48 + S11_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25652 LOAD: L2[ 0: 13]@ 25652 EXEC: L2[ 0: 13]@ 25652 , Size: 12 + S11_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25664 LOAD: L2[ 0: 13]@ 25664 EXEC: L2[ 0: 13]@ 25664 , Size: 12 + S12_Infos INSTALL: HyperFlash[ 0: 13]@ 25676 LOAD: L2[ 0: 13]@ 25676 EXEC: L2[ 0: 13]@ 25676 , Size: 9 + Output_1 Externally allocated + S1_Output EXEC: L2[ 1: 2]@ 33688 , Size: 8000 + S2_Output EXEC: L2[ 2: 3]@ 25688 , Size: 8000 + S3_Output EXEC: L2[ 3: 4]@ 33688 , Size: 8000 + S4_Output EXEC: L2[ 4: 5]@ 25688 , Size: 8000 + S5_Output EXEC: L2[ 5: 6]@ 33688 , Size: 8000 + S6_Output EXEC: L2[ 6: 7]@ 25688 , Size: 8000 + S7_Output EXEC: L2[ 7: 8]@ 33688 , Size: 8000 + S8_Output EXEC: L2[ 8: 9]@ 41688 , Size: 8000 + S9_Output EXEC: L2[ 9: 10]@ 25688 , Size: 8000 + S10_Output EXEC: L2[ 10: 11]@ 33688 , Size: 64 + S11_Output EXEC: L2[ 11: 12]@ 25688 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S1_Conv2d_64x1x10x4_Relu +Generating Code For User Kernel: S2_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S3_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S4_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S5_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S6_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S7_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S8_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S9_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S10_AveragePool_25x5 +Generating Code For User Kernel: S11_Linear_12x64x1x1 +Generating Code For User Kernel: S12_SoftMax +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Weights.tensor: 2560 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Weights.tensor: 768 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Biases.tensor: 48 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S12_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_s_quant_L3_Flash_Const.dat (size 25688) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 43448 +L2 Memory size (Bytes) : Given: 350000, Used: 49685 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 170318 Bytes +Sum of all Kernels arguments size : 170318 Bytes +Tiling Bandwith overhead : 1.000000 Move/KerArgSize +Sum of baseline bandwidth : 4248224 Bytes +Percentage of baseline BW for L2 : 4.00916 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 2704780 Operations +Total amount of flash coefficients : 25688 Bytes + +Basic kernels library : nntool_extra_kernels.h + : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_s_quant.h +Output Directory : BUILD_MODEL_8BIT_EMUL + +The following files have been generated: + KWS_ds_cnn_s_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_s_quantKernels.h Header file for the generated C code + KWS_ds_cnn_s_quant_L3_Flash_Const.dat Flash content for Graph constants +mkdir /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +python3 /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/GenLUT.py --fft_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/LUT.def --mfcc_bf_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCC_FB.def \ + --sample_rate 16000 --frame_size 640 --frame_step 320 --n_frame 49 \ + --n_fft 1024 --n_dct 40 --mfcc_bank_cnt 40 --fmin 20 --fmax 4000 --mfcc_bank_cnt 40 --preempfactor 0.0 \ + --use_tf_mfcc --save_params_header MFCC_params_SMALL.h --use_power +(1024,) +Filter 0: Start: 2 Stop: 5 Base: 0 Items: 4 +Filter 1: Start: 4 Stop: 8 Base: 4 Items: 5 +Filter 2: Start: 6 Stop: 10 Base: 9 Items: 5 +Filter 3: Start: 9 Stop: 13 Base: 14 Items: 5 +Filter 4: Start: 11 Stop: 15 Base: 19 Items: 5 +Filter 5: Start: 14 Stop: 18 Base: 24 Items: 5 +Filter 6: Start: 16 Stop: 21 Base: 29 Items: 6 +Filter 7: Start: 19 Stop: 24 Base: 35 Items: 6 +Filter 8: Start: 22 Stop: 28 Base: 41 Items: 7 +Filter 9: Start: 25 Stop: 31 Base: 48 Items: 7 +Filter 10: Start: 29 Stop: 34 Base: 55 Items: 6 +Filter 11: Start: 32 Stop: 38 Base: 61 Items: 7 +Filter 12: Start: 35 Stop: 42 Base: 68 Items: 8 +Filter 13: Start: 39 Stop: 46 Base: 76 Items: 8 +Filter 14: Start: 43 Stop: 51 Base: 84 Items: 9 +Filter 15: Start: 47 Stop: 55 Base: 93 Items: 9 +Filter 16: Start: 52 Stop: 60 Base: 102 Items: 9 +Filter 17: Start: 56 Stop: 65 Base: 111 Items: 10 +Filter 18: Start: 61 Stop: 70 Base: 121 Items: 10 +Filter 19: Start: 66 Stop: 75 Base: 131 Items: 10 +Filter 20: Start: 71 Stop: 81 Base: 141 Items: 11 +Filter 21: Start: 76 Stop: 87 Base: 152 Items: 12 +Filter 22: Start: 82 Stop: 93 Base: 164 Items: 12 +Filter 23: Start: 88 Stop: 99 Base: 176 Items: 12 +Filter 24: Start: 94 Stop: 106 Base: 188 Items: 13 +Filter 25: Start: 100 Stop: 113 Base: 201 Items: 14 +Filter 26: Start: 107 Stop: 121 Base: 215 Items: 15 +Filter 27: Start: 114 Stop: 128 Base: 230 Items: 15 +Filter 28: Start: 122 Stop: 137 Base: 245 Items: 16 +Filter 29: Start: 129 Stop: 145 Base: 261 Items: 17 +Filter 30: Start: 138 Stop: 154 Base: 278 Items: 17 +Filter 31: Start: 146 Stop: 163 Base: 295 Items: 18 +Filter 32: Start: 155 Stop: 173 Base: 313 Items: 19 +Filter 33: Start: 164 Stop: 183 Base: 332 Items: 20 +Filter 34: Start: 174 Stop: 194 Base: 352 Items: 21 +Filter 35: Start: 184 Stop: 205 Base: 373 Items: 22 +Filter 36: Start: 195 Stop: 217 Base: 395 Items: 23 +Filter 37: Start: 206 Stop: 229 Base: 418 Items: 24 +Filter 38: Start: 218 Stop: 242 Base: 442 Items: 25 +Filter 39: Start: 230 Stop: 255 Base: 467 Items: 26 +gcc -g -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -I. -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Emulation MFCCmodel.c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccGenerator.c /home/marco-gwt/GWT/AutotilerV2/install/lib/libtile.a -lm -DSMALL +/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -f BUILD_MODEL_8BIT_EMUL --L1 48736 --L2 350000 --L3 6388608 +MFCC_COEF_DYN = 10 +FFT_BITS = 10 + +==== Process Tiling For User Kernel: MFCC ======================= + MFCC Partition[0] Size = 24894 (Min: 0, Max: 25622), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes + MFCC, TiledSpace: Tile0 Iteration Count: 1 + In : Ratio: 0.000000, Size: 2560, Total: 2560, Move: 62720 (Decl x 1.960000) L2 +* Out : Ratio: 0.000000, Size: 3920, Total: 6480, Move: 3920 (Decl x 1.000000) L2 +* InOut1 : Ratio: 3.200000, Size: 4096, Total: 10576, Move: 0 (Decl x 0.000000) L2 +* Out_fft : Ratio: 3.200000, Size: 4096, Total: 14672, Move: 0 (Decl x 0.000000) L2 +* shift_fft : Ratio: 3.200000, Size: 1024, Total: 15696, Move: 0 (Decl x 0.000000) L2 +* InOut2 : Ratio: 0.125000, Size: 160, Total: 15856, Move: 0 (Decl x 0.000000) L2 +* Shift : Ratio: 0.003125, Size: 4, Total: 15860, Move: 0 (Decl x 0.000000) L2 +* WinTable : Ratio: 3.200000, Size: 2048, Total: 17908, Move: 2048 (Decl x 1.000000) L2 +* Twiddles_fft : Ratio: 6.400000, Size: 4096, Total: 22004, Move: 4096 (Decl x 1.000000) L2 +* SwapTable_fft : Ratio: 3.200000, Size: 2048, Total: 24052, Move: 2048 (Decl x 1.000000) L2 +*MFCC_FilterBank : Ratio: 0.125000, Size: 320, Total: 24372, Move: 320 (Decl x 1.000000) L2 +* MFCC_Coeffs : Ratio: 1.543750, Size: 988, Total: 25360, Move: 988 (Decl x 1.000000) L2 +* shift_bf : Ratio: 0.125000, Size: 40, Total: 25400, Move: 0 (Decl x 0.000000) L2 +* DCT_Coeff : Ratio: 5.000000, Size: 3200, Total: 28600, Move: 3200 (Decl x 1.000000) L2 + MFCC - IterSpace: Tile0 - L1 Memory: 28600, L2Move: 79340, L3Move: 0, Tiling Overhead: 1.631839 + MFCC Iteration for Tiled Space: 1 + MFCC For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 28600, Reusable Memory: 20136, Used L2 Memory: 0 +================================================================================================= + + +Generating Code For User Kernel: MFCC + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 28600 +L2 Memory size (Bytes) : Given: 350000, Used: 0 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for all user kernels : 0 Bytes +L2 Memory bandwidth for all user kerneks : 79340 Bytes +Sum of all Kernels arguments size : 48620 Bytes +Tiling Bandwith overhead : 1.631839 Move/KerArgSize +Percentage of baseline BW for L2 : inf % +Percentage of baseline BW for L3 : -nan % +Sum of all Kernels operations : 0 Operations + +Basic kernels library : MfccBasicKernels.h +Output Directory : /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL + +The following files have been generated: + MFCCKernels.c Generated C code for the user kernels and the user kernels groups + MFCCKernels.h Header file for the generated C code +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': True} +Pred/Tot: 91/ 100 Accuracy: 91.00% +Pred/Tot: 186/ 200 Accuracy: 93.00% +Pred/Tot: 277/ 300 Accuracy: 92.33% +Pred/Tot: 364/ 400 Accuracy: 91.00% +Pred/Tot: 457/ 500 Accuracy: 91.40% +Pred/Tot: 546/ 600 Accuracy: 91.00% +Pred/Tot: 639/ 700 Accuracy: 91.29% +Pred/Tot: 730/ 800 Accuracy: 91.25% +Pred/Tot: 819/ 900 Accuracy: 91.00% +Pred/Tot: 912/1000 Accuracy: 91.20% +Pred/Tot: 1006/1100 Accuracy: 91.45% +Pred/Tot: 1098/1200 Accuracy: 91.50% +Pred/Tot: 1188/1300 Accuracy: 91.38% +Pred/Tot: 1281/1400 Accuracy: 91.50% +Pred/Tot: 1371/1500 Accuracy: 91.40% +Pred/Tot: 1467/1600 Accuracy: 91.69% +Pred/Tot: 1559/1700 Accuracy: 91.71% +Pred/Tot: 1654/1800 Accuracy: 91.89% +Pred/Tot: 1745/1900 Accuracy: 91.84% +Pred/Tot: 1838/2000 Accuracy: 91.90% +Pred/Tot: 1933/2100 Accuracy: 92.05% +Pred/Tot: 2028/2200 Accuracy: 92.18% +Pred/Tot: 2122/2300 Accuracy: 92.26% +Pred/Tot: 2215/2400 Accuracy: 92.29% +Pred/Tot: 2303/2500 Accuracy: 92.12% +Pred/Tot: 2392/2600 Accuracy: 92.00% +Pred/Tot: 2486/2700 Accuracy: 92.07% +Pred/Tot: 2580/2800 Accuracy: 92.14% +Pred/Tot: 2670/2900 Accuracy: 92.07% +Pred/Tot: 2761/3000 Accuracy: 92.03% +Pred/Tot: 2856/3100 Accuracy: 92.13% +Pred/Tot: 2950/3200 Accuracy: 92.19% +Pred/Tot: 3047/3300 Accuracy: 92.33% +Pred/Tot: 3138/3400 Accuracy: 92.29% +Pred/Tot: 3233/3500 Accuracy: 92.37% +Pred/Tot: 3328/3600 Accuracy: 92.44% +Pred/Tot: 3418/3700 Accuracy: 92.38% +Pred/Tot: 3510/3800 Accuracy: 92.37% +Pred/Tot: 3603/3900 Accuracy: 92.38% +Pred/Tot: 3696/4000 Accuracy: 92.40% +Pred/Tot: 3789/4100 Accuracy: 92.41% +Pred/Tot: 3883/4200 Accuracy: 92.45% +Pred/Tot: 3974/4300 Accuracy: 92.42% +Pred/Tot: 4071/4400 Accuracy: 92.52% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4112/4444 Accuracy: 92.53% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 1 303 0 5 2 8 4 14 11 1 8 14] + [ 0 4 382 1 0 0 7 1 0 0 0 2] + [ 0 12 5 368 0 5 1 3 0 0 1 11] + [ 0 7 1 0 320 3 0 1 0 7 9 2] + [ 0 5 1 9 0 355 0 0 0 0 2 5] + [ 0 9 6 0 0 0 333 2 0 0 0 2] + [ 0 10 1 1 0 0 2 347 0 0 0 2] + [ 1 7 0 0 6 1 1 2 341 2 0 2] + [ 0 3 0 0 24 0 1 0 11 321 9 4] + [ 2 2 0 1 6 1 0 0 0 1 336 1] + [ 0 10 1 11 0 9 0 0 2 0 4 335]] +Pred/Tot: 92/ 100 Accuracy: 92.00% +Pred/Tot: 185/ 200 Accuracy: 92.50% +Pred/Tot: 277/ 300 Accuracy: 92.33% +Pred/Tot: 371/ 400 Accuracy: 92.75% +Pred/Tot: 459/ 500 Accuracy: 91.80% +Pred/Tot: 551/ 600 Accuracy: 91.83% +Pred/Tot: 645/ 700 Accuracy: 92.14% +Pred/Tot: 739/ 800 Accuracy: 92.38% +Pred/Tot: 833/ 900 Accuracy: 92.56% +Pred/Tot: 927/1000 Accuracy: 92.70% +Pred/Tot: 1014/1100 Accuracy: 92.18% +Pred/Tot: 1108/1200 Accuracy: 92.33% +Pred/Tot: 1198/1300 Accuracy: 92.15% +Pred/Tot: 1291/1400 Accuracy: 92.21% +Pred/Tot: 1380/1500 Accuracy: 92.00% +Pred/Tot: 1476/1600 Accuracy: 92.25% +Pred/Tot: 1569/1700 Accuracy: 92.29% +Pred/Tot: 1659/1800 Accuracy: 92.17% +Pred/Tot: 1750/1900 Accuracy: 92.11% +Pred/Tot: 1842/2000 Accuracy: 92.10% +Pred/Tot: 1935/2100 Accuracy: 92.14% +Pred/Tot: 2030/2200 Accuracy: 92.27% +Pred/Tot: 2122/2300 Accuracy: 92.26% +Pred/Tot: 2216/2400 Accuracy: 92.33% +Pred/Tot: 2309/2500 Accuracy: 92.36% +Pred/Tot: 2401/2600 Accuracy: 92.35% +Pred/Tot: 2495/2700 Accuracy: 92.41% +Pred/Tot: 2587/2800 Accuracy: 92.39% +Pred/Tot: 2678/2900 Accuracy: 92.34% +Pred/Tot: 2770/3000 Accuracy: 92.33% +Pred/Tot: 2865/3100 Accuracy: 92.42% +Pred/Tot: 2958/3200 Accuracy: 92.44% +Pred/Tot: 3057/3300 Accuracy: 92.64% +Pred/Tot: 3148/3400 Accuracy: 92.59% +Pred/Tot: 3241/3500 Accuracy: 92.60% +Pred/Tot: 3340/3600 Accuracy: 92.78% +Pred/Tot: 3431/3700 Accuracy: 92.73% +Pred/Tot: 3522/3800 Accuracy: 92.68% +Pred/Tot: 3617/3900 Accuracy: 92.74% +Pred/Tot: 3713/4000 Accuracy: 92.83% +Pred/Tot: 3806/4100 Accuracy: 92.83% +Pred/Tot: 3900/4200 Accuracy: 92.86% +Pred/Tot: 3997/4300 Accuracy: 92.95% +Pred/Tot: 4087/4400 Accuracy: 92.89% +Pred/Tot: 4179/4500 Accuracy: 92.87% +Pred/Tot: 4270/4600 Accuracy: 92.83% +Pred/Tot: 4362/4700 Accuracy: 92.81% +Pred/Tot: 4458/4800 Accuracy: 92.88% + +FINAL TESTING ACCURACY: +Pred/Tot: 4540/4889 Accuracy: 92.86% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 358 1 4 2 8 3 9 7 2 8 6] + [ 1 11 391 2 0 1 11 0 0 0 1 1] + [ 0 7 1 384 0 3 1 1 0 0 1 7] + [ 0 6 0 0 392 0 1 0 5 10 7 4] + [ 0 9 3 11 1 372 0 0 0 0 1 9] + [ 0 11 9 0 2 0 384 5 0 0 1 0] + [ 0 22 0 0 2 3 4 363 0 0 1 1] + [ 1 9 0 0 4 4 0 1 370 3 1 3] + [ 1 7 0 1 15 0 2 0 15 350 2 9] + [ 0 2 0 1 3 1 0 0 0 0 399 5] + [ 0 5 0 16 2 5 3 1 0 0 1 369]] diff --git a/accuracy_log/log_test_small_power_v2.txt b/accuracy_log/log_test_small_power_v2.txt new file mode 100644 index 0000000..660e9c2 --- /dev/null +++ b/accuracy_log/log_test_small_power_v2.txt @@ -0,0 +1,2028 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_s_quant_power.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 1 +WARNING:tensorflow:From utils/test_accuracy_emul.py:311: The name tf.app.run is deprecated. Please use tf.compat.v1.app.run instead. + +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from main_emulation.c:32: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_BasicKernels_SQ8.h:3, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.h:5, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.c:1: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:17: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WriteImageToFile’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:437:18: note: in expansion of macro ‘__OPEN_WRITE’ + 437 | void *File = __OPEN_WRITE(fs, ImageName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:454:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 454 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:460:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 460 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:473:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 473 | ret+=__WRITE(File,OutBuffer +(CHUNK_SIZE*i), CHUNK_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:476:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 476 | ret+=__WRITE(File,OutBuffer+(CHUNK_SIZE*steps) , ((W*H*PixelSize) % CHUNK_SIZE)*sizeof(unsigned char)); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:479:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 479 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:57: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result] + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:9: note: in expansion of macro ‘__WRITE’ + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:7: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c: In function ‘AT_TensorGetNextPage’: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:79:63: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 79 | AT_HYPERFLASH_FS_CL_COPY((AT_HYPERFLASH_FS_T *) L3_Device, (AT_HYPERFLASH_FS_EXT_ADDR_TYPE) (Addr+Offset), (AT_HYPERFLASH_FS_INT_ADDR_TYPE) L2_BufferAddr, Size, 0, L3_Event); + | ^ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:225:36: note: in definition of macro ‘AT_HYPERFLASH_FS_CL_COPY’ + 225 | __at_hyperflash_fs_copy(*(file), ext, loc, size, dir) + | ^~~ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c: In function ‘WriteWavToFileNew’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:236:18: note: in expansion of macro ‘__OPEN_WRITE’ + 236 | void *File = __OPEN_WRITE(fs, FileName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:328:20: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 328 | ret += __WRITE(File, header_buffer, WAV_HEADER_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:335:21: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 335 | ret += __WRITE(File, data, Size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:338:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 338 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=1 MEDIUM=0 LARGE=0 WITH_MFCC=1 USE_POWER=1 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_SQ8_EMUL +cp model/KWS_ds_cnn_s_quant_power.tflite BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quant.tflite +echo "GENERATING NNTOOL STATE FILE" +GENERATING NNTOOL STATE FILE +nntool -s model/nntool_script BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +open - opening graph file BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quant.tflite load_quantization = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +forwards SOFTMAX_0_11 in: -29.55<(i8-0.00)*0.23083353<29.32 out: None stop [] fusion False +handler SoftmaxTanHMult selected for SoftMaxParameters(SOFTMAX_0_11) +forwards handler SOFTMAX_0_11 returned in: -32.00<(i8-0.00)*0.25000000<31.75 forced out: -1.00<(i16-0.00)*0.00003052<1.00 fusion False +forwards in edge 0 does not match was -29.55<(i8-0.00)*0.23083353<29.32 need -32.00<(i8-0.00)*0.25000000<31.75 forced +go backwackwards to F 12x1x1x64 B 1 +backwards FULLY_CONNECTED_0_10 in: -11.15<(i8-0.00)*0.08714711<11.07,chan<(i8-0.00)*chan W: 10, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 328000 + +==== Process Tiling For User Kernel: S4_Conv2d_64x1x10x4_Relu ======================= +S4_Conv2d_64x1x10x4_Relu Partition[0] Size = 39393 (Min: 200, Max: 55185), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=25 + In Dim: 58, TileOverlap: 8, Ratio: 2.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S4_Conv2d_64x1x10x4_Relu, Total Raw Memory: 43448 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: In, Size: 492, Base1: 0, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Bias, Size: 256, Base1: 492, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Scale, Size: 64, Base1: 748, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: ScaleN, Size: 64, Base1: 812, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Filter, Size: 2560, Base1: 876, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Out, Size: 8000, Base1: 3436, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: ConvOut, Size: 32000, Base1: 11436, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Infos, Size: 12, Base1: 43436, Base2: 0 +S4_Conv2d_64x1x10x4_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 43448, Reusable Memory: 5288, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S7_Conv2d_64x1x3x3_Relu ======================= +S7_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S7_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S7_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S10_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S10_Conv2d_64x64x1x1_Relu ======================= +S10_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S10_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S10_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S10_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S10_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S10_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S13_Conv2d_64x1x3x3_Relu ======================= +S13_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S13_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S13_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S13_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S13_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S16_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S16_Conv2d_64x64x1x1_Relu ======================= +S16_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S16_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S16_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S16_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S16_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S16_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S19_Conv2d_64x1x3x3_Relu ======================= +S19_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S19_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S19_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S19_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S19_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S22_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S22_Conv2d_64x64x1x1_Relu ======================= +S22_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S22_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S22_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S22_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S22_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S22_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S25_Conv2d_64x1x3x3_Relu ======================= +S25_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S25_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S25_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S25_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S25_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S28_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S28_Conv2d_64x64x1x1_Relu ======================= +S28_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S28_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S28_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S28_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S28_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S28_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 25, Pad:[0,0] => Ho: 1 +OverlapP: 23 +TileCons: 2 +UsedIn : [5 x 25] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 8000 + +==== Process Tiling For User Kernel: S29_AveragePool_25x5 ======================= +S29_AveragePool_25x5 Partition[0] Size = 16271 (Min: 250, Max: 16155), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=1 + In Dim: 25, TileOverlap: 23, Ratio: 2.000000 + Out Dim: 1, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S29_AveragePool_25x5, Total Raw Memory: 8076 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S29_AveragePool_25x5, Arg: In, Size: 8000, Base1: 0, Base2: 0 +Ker: S29_AveragePool_25x5, Arg: Out, Size: 64, Base1: 8000, Base2: 0 +Ker: S29_AveragePool_25x5, Arg: Infos, Size: 12, Base1: 8064, Base2: 0 +S29_AveragePool_25x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 8076, Reusable Memory: 40660, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S32_Linear_12x64x1x1, Linear: InDim: 64, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S32_Linear_12x64x1x1 ======================= +S32_Linear_12x64x1x1 Partition[0] Size = 1791 (Min: 0, Max: 1875), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=1 + +Kernel: S32_Linear_12x64x1x1, Total Raw Memory: 928 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S32_Linear_12x64x1x1, Arg: In, Size: 64, Base1: 0, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Filter, Size: 768, Base1: 64, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Bias, Size: 48, Base1: 832, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Out, Size: 12, Base1: 880, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Scale, Size: 12, Base1: 892, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: ScaleN, Size: 12, Base1: 904, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Infos, Size: 12, Base1: 916, Base2: 0 +S32_Linear_12x64x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 928, Reusable Memory: 47808, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S33_SoftMax ======================= + S33_SoftMax Partition[0] Size = 51 (Min: 8, Max: 63), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=12 + In Dim: 12, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 12, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S33_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S33_SoftMax, Arg: In, Size: 12, Base1: 0, Base2: 0 +Ker: S33_SoftMax, Arg: Out, Size: 24, Base1: 12, Base2: 0 +Ker: S33_SoftMax, Arg: Infos, Size: 12, Base1: 36, Base2: 0 + S33_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S32_Output[ In] Adding Edge From S32_Linear_12x64x1x1 To S33_SoftMax New + Symbol: S29_Output[ In] Adding Edge From S29_AveragePool_25x5 To S32_Linear_12x64x1x1 New + Symbol: S28_Output[ In] Adding Edge From S28_Conv2d_64x64x1x1_Relu To S29_AveragePool_25x5 New + Symbol: S25_Output[ In] Adding Edge From S25_Conv2d_64x1x3x3_Relu To S28_Conv2d_64x64x1x1_Relu New + Symbol: S22_Output[ In] Adding Edge From S22_Conv2d_64x64x1x1_Relu To S25_Conv2d_64x1x3x3_Relu New + Symbol: S19_Output[ In] Adding Edge From S19_Conv2d_64x1x3x3_Relu To S22_Conv2d_64x64x1x1_Relu New + Symbol: S16_Output[ In] Adding Edge From S16_Conv2d_64x64x1x1_Relu To S19_Conv2d_64x1x3x3_Relu New + Symbol: S13_Output[ In] Adding Edge From S13_Conv2d_64x1x3x3_Relu To S16_Conv2d_64x64x1x1_Relu New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_64x64x1x1_Relu To S13_Conv2d_64x1x3x3_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_64x1x3x3_Relu To S10_Conv2d_64x64x1x1_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_64x1x10x4_Relu To S7_Conv2d_64x1x3x3_Relu New + Symbol: Output_1[ Out] Adding Edge From S33_SoftMax To __GraphExit__ New + Symbol: S33_Infos[ In] Adding Edge From __GraphEntry__ To S33_SoftMax New + Symbol: S32_Infos[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 New + Symbol: S32_Mul_shift[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: S32_Mul_scale[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: Dscnnfc1matmul_bias[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: Dscnnfc1weights_quantfakequant[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: S29_Infos[ In] Adding Edge From __GraphEntry__ To S29_AveragePool_25x5 New + Symbol: S28_Infos[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu New + Symbol: S28_Mul_shift[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: S28_Mul_scale[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: S25_Infos[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu New + Symbol: S25_Mul_shift[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: S25_Mul_scale[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: S22_Infos[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu New + Symbol: S22_Mul_shift[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: S22_Mul_scale[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: S19_Infos[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu New + Symbol: S19_Mul_shift[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: S19_Mul_scale[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: S16_Infos[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu New + Symbol: S16_Mul_shift[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: S16_Mul_scale[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu New + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu New + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu New + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: Dscnnconv_1conv2d_fold_bias[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: Dscnnconv_1weights_quantfakequ[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: Input_1[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 24000 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 49685 => Alloc: OK + +[FULL] Remapping [24000 .. 49684] to [0 .. 25684] Align compensation: 3 +[PART] Remapping [0 .. 23999] to [25688 .. 49687] Align compensation: 0 +[PART] Remapping [49685 .. 349999] to [49688 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_s_quantCNN is sucessfull, L2: 49685 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => Input_1 + (null) => Dscnnconv_1weights_quantfakequ + (null) => Dscnnconv_1conv2d_fold_bias + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S4_Infos + (null) => Dscnnconv_ds_1dw_convweights_q + (null) => Dscnnconv_ds_1dw_convdepthwise + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S7_Infos + (null) => Dscnnconv_ds_1pw_convweights_q + (null) => Dscnnconv_ds_1pw_convconv2d_fo + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S10_Infos + (null) => Dscnnconv_ds_2dw_convweights_q + (null) => Dscnnconv_ds_2dw_convdepthwise + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S13_Infos + (null) => Dscnnconv_ds_2pw_convweights_q + (null) => Dscnnconv_ds_2pw_convconv2d_fo + (null) => S16_Mul_scale + (null) => S16_Mul_shift + (null) => S16_Infos + (null) => Dscnnconv_ds_3dw_convweights_q + (null) => Dscnnconv_ds_3dw_convdepthwise + (null) => S19_Mul_scale + (null) => S19_Mul_shift + (null) => S19_Infos + (null) => Dscnnconv_ds_3pw_convweights_q + (null) => Dscnnconv_ds_3pw_convconv2d_fo + (null) => S22_Mul_scale + (null) => S22_Mul_shift + (null) => S22_Infos + (null) => Dscnnconv_ds_4dw_convweights_q + (null) => Dscnnconv_ds_4dw_convdepthwise + (null) => S25_Mul_scale + (null) => S25_Mul_shift + (null) => S25_Infos + (null) => Dscnnconv_ds_4pw_convweights_q + (null) => Dscnnconv_ds_4pw_convconv2d_fo + (null) => S28_Mul_scale + (null) => S28_Mul_shift + (null) => S28_Infos + (null) => S29_Infos + (null) => Dscnnfc1weights_quantfakequant + (null) => Dscnnfc1matmul_bias + (null) => S32_Mul_scale + (null) => S32_Mul_shift + (null) => S32_Infos + (null) => S33_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 3 4 5 6 7 8 9 1 10 11 12 + + Living Dynamic Symbols: [Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 8: UKer S4_Conv2d_64x1x10x4_Relu, Operations: 328000 + I Buff In => Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 490, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_1weights_quantfakequ --L2-- Size: 2560, L3_Move: 0, L2_Move: 2560, TileOverhead: 1.000000, L2Buff: 0, Addr: 876 +CI Buff Bias => Dscnnconv_1conv2d_fold_bias --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 492 + O Buff Out => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 3436 +CI Buff Scale => S4_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 748 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 812 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43436 + Kernel Memory : L3: 0, L2: 11443 + Kernel Total Memory: 11443, L3 moves: 0, L2 moves: 11443, Move overhead: 1.000000 + Kernel Operations : 328000 [KernelOper/GraphOper: 12.126680%], Move/Operation ratio: [L3: 0.000000, L2: 0.034887] + Successors: 2 + + Living Dynamic Symbols: [Input_1] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S7_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_1dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_1dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S7_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 3 + + Living Dynamic Symbols: [S4_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S10_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_1pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_1pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S10_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S10_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 4 + + Living Dynamic Symbols: [S7_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S13_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S10_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_2dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_2dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S13_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S13_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 5 + + Living Dynamic Symbols: [S10_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S16_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S13_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_2pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_2pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S16_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S16_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S16_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S16_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 6 + + Living Dynamic Symbols: [S13_Output] [S16_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S19_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S16_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_3dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_3dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S19_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S19_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S19_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S19_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 7 + + Living Dynamic Symbols: [S16_Output] [S19_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S22_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S19_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_3pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_3pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S22_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S22_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S22_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S22_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 8 + + Living Dynamic Symbols: [S19_Output] [S22_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S25_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S22_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_4dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_4dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S25_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S25_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S25_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S25_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 9 + + Living Dynamic Symbols: [S22_Output] [S25_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S28_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S25_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_4pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_4pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S28_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S28_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S28_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S28_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 10 + + Living Dynamic Symbols: [S25_Output] [S28_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S29_AveragePool_25x5, Operations: 8000 + I Buff In => S28_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S29_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 8000 +CI Buff Infos => S29_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 8064 + Kernel Memory : L3: 0, L2: 8073 + Kernel Total Memory: 8073, L3 moves: 0, L2 moves: 8073, Move overhead: 1.000000 + Kernel Operations : 8000 [KernelOper/GraphOper: 0.295773%], Move/Operation ratio: [L3: 0.000000, L2: 1.009125] + Successors: 11 + + Living Dynamic Symbols: [S28_Output] [S29_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S32_Linear_12x64x1x1, Operations: 768 + I Buff In => S29_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnfc1weights_quantfakequant --L2-- Size: 768, L3_Move: 0, L2_Move: 768, TileOverhead: 1.000000, L2Buff: 0, Addr: 64 +CI Buff Bias => Dscnnfc1matmul_bias --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 832 + O Buff Out => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 880 +CI Buff Scale => S32_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 892 +CI Buff ScaleN => S32_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 904 +CI Buff Infos => S32_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 916 + Kernel Memory : L3: 0, L2: 925 + Kernel Total Memory: 925, L3 moves: 0, L2 moves: 925, Move overhead: 1.000000 + Kernel Operations : 768 [KernelOper/GraphOper: 0.028394%], Move/Operation ratio: [L3: 0.000000, L2: 1.204427] + Successors: 12 + + Living Dynamic Symbols: [S29_Output] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S33_SoftMax, Operations: 12 + I Buff In => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S33_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000444%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 20489 + Graph nodes min global memory: L3: 0, L2: 20492 + Graph sum of kernel arguments size: 170318, L3 moves: 0, L2 moves: 170318, Move overhead: 1.000000 + Graph total operations: 2704780 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 170318, Total L3_Move: 0, Total L2_Move: 170318, Tiling Overhead Average: 1.000000 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 16000 + Const 0 4492 + Total 0 20492 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + Input_1 Externally allocated + Dscnnconv_1weights_quantfakequ INSTALL: HyperFlash[ 0: 13]@ 16384 LOAD: L2[ 0: 13]@ 16384 EXEC: L2[ 0: 13]@ 16384 , Size: 2560 + Dscnnconv_1conv2d_fold_bias INSTALL: HyperFlash[ 0: 13]@ 22016 LOAD: L2[ 0: 13]@ 22016 EXEC: L2[ 0: 13]@ 22016 , Size: 256 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24320 LOAD: L2[ 0: 13]@ 24320 EXEC: L2[ 0: 13]@ 24320 , Size: 64 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24384 LOAD: L2[ 0: 13]@ 24384 EXEC: L2[ 0: 13]@ 24384 , Size: 64 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 25520 LOAD: L2[ 0: 13]@ 25520 EXEC: L2[ 0: 13]@ 25520 , Size: 9 + Dscnnconv_ds_1dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 19712 LOAD: L2[ 0: 13]@ 19712 EXEC: L2[ 0: 13]@ 19712 , Size: 576 + Dscnnconv_ds_1dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 22272 LOAD: L2[ 0: 13]@ 22272 EXEC: L2[ 0: 13]@ 22272 , Size: 256 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24448 LOAD: L2[ 0: 13]@ 24448 EXEC: L2[ 0: 13]@ 24448 , Size: 64 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24512 LOAD: L2[ 0: 13]@ 24512 EXEC: L2[ 0: 13]@ 24512 , Size: 64 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 25532 LOAD: L2[ 0: 13]@ 25532 EXEC: L2[ 0: 13]@ 25532 , Size: 9 + Dscnnconv_ds_1pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 4096 + Dscnnconv_ds_1pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 22528 LOAD: L2[ 0: 13]@ 22528 EXEC: L2[ 0: 13]@ 22528 , Size: 256 + S10_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24576 LOAD: L2[ 0: 13]@ 24576 EXEC: L2[ 0: 13]@ 24576 , Size: 64 + S10_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24640 LOAD: L2[ 0: 13]@ 24640 EXEC: L2[ 0: 13]@ 24640 , Size: 64 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 25544 LOAD: L2[ 0: 13]@ 25544 EXEC: L2[ 0: 13]@ 25544 , Size: 9 + Dscnnconv_ds_2dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 20288 LOAD: L2[ 0: 13]@ 20288 EXEC: L2[ 0: 13]@ 20288 , Size: 576 + Dscnnconv_ds_2dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 22784 LOAD: L2[ 0: 13]@ 22784 EXEC: L2[ 0: 13]@ 22784 , Size: 256 + S13_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24704 LOAD: L2[ 0: 13]@ 24704 EXEC: L2[ 0: 13]@ 24704 , Size: 64 + S13_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24768 LOAD: L2[ 0: 13]@ 24768 EXEC: L2[ 0: 13]@ 24768 , Size: 64 + S13_Infos INSTALL: HyperFlash[ 0: 13]@ 25556 LOAD: L2[ 0: 13]@ 25556 EXEC: L2[ 0: 13]@ 25556 , Size: 9 + Dscnnconv_ds_2pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 4096 LOAD: L2[ 0: 13]@ 4096 EXEC: L2[ 0: 13]@ 4096 , Size: 4096 + Dscnnconv_ds_2pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 23040 LOAD: L2[ 0: 13]@ 23040 EXEC: L2[ 0: 13]@ 23040 , Size: 256 + S16_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24832 LOAD: L2[ 0: 13]@ 24832 EXEC: L2[ 0: 13]@ 24832 , Size: 64 + S16_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24896 LOAD: L2[ 0: 13]@ 24896 EXEC: L2[ 0: 13]@ 24896 , Size: 64 + S16_Infos INSTALL: HyperFlash[ 0: 13]@ 25568 LOAD: L2[ 0: 13]@ 25568 EXEC: L2[ 0: 13]@ 25568 , Size: 9 + Dscnnconv_ds_3dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 20864 LOAD: L2[ 0: 13]@ 20864 EXEC: L2[ 0: 13]@ 20864 , Size: 576 + Dscnnconv_ds_3dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 23296 LOAD: L2[ 0: 13]@ 23296 EXEC: L2[ 0: 13]@ 23296 , Size: 256 + S19_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24960 LOAD: L2[ 0: 13]@ 24960 EXEC: L2[ 0: 13]@ 24960 , Size: 64 + S19_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25024 LOAD: L2[ 0: 13]@ 25024 EXEC: L2[ 0: 13]@ 25024 , Size: 64 + S19_Infos INSTALL: HyperFlash[ 0: 13]@ 25580 LOAD: L2[ 0: 13]@ 25580 EXEC: L2[ 0: 13]@ 25580 , Size: 9 + Dscnnconv_ds_3pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 8192 LOAD: L2[ 0: 13]@ 8192 EXEC: L2[ 0: 13]@ 8192 , Size: 4096 + Dscnnconv_ds_3pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 23552 LOAD: L2[ 0: 13]@ 23552 EXEC: L2[ 0: 13]@ 23552 , Size: 256 + S22_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25088 LOAD: L2[ 0: 13]@ 25088 EXEC: L2[ 0: 13]@ 25088 , Size: 64 + S22_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25152 LOAD: L2[ 0: 13]@ 25152 EXEC: L2[ 0: 13]@ 25152 , Size: 64 + S22_Infos INSTALL: HyperFlash[ 0: 13]@ 25592 LOAD: L2[ 0: 13]@ 25592 EXEC: L2[ 0: 13]@ 25592 , Size: 9 + Dscnnconv_ds_4dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 21440 LOAD: L2[ 0: 13]@ 21440 EXEC: L2[ 0: 13]@ 21440 , Size: 576 + Dscnnconv_ds_4dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 23808 LOAD: L2[ 0: 13]@ 23808 EXEC: L2[ 0: 13]@ 23808 , Size: 256 + S25_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25216 LOAD: L2[ 0: 13]@ 25216 EXEC: L2[ 0: 13]@ 25216 , Size: 64 + S25_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25280 LOAD: L2[ 0: 13]@ 25280 EXEC: L2[ 0: 13]@ 25280 , Size: 64 + S25_Infos INSTALL: HyperFlash[ 0: 13]@ 25604 LOAD: L2[ 0: 13]@ 25604 EXEC: L2[ 0: 13]@ 25604 , Size: 9 + Dscnnconv_ds_4pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 12288 LOAD: L2[ 0: 13]@ 12288 EXEC: L2[ 0: 13]@ 12288 , Size: 4096 + Dscnnconv_ds_4pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 24064 LOAD: L2[ 0: 13]@ 24064 EXEC: L2[ 0: 13]@ 24064 , Size: 256 + S28_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25344 LOAD: L2[ 0: 13]@ 25344 EXEC: L2[ 0: 13]@ 25344 , Size: 64 + S28_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25408 LOAD: L2[ 0: 13]@ 25408 EXEC: L2[ 0: 13]@ 25408 , Size: 64 + S28_Infos INSTALL: HyperFlash[ 0: 13]@ 25616 LOAD: L2[ 0: 13]@ 25616 EXEC: L2[ 0: 13]@ 25616 , Size: 9 + S29_Infos INSTALL: HyperFlash[ 0: 13]@ 25628 LOAD: L2[ 0: 13]@ 25628 EXEC: L2[ 0: 13]@ 25628 , Size: 9 + Dscnnfc1weights_quantfakequant INSTALL: HyperFlash[ 0: 13]@ 18944 LOAD: L2[ 0: 13]@ 18944 EXEC: L2[ 0: 13]@ 18944 , Size: 768 + Dscnnfc1matmul_bias INSTALL: HyperFlash[ 0: 13]@ 25472 LOAD: L2[ 0: 13]@ 25472 EXEC: L2[ 0: 13]@ 25472 , Size: 48 + S32_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25640 LOAD: L2[ 0: 13]@ 25640 EXEC: L2[ 0: 13]@ 25640 , Size: 12 + S32_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25652 LOAD: L2[ 0: 13]@ 25652 EXEC: L2[ 0: 13]@ 25652 , Size: 12 + S32_Infos INSTALL: HyperFlash[ 0: 13]@ 25664 LOAD: L2[ 0: 13]@ 25664 EXEC: L2[ 0: 13]@ 25664 , Size: 9 + S33_Infos INSTALL: HyperFlash[ 0: 13]@ 25676 LOAD: L2[ 0: 13]@ 25676 EXEC: L2[ 0: 13]@ 25676 , Size: 9 + Output_1 Externally allocated + S4_Output EXEC: L2[ 1: 2]@ 33688 , Size: 8000 + S7_Output EXEC: L2[ 2: 3]@ 25688 , Size: 8000 + S10_Output EXEC: L2[ 3: 4]@ 33688 , Size: 8000 + S13_Output EXEC: L2[ 4: 5]@ 25688 , Size: 8000 + S16_Output EXEC: L2[ 5: 6]@ 33688 , Size: 8000 + S19_Output EXEC: L2[ 6: 7]@ 25688 , Size: 8000 + S22_Output EXEC: L2[ 7: 8]@ 33688 , Size: 8000 + S25_Output EXEC: L2[ 8: 9]@ 41688 , Size: 8000 + S28_Output EXEC: L2[ 9: 10]@ 25688 , Size: 8000 + S29_Output EXEC: L2[ 10: 11]@ 33688 , Size: 64 + S32_Output EXEC: L2[ 11: 12]@ 25688 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S4_Conv2d_64x1x10x4_Relu +Generating Code For User Kernel: S7_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S10_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S13_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S16_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S19_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S22_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S25_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S28_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S29_AveragePool_25x5 +Generating Code For User Kernel: S32_Linear_12x64x1x1 +Generating Code For User Kernel: S33_SoftMax +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1weights_quantfakequ.tensor: 2560 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1conv2d_fold_bias.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S29_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1weights_quantfakequant.tensor: 768 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1matmul_bias.tensor: 12 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S33_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_s_quant_L3_Flash_Const.dat (size 25688) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 43448 +L2 Memory size (Bytes) : Given: 350000, Used: 49685 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 170318 Bytes +Sum of all Kernels arguments size : 170318 Bytes +Tiling Bandwith overhead : 1.000000 Move/KerArgSize +Sum of baseline bandwidth : 4248224 Bytes +Percentage of baseline BW for L2 : 4.00916 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 2704780 Operations +Total amount of flash coefficients : 25688 Bytes + +Basic kernels library : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_s_quant.h +Output Directory : BUILD_MODEL_SQ8_EMUL + +The following files have been generated: + KWS_ds_cnn_s_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_s_quantKernels.h Header file for the generated C code + KWS_ds_cnn_s_quant_L3_Flash_Const.dat Flash content for Graph constants +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o -lm +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +W0608 13:21:00.181645 139822583715648 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +W0608 13:21:00.182285 139822583715648 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +W0608 13:21:00.182616 139822583715648 module_wrapper.py:139] From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +2021-06-08 13:21:00.184083: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA +2021-06-08 13:21:00.193405: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2096060000 Hz +2021-06-08 13:21:00.193914: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x56430a0a74c0 initialized for platform Host (this does not guarantee that XLA will be used). Devices: +2021-06-08 13:21:00.193983: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2021-06-08 13:21:00.196430: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/marco-gwt/GWT/gap_sdk/install/workstation/lib +2021-06-08 13:21:00.196607: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: UNKNOWN ERROR (303) +2021-06-08 13:21:00.196642: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ubuntu): /proc/driver/nvidia/version does not exist +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +W0608 13:22:38.087795 139822583715648 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +W0608 13:22:38.089494 139822583715648 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +W0608 13:22:38.438487 139822583715648 module_wrapper.py:139] From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +INFO:tensorflow:Validation set size:4445 +I0608 13:22:38.438748 139822583715648 test_accuracy_emul.py:111] Validation set size:4445 +INFO:tensorflow:Test set size:4890 +I0608 13:24:19.643186 139822583715648 test_accuracy_emul.py:157] Test set size:4890 +rm: cannot remove 'test.pgm': No such file or directory +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': True} +Pred/Tot: 93/ 100 Accuracy: 93.00% +Pred/Tot: 190/ 200 Accuracy: 95.00% +Pred/Tot: 282/ 300 Accuracy: 94.00% +Pred/Tot: 372/ 400 Accuracy: 93.00% +Pred/Tot: 466/ 500 Accuracy: 93.20% +Pred/Tot: 558/ 600 Accuracy: 93.00% +Pred/Tot: 649/ 700 Accuracy: 92.71% +Pred/Tot: 741/ 800 Accuracy: 92.62% +Pred/Tot: 831/ 900 Accuracy: 92.33% +Pred/Tot: 927/1000 Accuracy: 92.70% +Pred/Tot: 1020/1100 Accuracy: 92.73% +Pred/Tot: 1109/1200 Accuracy: 92.42% +Pred/Tot: 1200/1300 Accuracy: 92.31% +Pred/Tot: 1291/1400 Accuracy: 92.21% +Pred/Tot: 1381/1500 Accuracy: 92.07% +Pred/Tot: 1477/1600 Accuracy: 92.31% +Pred/Tot: 1570/1700 Accuracy: 92.35% +Pred/Tot: 1667/1800 Accuracy: 92.61% +Pred/Tot: 1761/1900 Accuracy: 92.68% +Pred/Tot: 1856/2000 Accuracy: 92.80% +Pred/Tot: 1953/2100 Accuracy: 93.00% +Pred/Tot: 2047/2200 Accuracy: 93.05% +Pred/Tot: 2142/2300 Accuracy: 93.13% +Pred/Tot: 2234/2400 Accuracy: 93.08% +Pred/Tot: 2325/2500 Accuracy: 93.00% +Pred/Tot: 2415/2600 Accuracy: 92.88% +Pred/Tot: 2510/2700 Accuracy: 92.96% +Pred/Tot: 2604/2800 Accuracy: 93.00% +Pred/Tot: 2694/2900 Accuracy: 92.90% +Pred/Tot: 2785/3000 Accuracy: 92.83% +Pred/Tot: 2881/3100 Accuracy: 92.94% +Pred/Tot: 2974/3200 Accuracy: 92.94% +Pred/Tot: 3071/3300 Accuracy: 93.06% +Pred/Tot: 3162/3400 Accuracy: 93.00% +Pred/Tot: 3256/3500 Accuracy: 93.03% +Pred/Tot: 3348/3600 Accuracy: 93.00% +Pred/Tot: 3436/3700 Accuracy: 92.86% +Pred/Tot: 3530/3800 Accuracy: 92.89% +Pred/Tot: 3623/3900 Accuracy: 92.90% +Pred/Tot: 3713/4000 Accuracy: 92.83% +Pred/Tot: 3808/4100 Accuracy: 92.88% +Pred/Tot: 3901/4200 Accuracy: 92.88% +Pred/Tot: 3994/4300 Accuracy: 92.88% +Pred/Tot: 4088/4400 Accuracy: 92.91% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 4127/4444 Accuracy: 92.87% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 1 319 0 4 1 6 4 8 10 3 4 11] + [ 0 6 382 3 0 0 2 1 0 0 0 3] + [ 2 7 4 374 0 3 1 3 0 0 1 11] + [ 0 8 0 0 319 2 0 0 0 8 10 3] + [ 0 6 1 8 1 350 0 0 0 0 4 7] + [ 0 13 9 0 0 0 323 6 0 0 1 0] + [ 0 9 0 0 0 0 2 351 0 0 0 1] + [ 1 6 1 0 6 0 1 2 340 4 0 2] + [ 0 3 0 0 22 0 0 0 12 323 8 5] + [ 0 4 0 0 8 1 1 0 1 2 331 2] + [ 0 10 1 6 2 4 0 0 2 0 3 344]] +Pred/Tot: 92/ 100 Accuracy: 92.00% +Pred/Tot: 189/ 200 Accuracy: 94.50% +Pred/Tot: 284/ 300 Accuracy: 94.67% +Pred/Tot: 377/ 400 Accuracy: 94.25% +Pred/Tot: 465/ 500 Accuracy: 93.00% +Pred/Tot: 557/ 600 Accuracy: 92.83% +Pred/Tot: 653/ 700 Accuracy: 93.29% +Pred/Tot: 748/ 800 Accuracy: 93.50% +Pred/Tot: 842/ 900 Accuracy: 93.56% +Pred/Tot: 939/1000 Accuracy: 93.90% +Pred/Tot: 1027/1100 Accuracy: 93.36% +Pred/Tot: 1124/1200 Accuracy: 93.67% +Pred/Tot: 1215/1300 Accuracy: 93.46% +Pred/Tot: 1308/1400 Accuracy: 93.43% +Pred/Tot: 1400/1500 Accuracy: 93.33% +Pred/Tot: 1496/1600 Accuracy: 93.50% +Pred/Tot: 1588/1700 Accuracy: 93.41% +Pred/Tot: 1679/1800 Accuracy: 93.28% +Pred/Tot: 1771/1900 Accuracy: 93.21% +Pred/Tot: 1865/2000 Accuracy: 93.25% +Pred/Tot: 1959/2100 Accuracy: 93.29% +Pred/Tot: 2055/2200 Accuracy: 93.41% +Pred/Tot: 2147/2300 Accuracy: 93.35% +Pred/Tot: 2243/2400 Accuracy: 93.46% +Pred/Tot: 2336/2500 Accuracy: 93.44% +Pred/Tot: 2426/2600 Accuracy: 93.31% +Pred/Tot: 2519/2700 Accuracy: 93.30% +Pred/Tot: 2610/2800 Accuracy: 93.21% +Pred/Tot: 2700/2900 Accuracy: 93.10% +Pred/Tot: 2792/3000 Accuracy: 93.07% +Pred/Tot: 2887/3100 Accuracy: 93.13% +Pred/Tot: 2982/3200 Accuracy: 93.19% +Pred/Tot: 3080/3300 Accuracy: 93.33% +Pred/Tot: 3176/3400 Accuracy: 93.41% +Pred/Tot: 3273/3500 Accuracy: 93.51% +Pred/Tot: 3372/3600 Accuracy: 93.67% +Pred/Tot: 3464/3700 Accuracy: 93.62% +Pred/Tot: 3560/3800 Accuracy: 93.68% +Pred/Tot: 3655/3900 Accuracy: 93.72% +Pred/Tot: 3749/4000 Accuracy: 93.73% +Pred/Tot: 3842/4100 Accuracy: 93.71% +Pred/Tot: 3936/4200 Accuracy: 93.71% +Pred/Tot: 4033/4300 Accuracy: 93.79% +Pred/Tot: 4127/4400 Accuracy: 93.80% +Pred/Tot: 4219/4500 Accuracy: 93.76% +Pred/Tot: 4309/4600 Accuracy: 93.67% +Pred/Tot: 4402/4700 Accuracy: 93.66% +Pred/Tot: 4497/4800 Accuracy: 93.69% + +FINAL TESTING ACCURACY: +Pred/Tot: 4579/4889 Accuracy: 93.66% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 361 0 7 3 4 4 9 7 1 7 5] + [ 0 12 396 5 0 0 4 0 0 0 1 1] + [ 1 6 1 384 0 3 1 1 0 0 0 8] + [ 0 9 0 0 389 0 1 0 3 12 9 2] + [ 0 9 2 10 0 369 0 2 0 0 3 11] + [ 0 4 7 0 2 0 395 3 0 0 1 0] + [ 0 11 0 0 1 1 2 379 0 0 1 1] + [ 0 11 0 0 3 0 0 0 369 6 1 6] + [ 1 7 0 1 16 0 3 0 6 361 0 7] + [ 0 3 0 0 3 4 0 0 0 0 396 5] + [ 0 7 0 14 2 1 2 2 1 1 0 372]] diff --git a/accuracy_log/log_test_small_spectr.txt b/accuracy_log/log_test_small_spectr.txt new file mode 100644 index 0000000..9a672ac --- /dev/null +++ b/accuracy_log/log_test_small_spectr.txt @@ -0,0 +1,1457 @@ +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_s_quant.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 0 --use_high_prec 0 +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=1 MEDIUM=0 LARGE=0 WITH_MFCC=1 USE_POWER=0 USE_HIGH_PREC=0 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/generators/nntool_extra_generators.c +rm -f BUILD_MODEL_8BIT_EMUL/GenTile +rm -f -rf BUILD_MODEL_8BIT_EMUL +rm -f BUILD_MODEL_8BIT_EMUL/*.dat +rm -rf /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_8BIT_EMUL +cp model/KWS_ds_cnn_s_quant.tflite BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quant.tflite +echo "GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL" +GENERATING AUTOTILER MODEL BUILD_MODEL_8BIT_EMUL +sed -e "s|MODEL_SRC|KWS_ds_cnn_s_quantModel.c|g" -e "s|TENSORS_DIR|BUILD_MODEL_8BIT_EMUL/tensors|g" -e "s|MODEL_BUILD|BUILD_MODEL_8BIT_EMUL|g" -e "s|GRAPH_DUMP||g" -e "s|LARGE_OPT||g" \ + model/nntool_script_params > model/nntool_script +nntool -s model/nntool_script BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +load_quantization - was: False +now: True +open - opening graph file BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quant.tflite load_quantizaion = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +debug - was: False +now: True +adjust_order - adding transposes to correct tensor order for AT kernels +set_aliases - looking for aliased edges +eliminate_transposes - eliminating unnecessary transposes +eliminate_transposes - search for transposes +eliminate_transposes - ++ Starting up from DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - looking up at input_1[0] transpose [2, 0, 1] +eliminate_transposes - accepted input_1 - input without fixed order - transpose input [2, 0, 1] +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_0_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_1 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_0[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_1_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_2[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_2 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_1[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_2[0] +eliminate_transposes - looking down at CONV_2D_0_2_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_3 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_2[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_3_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_4[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_4 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_3[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_4[0] +eliminate_transposes - looking down at CONV_2D_0_4_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_5 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_4[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_5_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_6[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_6 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_5[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_6[0] +eliminate_transposes - looking down at CONV_2D_0_6_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7[0] transpose [1, 2, 0] +eliminate_transposes - accepted DEPTHWISE_CONV_2D_0_7 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_6[0] +eliminate_transposes - ++ Starting down from DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - looking down at DEPTHWISE_CONV_2D_0_7_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at CONV_2D_0_8[0] transpose [1, 2, 0] +eliminate_transposes - accepted CONV_2D_0_8 - transpose in (1) +eliminate_transposes - ++ Found results for DEPTHWISE_CONV_2D_0_7[0] +eliminate_transposes - ++ Starting down from CONV_2D_0_8[0] +eliminate_transposes - looking down at CONV_2D_0_8_activation[0] transpose [1, 2, 0] +eliminate_transposes - looking down at AVERAGE_POOL_2D_0_9[0] transpose [1, 2, 0] +eliminate_transposes - accepted AVERAGE_POOL_2D_0_9 - transpose in (1) +eliminate_transposes - ++ Found results for CONV_2D_0_8[0] +eliminate_transposes - ++ Starting down from AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - looking down at FULLY_CONNECTED_0_10[0] transpose [1, 2, 0] +eliminate_transposes - accepted FULLY_CONNECTED_0_10 - linear layer reorder input +eliminate_transposes - ++ Found results for AVERAGE_POOL_2D_0_9[0] +eliminate_transposes - eliminate transposes +eliminate_transposes_actions - Start Action (up): DEPTHWISE_CONV_2D_0_0 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 delete transpose in[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - input_1 input dims with (2, 0, 1) +eliminate_transposes_actions - End Action (up): input_1 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_0 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_0 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_1 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_1 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_2 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_2 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_2 +eliminate_transposes_actions - CONV_2D_0_2 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_2 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_3 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_3 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_4 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_4 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_4 +eliminate_transposes_actions - CONV_2D_0_4 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_4 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_5 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_5 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_6 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_6 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_6 +eliminate_transposes_actions - CONV_2D_0_6 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_6 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose in[0] +eliminate_transposes_actions - End Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - Start Action (down): DEPTHWISE_CONV_2D_0_7 +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 delete transpose out[0] +eliminate_transposes_actions - DEPTHWISE_CONV_2D_0_7 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - CONV_2D_0_8 delete transpose in[0] +eliminate_transposes_actions - End Action (down): CONV_2D_0_8 +eliminate_transposes_actions - Start Action (down): CONV_2D_0_8 +eliminate_transposes_actions - CONV_2D_0_8 delete transpose out[0] +eliminate_transposes_actions - CONV_2D_0_8 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint in[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose in[0] +eliminate_transposes_actions - End Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - Start Action (down): AVERAGE_POOL_2D_0_9 +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 delete transpose out[0] +eliminate_transposes_actions - AVERAGE_POOL_2D_0_9 set hint out[0] to ['c', 'h', 'w'] +eliminate_transposes_actions - reorder linear layer FULLY_CONNECTED_0_10 in with shape 1x1x64 transposed (2, 0, 1) +eliminate_transposes_actions - End Action (down): FULLY_CONNECTED_0_10 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - search for transposes +eliminate_transposes - no transposes to eliminate found +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +eliminate_transposes - no further transpose sequences found +set_aliases - looking for aliased edges +nngraph - adjusted order +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start remove_relus +matcher - fusions - start remove_noops +matcher - fusions - start fuse_external_bias_sq8 +matcher - fusions - start fuse_pad +matcher - fusions - start match_duplicate_operations +matcher - fusions - start gather_to_split +matcher - fusions - start slice_to_split +matcher - fusions - start unused_concats +matcher - fusions - start find_missing_quantization +matcher - fusions - start rnn_reverse +matcher - fusions - start rnn_unpack +matcher - fusions - start match_far_hsigmoid +matcher - fusions - start match_close_hsigmoid +matcher - fusions - start expand_transposes +matcher - fusions - start move_pooling_scale8 +matcher - fusions - start move_activations_scale8 +matcher - fusions - start fuse_gap_convs +matcher - fusions - start match_conv_active_pool +matcher - fusions - start match_conv_pool_active +matcher - fusions - start match_conv_active +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_0,DEPTHWISE_CONV_2D_0_0_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_1,DEPTHWISE_CONV_2D_0_1_activation +match_gap_conv - fusing nodes CONV_2D_0_2,CONV_2D_0_2_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_3,DEPTHWISE_CONV_2D_0_3_activation +match_gap_conv - fusing nodes CONV_2D_0_4,CONV_2D_0_4_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_5,DEPTHWISE_CONV_2D_0_5_activation +match_gap_conv - fusing nodes CONV_2D_0_6,CONV_2D_0_6_activation +match_gap_conv - fusing nodes DEPTHWISE_CONV_2D_0_7,DEPTHWISE_CONV_2D_0_7_activation +match_gap_conv - fusing nodes CONV_2D_0_8,CONV_2D_0_8_activation +matcher - fusions - match_conv_active modified graph +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +matcher - fusions - start match_conv_pool +matcher - fusions - start fuse_gap_linear +matcher - fusions - start fuse_op_activation_scale8 +matcher - fusions - start propagate_softmax_sym_qrec +equalize_sym_mult_concats - propagating scale up from node SOFTMAX_0_11 to node FULLY_CONNECTED_0_10 +matcher - fusions - start equalize_sm_concats +matcher - fusions - start filter_bigger_than_input +matcher - fusions - start insert_copies +matcher - fusions - start propagate_up_rnn_in_qs +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| Step | Step name | Operation | Input Dims | Output Dims | Inputs | Active | Params | Ops | Params | Hints | +| | | | (cxhxw) | (cxhxw) | | size | size | | | | ++======+=============================+=========================+============+=============+========+========+========+=========+============================+======================+ +| 0 | input_1 | input | 1x49x10 | 1x49x10 | | 490 | 0 | | I 1x49x10 FIXED_ORDER=0 | in: hxwxc out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 1 | DEPTHWISE_CONV_2D_0_0_fusio | conv_fusion_conv_active | 1x49x10 | 64x25x5 | 0 | 8490 | 2624 | 320.00K | F 64x1x10x4 S 2x2 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 1 M 1 P 4x5x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 2 | DEPTHWISE_CONV_2D_0_1_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 1 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 3 | CONV_2D_0_2_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 2 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 4 | DEPTHWISE_CONV_2D_0_3_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 3 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 5 | CONV_2D_0_4_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 4 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 6 | DEPTHWISE_CONV_2D_0_5_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 5 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 7 | CONV_2D_0_6_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 6 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 8 | DEPTHWISE_CONV_2D_0_7_fusio | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 7 | 16000 | 73 | 72.00K | F 64x1x3x3 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | n | | | | | | | | 64 M 1 P 1x1x1x1 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 9 | CONV_2D_0_8_fusion | conv_fusion_conv_active | 64x25x5 | 64x25x5 | 8 | 16000 | 4160 | 512.00K | F 64x64x1x1 S 1x1 D 1x1 G | in: cxhxw out: cxhxw | +| | | | | | | | | | 1 M 1 P 0x0x0x0 zero, | | +| | | | | | | | | | Activation relu | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 10 | AVERAGE_POOL_2D_0_9 | average_pool | 64x25x5 | 64x1x1 | 9 | 8064 | 0 | 8.06K | T average F 25x5 S 2x2 P | in: cxhxw out: cxhxw | +| | | | | | | | | | 0x0x0x0 zero | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 11 | FULLY_CONNECTED_0_10 | linear | 64x1x1 | 12 | 10 | 76 | 780 | 768 | F 12x64x1x1 | in: cx0x1 out: c | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 12 | SOFTMAX_0_11 | softmax | 12 | 12 | 11 | 24 | 0 | 24 | Beta 0.0 Axis None | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| 13 | output_1 | output | 12 | 12 | 12 | 12 | 0 | | O 12 FIXED_ORDER=0 | in: none out: none | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | 16000 | 20336 | 2.66M | | | +| | Max active/Total params | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ +| | Totals (#) | | | | | | 36336 | 2.66M | | | +| | Max mem usage | | | | | | | | | | ++------+-----------------------------+-------------------------+------------+-------------+--------+--------+--------+---------+----------------------------+----------------------+ ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| Step | Name | In | Out | Weights | Bias | Mulbias | Calc | Acc | ++======+==================+==================+=================+=================+==========+==========+=======+=======+ +| 0 | input_1 | -247.97>chan | Q32.0 | Q32.0 | +| | D_0_0 | 27124<246.03 | 37761<30.02 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 1 | DEPTHWISE_CONV_2 | -30.26>chan | Q32.0 | Q32.0 | +| | D_0_1 | 7761<30.02 | 31869<31.54 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 2 | DEPTHWISE_CONV_2 | -31.78>chan | Q32.0 | Q32.0 | +| | | 1869<31.54 | 76032<27.15 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 3 | CONV_2D_0_2_acti | -27.36>chan | Q32.0 | Q32.0 | +| | D_0_3 | 6032<27.15 | 53918<26.99 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 4 | DEPTHWISE_CONV_2 | -27.21>chan | Q32.0 | Q32.0 | +| | | 3918<26.99 | 48160<21.91 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 5 | CONV_2D_0_4_acti | -22.08>chan | Q32.0 | Q32.0 | +| | D_0_5 | 8160<21.91 | 48363<22.03 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 6 | DEPTHWISE_CONV_2 | -22.21>chan | Q32.0 | Q32.0 | +| | | 8363<22.03 | 22380<16.54 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 7 | CONV_2D_0_6_acti | -16.67>chan | Q32.0 | Q32.0 | +| | D_0_7 | 2380<16.54 | 42478<15.17 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 8 | DEPTHWISE_CONV_2 | -15.29>chan | Q32.0 | Q32.0 | +| | | 2478<15.17 | 86798<11.29 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 9 | CONV_2D_0_8_acti | -11.38>chan | Q32.0 | Q32.0 | +| | 0_10 | 6798<11.29 | 00000<12.09 | an | | | | | ++------+------------------+------------------+-----------------+-----------------+----------+----------+-------+-------+ +| 12 | SOFTMAX_0_11 | -12.19 W: 10, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 328000 + +==== Process Tiling For User Kernel: S1_Conv2d_64x1x10x4_Relu ======================= +S1_Conv2d_64x1x10x4_Relu Partition[0] Size = 39409 (Min: 200, Max: 55313), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S1_Conv2d_64x1x10x4_Relu, Total Raw Memory: 43448 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S1_Conv2d_64x1x10x4_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 43448, Reusable Memory: 5288, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S2_Conv2d_64x1x3x3_Relu ======================= +S2_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S2_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S2_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S2_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S2_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S2_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S3_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S3_Conv2d_64x64x1x1_Relu ======================= +S3_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S3_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S3_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S3_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S3_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S3_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S4_Conv2d_64x1x3x3_Relu ======================= +S4_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S4_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S4_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S4_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S4_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S4_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S5_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S5_Conv2d_64x64x1x1_Relu ======================= +S5_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S5_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S5_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S5_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S5_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S5_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S6_Conv2d_64x1x3x3_Relu ======================= +S6_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S6_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S6_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S6_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S6_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S6_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S7_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S7_Conv2d_64x64x1x1_Relu ======================= +S7_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S7_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S7_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S7_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S7_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S7_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S8_Conv2d_64x1x3x3_Relu ======================= +S8_Conv2d_64x1x3x3_Relu Partition[0] Size = 36521 (Min: 30, Max: 67465), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 bytes will require 256 bytes buffer +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 bytes will require 64 bytes buffer +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 bytes will require 64 bytes buffer +S8_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 bytes will require 576 bytes buffer +S8_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S8_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S8_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +S8_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S9_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_ReLU_SQ8 + +==== Process Tiling For User Kernel: S9_Conv2d_64x64x1x1_Relu ======================= +S9_Conv2d_64x64x1x1_Relu Partition[0] Size = 1440 (Min: 1024, Max: 8704), Fraction: 0.52, Giving: 8704 bytes out of 48736 bytes +S9_Conv2d_64x64x1x1_Relu Partition[1] Size = 1321 (Min: 512, Max: 32393), Fraction: 0.48, Giving: 40031 bytes out of 48736 bytes +Kernel: S9_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40031. Promoting all kernel arguments to initialized buffers. +S9_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40031, Used L1 Memory: 16268, Reusable Memory: 23760, Used L2 Memory: 0 +Kernel: S9_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8704. Promoting all kernel arguments to initialized buffers. +S9_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8704, Used L1 Memory: 4480, Reusable Memory: 4224, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 25, Pad:[0,0] => Ho: 1 +OverlapP: 23 +TileCons: 2 +UsedIn : [5 x 25] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 8000 + +==== Process Tiling For User Kernel: S10_AveragePool_25x5 ======================= +S10_AveragePool_25x5 Partition[0] Size = 16277 (Min: 250, Max: 16173), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S10_AveragePool_25x5, Total Raw Memory: 8076 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S10_AveragePool_25x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 8076, Reusable Memory: 40660, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S11_Linear_12x64x1x1, Linear: InDim: 64, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S11_Linear_12x64x1x1 ======================= +S11_Linear_12x64x1x1 Partition[0] Size = 1805 (Min: 0, Max: 1973), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S11_Linear_12x64x1x1, Total Raw Memory: 928 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +S11_Linear_12x64x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 928, Reusable Memory: 47808, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S12_SoftMax ======================= + S12_SoftMax Partition[0] Size = 57 (Min: 8, Max: 81), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes +Kernel: S12_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. + S12_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S11_Output[ In] Adding Edge From S11_Linear_12x64x1x1 To S12_SoftMax New + Symbol: S10_Output[ In] Adding Edge From S10_AveragePool_25x5 To S11_Linear_12x64x1x1 New + Symbol: S9_Output[ In] Adding Edge From S9_Conv2d_64x64x1x1_Relu To S10_AveragePool_25x5 New + Symbol: S8_Output[ In] Adding Edge From S8_Conv2d_64x1x3x3_Relu To S9_Conv2d_64x64x1x1_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_64x64x1x1_Relu To S8_Conv2d_64x1x3x3_Relu New + Symbol: S6_Output[ In] Adding Edge From S6_Conv2d_64x1x3x3_Relu To S7_Conv2d_64x64x1x1_Relu New + Symbol: S5_Output[ In] Adding Edge From S5_Conv2d_64x64x1x1_Relu To S6_Conv2d_64x1x3x3_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_64x1x3x3_Relu To S5_Conv2d_64x64x1x1_Relu New + Symbol: S3_Output[ In] Adding Edge From S3_Conv2d_64x64x1x1_Relu To S4_Conv2d_64x1x3x3_Relu New + Symbol: S2_Output[ In] Adding Edge From S2_Conv2d_64x1x3x3_Relu To S3_Conv2d_64x64x1x1_Relu New + Symbol: S1_Output[ In] Adding Edge From S1_Conv2d_64x1x10x4_Relu To S2_Conv2d_64x1x3x3_Relu New + Symbol: Output_1[Undef] Adding Edge From S12_SoftMax To __GraphExit__ New + Symbol: S12_Infos[ In] Adding Edge From __GraphEntry__ To S12_SoftMax New + Symbol: S11_Mul_shift[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 New + Symbol: S11_Mul_scale[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S11_Biases[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S11_Weights[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S11_Infos[ In] Adding Edge From __GraphEntry__ To S11_Linear_12x64x1x1 Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_AveragePool_25x5 New + Symbol: S9_Mul_shift[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu New + Symbol: S9_Mul_scale[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S9_Biases[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S9_Weights[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S9_Infos[ In] Adding Edge From __GraphEntry__ To S9_Conv2d_64x64x1x1_Relu Exists + Symbol: S8_Mul_shift[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu New + Symbol: S8_Mul_scale[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S8_Biases[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S8_Weights[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S8_Infos[ In] Adding Edge From __GraphEntry__ To S8_Conv2d_64x1x3x3_Relu Exists + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu New + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Biases[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Weights[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x64x1x1_Relu Exists + Symbol: S6_Mul_shift[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu New + Symbol: S6_Mul_scale[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S6_Biases[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S6_Weights[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S6_Infos[ In] Adding Edge From __GraphEntry__ To S6_Conv2d_64x1x3x3_Relu Exists + Symbol: S5_Mul_shift[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu New + Symbol: S5_Mul_scale[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S5_Biases[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S5_Weights[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S5_Infos[ In] Adding Edge From __GraphEntry__ To S5_Conv2d_64x64x1x1_Relu Exists + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu New + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Biases[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Weights[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x3x3_Relu Exists + Symbol: S3_Mul_shift[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu New + Symbol: S3_Mul_scale[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S3_Biases[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S3_Weights[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S3_Infos[ In] Adding Edge From __GraphEntry__ To S3_Conv2d_64x64x1x1_Relu Exists + Symbol: S2_Mul_shift[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu New + Symbol: S2_Mul_scale[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S2_Biases[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S2_Weights[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S2_Infos[ In] Adding Edge From __GraphEntry__ To S2_Conv2d_64x1x3x3_Relu Exists + Symbol: S1_Mul_shift[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu New + Symbol: S1_Mul_scale[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S1_Biases[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S1_Weights[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S1_Infos[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists + Symbol: S0_Input_1[ In] Adding Edge From __GraphEntry__ To S1_Conv2d_64x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 24000 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 49685 => Alloc: OK + +[FULL] Remapping [24000 .. 49684] to [0 .. 25684] Align compensation: 3 +[PART] Remapping [0 .. 23999] to [25688 .. 49687] Align compensation: 0 +[PART] Remapping [49685 .. 349999] to [49688 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_s_quantCNN is sucessfull, L2: 49685 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => S0_Input_1 + (null) => S1_Infos + (null) => S1_Weights + (null) => S1_Biases + (null) => S1_Mul_scale + (null) => S1_Mul_shift + (null) => S2_Infos + (null) => S2_Weights + (null) => S2_Biases + (null) => S2_Mul_scale + (null) => S2_Mul_shift + (null) => S3_Infos + (null) => S3_Weights + (null) => S3_Biases + (null) => S3_Mul_scale + (null) => S3_Mul_shift + (null) => S4_Infos + (null) => S4_Weights + (null) => S4_Biases + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S5_Infos + (null) => S5_Weights + (null) => S5_Biases + (null) => S5_Mul_scale + (null) => S5_Mul_shift + (null) => S6_Infos + (null) => S6_Weights + (null) => S6_Biases + (null) => S6_Mul_scale + (null) => S6_Mul_shift + (null) => S7_Infos + (null) => S7_Weights + (null) => S7_Biases + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S8_Infos + (null) => S8_Weights + (null) => S8_Biases + (null) => S8_Mul_scale + (null) => S8_Mul_shift + (null) => S9_Infos + (null) => S9_Weights + (null) => S9_Biases + (null) => S9_Mul_scale + (null) => S9_Mul_shift + (null) => S10_Infos + (null) => S11_Infos + (null) => S11_Weights + (null) => S11_Biases + (null) => S11_Mul_scale + (null) => S11_Mul_shift + (null) => S12_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 3 4 5 6 7 8 9 1 10 11 12 + + Living Dynamic Symbols: [S0_Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 8: UKer S1_Conv2d_64x1x10x4_Relu, Operations: 328000 + I Buff In => S0_Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 490, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S1_Weights --L2-- Size: 2560, L3_Move: 0, L2_Move: 2560, TileOverhead: 1.000000, L2Buff: 0, Addr: 876 +CI Buff Bias => S1_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 492 + O Buff Out => S1_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 3436 +CI Buff Scale => S1_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 748 +CI Buff ScaleN => S1_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 812 +CI Buff Infos => S1_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43436 + Kernel Memory : L3: 0, L2: 11443 + Kernel Total Memory: 11443, L3 moves: 0, L2 moves: 11443, Move overhead: 1.000000 + Kernel Operations : 328000 [KernelOper/GraphOper: 12.126680%], Move/Operation ratio: [L3: 0.000000, L2: 0.034887] + Successors: 2 + + Living Dynamic Symbols: [S0_Input_1] [S1_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S2_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S1_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S2_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S2_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S2_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S2_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S2_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S2_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 3 + + Living Dynamic Symbols: [S1_Output] [S2_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S3_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S2_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S3_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S3_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S3_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S3_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S3_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S3_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 4 + + Living Dynamic Symbols: [S2_Output] [S3_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S4_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S3_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S4_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S4_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S4_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 5 + + Living Dynamic Symbols: [S3_Output] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S5_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S5_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S5_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S5_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S5_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S5_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S5_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 6 + + Living Dynamic Symbols: [S4_Output] [S5_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S6_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S5_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S6_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S6_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S6_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S6_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S6_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S6_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 7 + + Living Dynamic Symbols: [S5_Output] [S6_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S7_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S6_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S7_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S7_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S7_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 8 + + Living Dynamic Symbols: [S6_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S8_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S8_Weights --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => S8_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S8_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S8_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S8_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S8_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 9 + + Living Dynamic Symbols: [S7_Output] [S8_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S9_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S8_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => S9_Weights --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => S9_Biases --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S9_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S9_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S9_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S9_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 10 + + Living Dynamic Symbols: [S8_Output] [S9_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S10_AveragePool_25x5, Operations: 8000 + I Buff In => S9_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S10_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 8000 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 8064 + Kernel Memory : L3: 0, L2: 8073 + Kernel Total Memory: 8073, L3 moves: 0, L2 moves: 8073, Move overhead: 1.000000 + Kernel Operations : 8000 [KernelOper/GraphOper: 0.295773%], Move/Operation ratio: [L3: 0.000000, L2: 1.009125] + Successors: 11 + + Living Dynamic Symbols: [S9_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S11_Linear_12x64x1x1, Operations: 768 + I Buff In => S10_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => S11_Weights --L2-- Size: 768, L3_Move: 0, L2_Move: 768, TileOverhead: 1.000000, L2Buff: 0, Addr: 64 +CI Buff Bias => S11_Biases --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 832 + O Buff Out => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 880 +CI Buff Scale => S11_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 892 +CI Buff ScaleN => S11_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 904 +CI Buff Infos => S11_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 916 + Kernel Memory : L3: 0, L2: 925 + Kernel Total Memory: 925, L3 moves: 0, L2 moves: 925, Move overhead: 1.000000 + Kernel Operations : 768 [KernelOper/GraphOper: 0.028394%], Move/Operation ratio: [L3: 0.000000, L2: 1.204427] + Successors: 12 + + Living Dynamic Symbols: [S10_Output] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S12_SoftMax, Operations: 12 + I Buff In => S11_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S12_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000444%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S11_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 20489 + Graph nodes min global memory: L3: 0, L2: 20492 + Graph sum of kernel arguments size: 170318, L3 moves: 0, L2 moves: 170318, Move overhead: 1.000000 + Graph total operations: 2704780 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 170318, Total L3_Move: 0, Total L2_Move: 170318, Tiling Overhead Average: 1.000000 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 16000 + Const 0 4492 + Total 0 20492 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + S0_Input_1 Externally allocated + S1_Infos INSTALL: HyperFlash[ 0: 13]@ 25520 LOAD: L2[ 0: 13]@ 25520 EXEC: L2[ 0: 13]@ 25520 , Size: 9 + S1_Weights INSTALL: HyperFlash[ 0: 13]@ 16384 LOAD: L2[ 0: 13]@ 16384 EXEC: L2[ 0: 13]@ 16384 , Size: 2560 + S1_Biases INSTALL: HyperFlash[ 0: 13]@ 22016 LOAD: L2[ 0: 13]@ 22016 EXEC: L2[ 0: 13]@ 22016 , Size: 256 + S1_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24320 LOAD: L2[ 0: 13]@ 24320 EXEC: L2[ 0: 13]@ 24320 , Size: 64 + S1_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24384 LOAD: L2[ 0: 13]@ 24384 EXEC: L2[ 0: 13]@ 24384 , Size: 64 + S2_Infos INSTALL: HyperFlash[ 0: 13]@ 25532 LOAD: L2[ 0: 13]@ 25532 EXEC: L2[ 0: 13]@ 25532 , Size: 9 + S2_Weights INSTALL: HyperFlash[ 0: 13]@ 19712 LOAD: L2[ 0: 13]@ 19712 EXEC: L2[ 0: 13]@ 19712 , Size: 576 + S2_Biases INSTALL: HyperFlash[ 0: 13]@ 22272 LOAD: L2[ 0: 13]@ 22272 EXEC: L2[ 0: 13]@ 22272 , Size: 256 + S2_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24448 LOAD: L2[ 0: 13]@ 24448 EXEC: L2[ 0: 13]@ 24448 , Size: 64 + S2_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24512 LOAD: L2[ 0: 13]@ 24512 EXEC: L2[ 0: 13]@ 24512 , Size: 64 + S3_Infos INSTALL: HyperFlash[ 0: 13]@ 25544 LOAD: L2[ 0: 13]@ 25544 EXEC: L2[ 0: 13]@ 25544 , Size: 9 + S3_Weights INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 4096 + S3_Biases INSTALL: HyperFlash[ 0: 13]@ 22528 LOAD: L2[ 0: 13]@ 22528 EXEC: L2[ 0: 13]@ 22528 , Size: 256 + S3_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24576 LOAD: L2[ 0: 13]@ 24576 EXEC: L2[ 0: 13]@ 24576 , Size: 64 + S3_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24640 LOAD: L2[ 0: 13]@ 24640 EXEC: L2[ 0: 13]@ 24640 , Size: 64 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 25556 LOAD: L2[ 0: 13]@ 25556 EXEC: L2[ 0: 13]@ 25556 , Size: 9 + S4_Weights INSTALL: HyperFlash[ 0: 13]@ 20288 LOAD: L2[ 0: 13]@ 20288 EXEC: L2[ 0: 13]@ 20288 , Size: 576 + S4_Biases INSTALL: HyperFlash[ 0: 13]@ 22784 LOAD: L2[ 0: 13]@ 22784 EXEC: L2[ 0: 13]@ 22784 , Size: 256 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24704 LOAD: L2[ 0: 13]@ 24704 EXEC: L2[ 0: 13]@ 24704 , Size: 64 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24768 LOAD: L2[ 0: 13]@ 24768 EXEC: L2[ 0: 13]@ 24768 , Size: 64 + S5_Infos INSTALL: HyperFlash[ 0: 13]@ 25568 LOAD: L2[ 0: 13]@ 25568 EXEC: L2[ 0: 13]@ 25568 , Size: 9 + S5_Weights INSTALL: HyperFlash[ 0: 13]@ 4096 LOAD: L2[ 0: 13]@ 4096 EXEC: L2[ 0: 13]@ 4096 , Size: 4096 + S5_Biases INSTALL: HyperFlash[ 0: 13]@ 23040 LOAD: L2[ 0: 13]@ 23040 EXEC: L2[ 0: 13]@ 23040 , Size: 256 + S5_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24832 LOAD: L2[ 0: 13]@ 24832 EXEC: L2[ 0: 13]@ 24832 , Size: 64 + S5_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24896 LOAD: L2[ 0: 13]@ 24896 EXEC: L2[ 0: 13]@ 24896 , Size: 64 + S6_Infos INSTALL: HyperFlash[ 0: 13]@ 25580 LOAD: L2[ 0: 13]@ 25580 EXEC: L2[ 0: 13]@ 25580 , Size: 9 + S6_Weights INSTALL: HyperFlash[ 0: 13]@ 20864 LOAD: L2[ 0: 13]@ 20864 EXEC: L2[ 0: 13]@ 20864 , Size: 576 + S6_Biases INSTALL: HyperFlash[ 0: 13]@ 23296 LOAD: L2[ 0: 13]@ 23296 EXEC: L2[ 0: 13]@ 23296 , Size: 256 + S6_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24960 LOAD: L2[ 0: 13]@ 24960 EXEC: L2[ 0: 13]@ 24960 , Size: 64 + S6_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25024 LOAD: L2[ 0: 13]@ 25024 EXEC: L2[ 0: 13]@ 25024 , Size: 64 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 25592 LOAD: L2[ 0: 13]@ 25592 EXEC: L2[ 0: 13]@ 25592 , Size: 9 + S7_Weights INSTALL: HyperFlash[ 0: 13]@ 8192 LOAD: L2[ 0: 13]@ 8192 EXEC: L2[ 0: 13]@ 8192 , Size: 4096 + S7_Biases INSTALL: HyperFlash[ 0: 13]@ 23552 LOAD: L2[ 0: 13]@ 23552 EXEC: L2[ 0: 13]@ 23552 , Size: 256 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25088 LOAD: L2[ 0: 13]@ 25088 EXEC: L2[ 0: 13]@ 25088 , Size: 64 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25152 LOAD: L2[ 0: 13]@ 25152 EXEC: L2[ 0: 13]@ 25152 , Size: 64 + S8_Infos INSTALL: HyperFlash[ 0: 13]@ 25604 LOAD: L2[ 0: 13]@ 25604 EXEC: L2[ 0: 13]@ 25604 , Size: 9 + S8_Weights INSTALL: HyperFlash[ 0: 13]@ 21440 LOAD: L2[ 0: 13]@ 21440 EXEC: L2[ 0: 13]@ 21440 , Size: 576 + S8_Biases INSTALL: HyperFlash[ 0: 13]@ 23808 LOAD: L2[ 0: 13]@ 23808 EXEC: L2[ 0: 13]@ 23808 , Size: 256 + S8_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25216 LOAD: L2[ 0: 13]@ 25216 EXEC: L2[ 0: 13]@ 25216 , Size: 64 + S8_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25280 LOAD: L2[ 0: 13]@ 25280 EXEC: L2[ 0: 13]@ 25280 , Size: 64 + S9_Infos INSTALL: HyperFlash[ 0: 13]@ 25616 LOAD: L2[ 0: 13]@ 25616 EXEC: L2[ 0: 13]@ 25616 , Size: 9 + S9_Weights INSTALL: HyperFlash[ 0: 13]@ 12288 LOAD: L2[ 0: 13]@ 12288 EXEC: L2[ 0: 13]@ 12288 , Size: 4096 + S9_Biases INSTALL: HyperFlash[ 0: 13]@ 24064 LOAD: L2[ 0: 13]@ 24064 EXEC: L2[ 0: 13]@ 24064 , Size: 256 + S9_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25344 LOAD: L2[ 0: 13]@ 25344 EXEC: L2[ 0: 13]@ 25344 , Size: 64 + S9_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25408 LOAD: L2[ 0: 13]@ 25408 EXEC: L2[ 0: 13]@ 25408 , Size: 64 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 25628 LOAD: L2[ 0: 13]@ 25628 EXEC: L2[ 0: 13]@ 25628 , Size: 9 + S11_Infos INSTALL: HyperFlash[ 0: 13]@ 25640 LOAD: L2[ 0: 13]@ 25640 EXEC: L2[ 0: 13]@ 25640 , Size: 9 + S11_Weights INSTALL: HyperFlash[ 0: 13]@ 18944 LOAD: L2[ 0: 13]@ 18944 EXEC: L2[ 0: 13]@ 18944 , Size: 768 + S11_Biases INSTALL: HyperFlash[ 0: 13]@ 25472 LOAD: L2[ 0: 13]@ 25472 EXEC: L2[ 0: 13]@ 25472 , Size: 48 + S11_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25652 LOAD: L2[ 0: 13]@ 25652 EXEC: L2[ 0: 13]@ 25652 , Size: 12 + S11_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25664 LOAD: L2[ 0: 13]@ 25664 EXEC: L2[ 0: 13]@ 25664 , Size: 12 + S12_Infos INSTALL: HyperFlash[ 0: 13]@ 25676 LOAD: L2[ 0: 13]@ 25676 EXEC: L2[ 0: 13]@ 25676 , Size: 9 + Output_1 Externally allocated + S1_Output EXEC: L2[ 1: 2]@ 33688 , Size: 8000 + S2_Output EXEC: L2[ 2: 3]@ 25688 , Size: 8000 + S3_Output EXEC: L2[ 3: 4]@ 33688 , Size: 8000 + S4_Output EXEC: L2[ 4: 5]@ 25688 , Size: 8000 + S5_Output EXEC: L2[ 5: 6]@ 33688 , Size: 8000 + S6_Output EXEC: L2[ 6: 7]@ 25688 , Size: 8000 + S7_Output EXEC: L2[ 7: 8]@ 33688 , Size: 8000 + S8_Output EXEC: L2[ 8: 9]@ 41688 , Size: 8000 + S9_Output EXEC: L2[ 9: 10]@ 25688 , Size: 8000 + S10_Output EXEC: L2[ 10: 11]@ 33688 , Size: 64 + S11_Output EXEC: L2[ 11: 12]@ 25688 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S1_Conv2d_64x1x10x4_Relu +Generating Code For User Kernel: S2_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S3_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S4_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S5_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S6_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S7_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S8_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S9_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S10_AveragePool_25x5 +Generating Code For User Kernel: S11_Linear_12x64x1x1 +Generating Code For User Kernel: S12_SoftMax +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Weights.tensor: 2560 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S1_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S2_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S3_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S4_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S5_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S6_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S7_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Weights.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S8_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Weights.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Biases.tensor: 256 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S9_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Weights.tensor: 768 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Biases.tensor: 48 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S11_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_8BIT_EMUL/tensors/S12_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_s_quant_L3_Flash_Const.dat (size 25688) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 43448 +L2 Memory size (Bytes) : Given: 350000, Used: 49685 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 170318 Bytes +Sum of all Kernels arguments size : 170318 Bytes +Tiling Bandwith overhead : 1.000000 Move/KerArgSize +Sum of baseline bandwidth : 4248224 Bytes +Percentage of baseline BW for L2 : 4.00916 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 2704780 Operations +Total amount of flash coefficients : 25688 Bytes + +Basic kernels library : nntool_extra_kernels.h + : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_s_quant.h +Output Directory : BUILD_MODEL_8BIT_EMUL + +The following files have been generated: + KWS_ds_cnn_s_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_s_quantKernels.h Header file for the generated C code + KWS_ds_cnn_s_quant_L3_Flash_Const.dat Flash content for Graph constants +mkdir /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL +python3 /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/GenLUT.py --fft_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/LUT.def --mfcc_bf_lut_file /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCC_FB.def \ + --sample_rate 16000 --frame_size 640 --frame_step 320 --n_frame 49 \ + --n_fft 1024 --n_dct 40 --mfcc_bank_cnt 40 --fmin 20 --fmax 4000 --mfcc_bank_cnt 40 --preempfactor 0.0 \ + --use_tf_mfcc --save_params_header MFCC_params_SMALL.h +(1024,) +Filter 0: Start: 2 Stop: 5 Base: 0 Items: 4 +Filter 1: Start: 4 Stop: 8 Base: 4 Items: 5 +Filter 2: Start: 6 Stop: 10 Base: 9 Items: 5 +Filter 3: Start: 9 Stop: 13 Base: 14 Items: 5 +Filter 4: Start: 11 Stop: 15 Base: 19 Items: 5 +Filter 5: Start: 14 Stop: 18 Base: 24 Items: 5 +Filter 6: Start: 16 Stop: 21 Base: 29 Items: 6 +Filter 7: Start: 19 Stop: 24 Base: 35 Items: 6 +Filter 8: Start: 22 Stop: 28 Base: 41 Items: 7 +Filter 9: Start: 25 Stop: 31 Base: 48 Items: 7 +Filter 10: Start: 29 Stop: 34 Base: 55 Items: 6 +Filter 11: Start: 32 Stop: 38 Base: 61 Items: 7 +Filter 12: Start: 35 Stop: 42 Base: 68 Items: 8 +Filter 13: Start: 39 Stop: 46 Base: 76 Items: 8 +Filter 14: Start: 43 Stop: 51 Base: 84 Items: 9 +Filter 15: Start: 47 Stop: 55 Base: 93 Items: 9 +Filter 16: Start: 52 Stop: 60 Base: 102 Items: 9 +Filter 17: Start: 56 Stop: 65 Base: 111 Items: 10 +Filter 18: Start: 61 Stop: 70 Base: 121 Items: 10 +Filter 19: Start: 66 Stop: 75 Base: 131 Items: 10 +Filter 20: Start: 71 Stop: 81 Base: 141 Items: 11 +Filter 21: Start: 76 Stop: 87 Base: 152 Items: 12 +Filter 22: Start: 82 Stop: 93 Base: 164 Items: 12 +Filter 23: Start: 88 Stop: 99 Base: 176 Items: 12 +Filter 24: Start: 94 Stop: 106 Base: 188 Items: 13 +Filter 25: Start: 100 Stop: 113 Base: 201 Items: 14 +Filter 26: Start: 107 Stop: 121 Base: 215 Items: 15 +Filter 27: Start: 114 Stop: 128 Base: 230 Items: 15 +Filter 28: Start: 122 Stop: 137 Base: 245 Items: 16 +Filter 29: Start: 129 Stop: 145 Base: 261 Items: 17 +Filter 30: Start: 138 Stop: 154 Base: 278 Items: 17 +Filter 31: Start: 146 Stop: 163 Base: 295 Items: 18 +Filter 32: Start: 155 Stop: 173 Base: 313 Items: 19 +Filter 33: Start: 164 Stop: 183 Base: 332 Items: 20 +Filter 34: Start: 174 Stop: 194 Base: 352 Items: 21 +Filter 35: Start: 184 Stop: 205 Base: 373 Items: 22 +Filter 36: Start: 195 Stop: 217 Base: 395 Items: 23 +Filter 37: Start: 206 Stop: 229 Base: 418 Items: 24 +Filter 38: Start: 218 Stop: 242 Base: 442 Items: 25 +Filter 39: Start: 230 Stop: 255 Base: 467 Items: 26 +gcc -g -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -I. -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Emulation MFCCmodel.c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccGenerator.c /home/marco-gwt/GWT/AutotilerV2/install/lib/libtile.a -lm -DSMALL +/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/GenMFCC -o /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -f BUILD_MODEL_8BIT_EMUL --L1 48736 --L2 350000 --L3 6388608 +MFCC_COEF_DYN = 10 +FFT_BITS = 10 + +==== Process Tiling For User Kernel: MFCC ======================= + MFCC Partition[0] Size = 24894 (Min: 0, Max: 25622), Fraction: 1.00, Giving: 48736 bytes out of 48736 bytes + MFCC, TiledSpace: Tile0 Iteration Count: 1 + In : Ratio: 0.000000, Size: 2560, Total: 2560, Move: 62720 (Decl x 1.960000) L2 +* Out : Ratio: 0.000000, Size: 3920, Total: 6480, Move: 3920 (Decl x 1.000000) L2 +* InOut1 : Ratio: 3.200000, Size: 4096, Total: 10576, Move: 0 (Decl x 0.000000) L2 +* Out_fft : Ratio: 3.200000, Size: 4096, Total: 14672, Move: 0 (Decl x 0.000000) L2 +* shift_fft : Ratio: 3.200000, Size: 1024, Total: 15696, Move: 0 (Decl x 0.000000) L2 +* InOut2 : Ratio: 0.125000, Size: 160, Total: 15856, Move: 0 (Decl x 0.000000) L2 +* Shift : Ratio: 0.003125, Size: 4, Total: 15860, Move: 0 (Decl x 0.000000) L2 +* WinTable : Ratio: 3.200000, Size: 2048, Total: 17908, Move: 2048 (Decl x 1.000000) L2 +* Twiddles_fft : Ratio: 6.400000, Size: 4096, Total: 22004, Move: 4096 (Decl x 1.000000) L2 +* SwapTable_fft : Ratio: 3.200000, Size: 2048, Total: 24052, Move: 2048 (Decl x 1.000000) L2 +*MFCC_FilterBank : Ratio: 0.125000, Size: 320, Total: 24372, Move: 320 (Decl x 1.000000) L2 +* MFCC_Coeffs : Ratio: 1.543750, Size: 988, Total: 25360, Move: 988 (Decl x 1.000000) L2 +* shift_bf : Ratio: 0.125000, Size: 40, Total: 25400, Move: 0 (Decl x 0.000000) L2 +* DCT_Coeff : Ratio: 5.000000, Size: 3200, Total: 28600, Move: 3200 (Decl x 1.000000) L2 + MFCC - IterSpace: Tile0 - L1 Memory: 28600, L2Move: 79340, L3Move: 0, Tiling Overhead: 1.631839 + MFCC Iteration for Tiled Space: 1 + MFCC For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 28600, Reusable Memory: 20136, Used L2 Memory: 0 +================================================================================================= + + +Generating Code For User Kernel: MFCC + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 28600 +L2 Memory size (Bytes) : Given: 350000, Used: 0 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for all user kernels : 0 Bytes +L2 Memory bandwidth for all user kerneks : 79340 Bytes +Sum of all Kernels arguments size : 48620 Bytes +Tiling Bandwith overhead : 1.631839 Move/KerArgSize +Percentage of baseline BW for L2 : inf % +Percentage of baseline BW for L3 : -nan % +Sum of all Kernels operations : 0 Operations + +Basic kernels library : MfccBasicKernels.h +Output Directory : /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL + +The following files have been generated: + MFCCKernels.c Generated C code for the user kernels and the user kernels groups + MFCCKernels.h Header file for the generated C code +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DSMALL -DWITH_MFCC -DUSE_ABS -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -I/home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels -IBUILD_MODEL_8BIT_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/Generators/MFCC -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_8BIT_EMUL/KWS_ds_cnn_s_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/tools/nntool/autotiler/kernels/norm_transpose.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/MFCC/FFTLib.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o -lm +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': False} +Pred/Tot: 90/ 100 Accuracy: 90.00% +Pred/Tot: 184/ 200 Accuracy: 92.00% +Pred/Tot: 274/ 300 Accuracy: 91.33% +Pred/Tot: 360/ 400 Accuracy: 90.00% +Pred/Tot: 452/ 500 Accuracy: 90.40% +Pred/Tot: 538/ 600 Accuracy: 89.67% +Pred/Tot: 628/ 700 Accuracy: 89.71% +Pred/Tot: 710/ 800 Accuracy: 88.75% +Pred/Tot: 795/ 900 Accuracy: 88.33% +Pred/Tot: 881/1000 Accuracy: 88.10% +Pred/Tot: 965/1100 Accuracy: 87.73% +Pred/Tot: 1053/1200 Accuracy: 87.75% +Pred/Tot: 1141/1300 Accuracy: 87.77% +Pred/Tot: 1229/1400 Accuracy: 87.79% +Pred/Tot: 1311/1500 Accuracy: 87.40% +Pred/Tot: 1403/1600 Accuracy: 87.69% +Pred/Tot: 1493/1700 Accuracy: 87.82% +Pred/Tot: 1582/1800 Accuracy: 87.89% +Pred/Tot: 1672/1900 Accuracy: 88.00% +Pred/Tot: 1758/2000 Accuracy: 87.90% +Pred/Tot: 1847/2100 Accuracy: 87.95% +Pred/Tot: 1940/2200 Accuracy: 88.18% +Pred/Tot: 2034/2300 Accuracy: 88.43% +Pred/Tot: 2122/2400 Accuracy: 88.42% +Pred/Tot: 2206/2500 Accuracy: 88.24% +Pred/Tot: 2295/2600 Accuracy: 88.27% +Pred/Tot: 2381/2700 Accuracy: 88.19% +Pred/Tot: 2468/2800 Accuracy: 88.14% +Pred/Tot: 2554/2900 Accuracy: 88.07% +Pred/Tot: 2642/3000 Accuracy: 88.07% +Pred/Tot: 2731/3100 Accuracy: 88.10% +Pred/Tot: 2822/3200 Accuracy: 88.19% +Pred/Tot: 2917/3300 Accuracy: 88.39% +Pred/Tot: 2999/3400 Accuracy: 88.21% +Pred/Tot: 3089/3500 Accuracy: 88.26% +Pred/Tot: 3178/3600 Accuracy: 88.28% +Pred/Tot: 3259/3700 Accuracy: 88.08% +Pred/Tot: 3354/3800 Accuracy: 88.26% +Pred/Tot: 3445/3900 Accuracy: 88.33% +Pred/Tot: 3528/4000 Accuracy: 88.20% +Pred/Tot: 3619/4100 Accuracy: 88.27% +Pred/Tot: 3708/4200 Accuracy: 88.29% +Pred/Tot: 3797/4300 Accuracy: 88.30% +Pred/Tot: 3886/4400 Accuracy: 88.32% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 3927/4444 Accuracy: 88.37% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 1 291 2 6 4 15 8 11 16 0 8 9] + [ 1 11 340 6 0 5 30 0 0 0 0 4] + [ 1 20 2 338 0 16 5 0 3 0 2 19] + [ 0 4 0 0 316 1 3 0 1 9 14 2] + [ 1 5 2 17 0 337 2 0 1 0 6 6] + [ 0 9 2 2 2 2 331 2 0 1 1 0] + [ 0 30 2 0 1 0 5 320 0 1 0 4] + [ 1 8 1 0 5 1 1 0 334 7 2 3] + [ 0 5 0 1 30 1 1 1 11 313 5 5] + [ 3 3 1 0 11 3 0 0 1 3 322 3] + [ 1 10 0 23 0 18 0 2 0 2 2 314]] +Pred/Tot: 84/ 100 Accuracy: 84.00% +Pred/Tot: 171/ 200 Accuracy: 85.50% +Pred/Tot: 257/ 300 Accuracy: 85.67% +Pred/Tot: 340/ 400 Accuracy: 85.00% +Pred/Tot: 425/ 500 Accuracy: 85.00% +Pred/Tot: 513/ 600 Accuracy: 85.50% +Pred/Tot: 602/ 700 Accuracy: 86.00% +Pred/Tot: 694/ 800 Accuracy: 86.75% +Pred/Tot: 784/ 900 Accuracy: 87.11% +Pred/Tot: 876/1000 Accuracy: 87.60% +Pred/Tot: 962/1100 Accuracy: 87.45% +Pred/Tot: 1053/1200 Accuracy: 87.75% +Pred/Tot: 1137/1300 Accuracy: 87.46% +Pred/Tot: 1227/1400 Accuracy: 87.64% +Pred/Tot: 1314/1500 Accuracy: 87.60% +Pred/Tot: 1409/1600 Accuracy: 88.06% +Pred/Tot: 1495/1700 Accuracy: 87.94% +Pred/Tot: 1584/1800 Accuracy: 88.00% +Pred/Tot: 1666/1900 Accuracy: 87.68% +Pred/Tot: 1753/2000 Accuracy: 87.65% +Pred/Tot: 1843/2100 Accuracy: 87.76% +Pred/Tot: 1937/2200 Accuracy: 88.05% +Pred/Tot: 2025/2300 Accuracy: 88.04% +Pred/Tot: 2109/2400 Accuracy: 87.88% +Pred/Tot: 2202/2500 Accuracy: 88.08% +Pred/Tot: 2289/2600 Accuracy: 88.04% +Pred/Tot: 2377/2700 Accuracy: 88.04% +Pred/Tot: 2465/2800 Accuracy: 88.04% +Pred/Tot: 2551/2900 Accuracy: 87.97% +Pred/Tot: 2635/3000 Accuracy: 87.83% +Pred/Tot: 2722/3100 Accuracy: 87.81% +Pred/Tot: 2809/3200 Accuracy: 87.78% +Pred/Tot: 2902/3300 Accuracy: 87.94% +Pred/Tot: 2987/3400 Accuracy: 87.85% +Pred/Tot: 3075/3500 Accuracy: 87.86% +Pred/Tot: 3168/3600 Accuracy: 88.00% +Pred/Tot: 3251/3700 Accuracy: 87.86% +Pred/Tot: 3337/3800 Accuracy: 87.82% +Pred/Tot: 3421/3900 Accuracy: 87.72% +Pred/Tot: 3503/4000 Accuracy: 87.58% +Pred/Tot: 3595/4100 Accuracy: 87.68% +Pred/Tot: 3682/4200 Accuracy: 87.67% +Pred/Tot: 3770/4300 Accuracy: 87.67% +Pred/Tot: 3857/4400 Accuracy: 87.66% +Pred/Tot: 3943/4500 Accuracy: 87.62% +Pred/Tot: 4027/4600 Accuracy: 87.54% +Pred/Tot: 4113/4700 Accuracy: 87.51% +Pred/Tot: 4202/4800 Accuracy: 87.54% + +FINAL TESTING ACCURACY: +Pred/Tot: 4279/4889 Accuracy: 87.52% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 2 317 3 7 2 8 14 17 15 2 6 15] + [ 1 11 356 13 0 2 28 2 0 0 0 6] + [ 1 9 0 345 2 19 10 0 0 0 0 19] + [ 2 9 0 0 385 0 2 0 7 8 9 3] + [ 2 16 2 18 0 340 7 0 4 1 4 12] + [ 2 8 7 0 1 0 393 0 0 0 0 1] + [ 1 24 0 0 2 0 10 352 2 0 3 2] + [ 2 9 2 1 7 4 0 1 353 13 0 4] + [ 2 10 0 0 44 1 3 0 9 321 2 10] + [ 1 6 0 1 17 8 0 0 1 2 373 2] + [ 3 12 0 28 1 15 2 2 0 2 1 336]] diff --git a/accuracy_log/log_test_small_spectr_v2.txt b/accuracy_log/log_test_small_spectr_v2.txt new file mode 100644 index 0000000..cd6eb91 --- /dev/null +++ b/accuracy_log/log_test_small_spectr_v2.txt @@ -0,0 +1,2028 @@ +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +python3 utils/test_accuracy_emul.py --tflite_model model/KWS_ds_cnn_s_quant.tflite --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --test_with_wav 1 --use_power_spectrogram 0 +WARNING:tensorflow:From utils/test_accuracy_emul.py:311: The name tf.app.run is deprecated. Please use tf.compat.v1.app.run instead. + +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from main_emulation.c:32: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_BasicKernels_SQ8.h:3, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.h:5, + from BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.c:1: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:17: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WriteImageToFile’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:437:18: note: in expansion of macro ‘__OPEN_WRITE’ + 437 | void *File = __OPEN_WRITE(fs, ImageName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:454:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 454 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:460:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 460 | ret+=__WRITE(File, img_rgb888, rgb888_size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:473:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 473 | ret+=__WRITE(File,OutBuffer +(CHUNK_SIZE*i), CHUNK_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:476:26: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 476 | ret+=__WRITE(File,OutBuffer+(CHUNK_SIZE*steps) , ((W*H*PixelSize) % CHUNK_SIZE)*sizeof(unsigned char)); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:479:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 479 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/ImgIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c: In function ‘WritePPMHeader’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:57: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result] + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c:404:9: note: in expansion of macro ‘__WRITE’ + 404 | __WRITE(FD,&(Buffer[a]), sizeof(unsigned char)); + | ^~~~~~~ +In file included from /home/marco-gwt/GWT/AutotilerV2/Emulation/at_api.h:21, + from /home/marco-gwt/GWT/AutotilerV2/Emulation/Gap.h:18, + from /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:7: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c: In function ‘AT_TensorGetNextPage’: +/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c:79:63: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 79 | AT_HYPERFLASH_FS_CL_COPY((AT_HYPERFLASH_FS_T *) L3_Device, (AT_HYPERFLASH_FS_EXT_ADDR_TYPE) (Addr+Offset), (AT_HYPERFLASH_FS_INT_ADDR_TYPE) L2_BufferAddr, Size, 0, L3_Event); + | ^ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:225:36: note: in definition of macro ‘AT_HYPERFLASH_FS_CL_COPY’ + 225 | __at_hyperflash_fs_copy(*(file), ext, loc, size, dir) + | ^~~ +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h: In function ‘__at_hyperflash_fs_copy’: +/home/marco-gwt/GWT/AutotilerV2/Emulation/at_api_emul.h:173:8: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 173 | else fread(loc, 1, size, file); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:46:20: warning: useless storage class specifier in empty declaration + 46 | typedef struct pi_device {}; + | ^~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c: In function ‘WriteWavToFileNew’: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:37:57: warning: initialization of ‘void *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] + 37 | #define __OPEN_WRITE(__FS, __NAME) open(__NAME, O_RDWR | O_CREAT, S_IRWXU) + | ^~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:236:18: note: in expansion of macro ‘__OPEN_WRITE’ + 236 | void *File = __OPEN_WRITE(fs, FileName); + | ^~~~~~~~~~~~ +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:328:20: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 328 | ret += __WRITE(File, header_buffer, WAV_HEADER_SIZE); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:335:21: warning: passing argument 1 of ‘write’ makes integer from pointer without a cast [-Wint-conversion] + 335 | ret += __WRITE(File, data, Size); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:41:63: note: in definition of macro ‘__WRITE’ + 41 | #define __WRITE(__FD, __BUF, __LEN) write(__FD, __BUF, __LEN) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:366:27: note: expected ‘int’ but argument is of type ‘void *’ + 366 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur; + | ~~~~^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:338:13: warning: passing argument 1 of ‘close’ makes integer from pointer without a cast [-Wint-conversion] + 338 | __CLOSE(File); + | ^~~~ + | | + | void * +/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:38:63: note: in definition of macro ‘__CLOSE’ + 38 | #define __CLOSE(__FD) close(__FD) + | ^~~~ +In file included from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/fs_switch.h:29, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include/gaplib/wavIO.h:13, + from /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c:9: +/usr/include/unistd.h:353:23: note: expected ‘int’ but argument is of type ‘void *’ + 353 | extern int close (int __fd); + | ~~~~^~~~ +make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL=1 MEDIUM=0 LARGE=0 WITH_MFCC=1 USE_POWER=0 +make[1]: Entering directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +script model/nntool_script +GEN ... /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Generator_Util.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/CNN_Copy_Generators.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators/SSD_Generators.c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeGenerator.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/CNN_Generators_SQ8.c /home/marco-gwt/GWT/AutotilerV2/CNN_Generators_SQ8/RNN_Generators_SQ8.c +rm -f -rf BUILD_MODEL_SQ8_EMUL +rm -f -r BUILD_EMUL +rm -f kws_ds_cnn_emul +mkdir BUILD_MODEL_SQ8_EMUL +cp model/KWS_ds_cnn_s_quant.tflite BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quant.tflite +echo "GENERATING NNTOOL STATE FILE" +GENERATING NNTOOL STATE FILE +nntool -s model/nntool_script BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quant.tflite -q +settings - set log level to INFO +log_level - was: 'INFO' +now: 'INFO' +open - opening graph file BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quant.tflite load_quantization = True +tflite - Importing TFLITE model version 3 +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +nngraph - update graph dimensions +set_aliases - looking for aliased edges +nngraph - calculate liveness +forwards SOFTMAX_0_11 in: -12.19<(i8-0.00)*0.09519558<12.09 out: None stop [] fusion False +handler SoftmaxTanHMult selected for SoftMaxParameters(SOFTMAX_0_11) +forwards handler SOFTMAX_0_11 returned in: -16.00<(i8-0.00)*0.12500000<15.88 forced out: -1.00<(i16-0.00)*0.00003052<1.00 fusion False +forwards in edge 0 does not match was -12.19<(i8-0.00)*0.09519558<12.09 need -16.00<(i8-0.00)*0.12500000<15.88 forced +go backwackwards to F 12x1x1x64 B 1 +backwards FULLY_CONNECTED_0_10 in: -11.38<(i8-0.00)*0.08886796<11.29,chan<(i8-0.00)*chan W: 10, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[4,10] + => H: 49, Pad:[4,5] PadT:[4,5] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 8 +OverlapP: 0 +TileCons: 2 +UsedIn : [10 x 49] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConvNxMStrideSxSy_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 328000 + +==== Process Tiling For User Kernel: S4_Conv2d_64x1x10x4_Relu ======================= +S4_Conv2d_64x1x10x4_Relu Partition[0] Size = 39393 (Min: 200, Max: 55185), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=25 + In Dim: 58, TileOverlap: 8, Ratio: 2.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S4_Conv2d_64x1x10x4_Relu, Total Raw Memory: 43448 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: In, Size: 492, Base1: 0, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Bias, Size: 256, Base1: 492, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Scale, Size: 64, Base1: 748, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: ScaleN, Size: 64, Base1: 812, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Filter, Size: 2560, Base1: 876, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Out, Size: 8000, Base1: 3436, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: ConvOut, Size: 32000, Base1: 11436, Base2: 0 +Ker: S4_Conv2d_64x1x10x4_Relu, Arg: Infos, Size: 12, Base1: 43436, Base2: 0 +S4_Conv2d_64x1x10x4_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 43448, Reusable Memory: 5288, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S7_Conv2d_64x1x3x3_Relu ======================= +S7_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S7_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S7_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S7_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S7_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S7_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S7_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S10_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S10_Conv2d_64x64x1x1_Relu ======================= +S10_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S10_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S10_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S10_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S10_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S10_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S10_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S13_Conv2d_64x1x3x3_Relu ======================= +S13_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S13_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S13_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S13_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S13_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S13_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S13_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S16_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S16_Conv2d_64x64x1x1_Relu ======================= +S16_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S16_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S16_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S16_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S16_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S16_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S16_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S19_Conv2d_64x1x3x3_Relu ======================= +S19_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S19_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S19_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S19_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S19_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S19_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S19_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S22_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S22_Conv2d_64x64x1x1_Relu ======================= +S22_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S22_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S22_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S22_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S22_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S22_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S22_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[1,1] PadT:[1,1] => Wc: 5, Filter:[3,3] + => H: 25, Pad:[1,1] PadT:[1,1] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 2 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + ConvKerName: KerParConvDW3x3Stride1B32_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 80000 + +==== Process Tiling For User Kernel: S25_Conv2d_64x1x3x3_Relu ======================= +S25_Conv2d_64x1x3x3_Relu Partition[0] Size = 36505 (Min: 30, Max: 67337), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=25 + In Dim: 27, TileOverlap: 2, Ratio: 1.000000 + Out Dim: 25, TileOverlap: 0, Ratio: 1.000000 + ConvOut Dim: 25, TileOverlap: 0, Ratio: 1.000000 + +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: Bias, was using 320 Bytes will require 256 Bytes buffer +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: Scale, was using 80 Bytes will require 64 Bytes buffer +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: ScaleN, was using 80 Bytes will require 64 Bytes buffer +S25_Conv2d_64x1x3x3_Relu Full buffering on Arg: Filter, was using 720 Bytes will require 576 Bytes buffer +S25_Conv2d_64x1x3x3_Relu, TiledSpace: Tile0 Iteration Count: 1 Parametric Space: [D0, M0=40] + In : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 10000, Move: 8000 (Decl x 1.000000) L2 +* Bias : Ratio: 0.000000, Size: 256, Total: 10256, Move: 256 (Decl x 1.000000) L2 +* Scale : Ratio: 0.000000, Size: 64, Total: 10320, Move: 64 (Decl x 1.000000) L2 +* ScaleN : Ratio: 0.000000, Size: 64, Total: 10384, Move: 64 (Decl x 1.000000) L2 +* Filter : Ratio: 0.000000, Size: 576, Total: 10960, Move: 576 (Decl x 1.000000) L2 + Out : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 10000, Total: 20960, Move: 8000 (Decl x 1.000000) L2 +* ConvOut : Ratio: 1.000000, FixDim: 5, VarDim: 25 [ 25], Size: 20000, Total: 40960, Move: 0 (Decl x 0.000000) L2 +* Infos : Ratio: 0.000000, Size: 12, Total: 40972, Move: 9 (Decl x 1.000000) L2 +S25_Conv2d_64x1x3x3_Relu - IterSpace: Tile0 - L1 Memory: 40972, L2Move: 16969, L3Move: 0, Tiling Overhead: 1.000000 +S25_Conv2d_64x1x3x3_Relu Found Parametric value for space D0 (Initial: 64, Div: 8) = 40 [40*1 + 24], Iteration for Tiled Space: 1 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: In, Size: 5000, Base1: 0, Base2: 5000 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Bias, Size: 256, Base1: 10000, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Scale, Size: 64, Base1: 10256, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: ScaleN, Size: 64, Base1: 10320, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Filter, Size: 576, Base1: 10384, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Out, Size: 5000, Base1: 10960, Base2: 15960 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: ConvOut, Size: 20000, Base1: 20960, Base2: 0 +Ker: S25_Conv2d_64x1x3x3_Relu, Arg: Infos, Size: 12, Base1: 40960, Base2: 0 +S25_Conv2d_64x1x3x3_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 40972, Reusable Memory: 7764, Used L2 Memory: 0 +================================================================================================= + +InFeat: 64, OutFeat: 64 +Conv => W: 5, Pad:[0,0] PadT:[0,0] => Wc: 5, Filter:[1,1] + => H: 25, Pad:[0,0] PadT:[0,0] => Hc: 25 +Pool => Wc: 5, Pad:[0,0] => Wo: 5, Filter:[1,1] + => Hc: 25, Pad:[0,0] => Ho: 25 +OverlapC: 0 +OverlapP: 0 +TileCons: 1 +UsedIn : [5 x 25] +UsedC : [5 x 25] + SetBiasKerName: KerParSetBiasB32_SQ8 + ConvKerName: KerParConv1x1Stride1_SQ8 + DPReductionKerName: KerParReduct_CC_ReLU_SQ8 +Nb Oper : 520000 +Mapping this convolution to matrix multiplication +CNN_MatMul_SQ8: S28_Conv2d_64x64x1x1_Relu +In1 => W: 64, H: 64 +In2 => W: 125, H: 64, w: 5, h: 25, Sx: 1, Sy: 1 +Out => W: 125, H: 64 => Column first + MatMulKerName: KerParMatMulB32_2x4_ReLU_SQ8 + +==== Process Tiling For User Kernel: S28_Conv2d_64x64x1x1_Relu ======================= +S28_Conv2d_64x64x1x1_Relu Partition[0] Size = 1424 (Min: 1024, Max: 8640), Fraction: 0.52, Giving: 8640 Bytes out of 48736 Bytes +S28_Conv2d_64x64x1x1_Relu Partition[1] Size = 1305 (Min: 512, Max: 32329), Fraction: 0.48, Giving: 40095 Bytes out of 48736 Bytes + +Reference object: In2, Dim=125 + In2 Dim: 125, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 125, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S28_Conv2d_64x64x1x1_Relu, Total Raw Memory: 16268 fits into L1 memory 40095. Promoting all kernel arguments to initialized buffers. +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: KerBuff, Size: 256, Base1: 0, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: In2, Size: 8000, Base1: 256, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Out, Size: 8000, Base1: 8256, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Infos, Size: 12, Base1: 16256, Base2: 0 +S28_Conv2d_64x64x1x1_Relu For Iter Space: 1 Iteration count: 1, Given L1 Memory: 40095, Used L1 Memory: 16268, Reusable Memory: 23824, Used L2 Memory: 0 + +Reference object: In1, Dim=64 + In1 Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Bias Dim: 64, TileOverlap: 0, Ratio: 1.000000 + Scale Dim: 64, TileOverlap: 0, Ratio: 1.000000 + ScaleN Dim: 64, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S28_Conv2d_64x64x1x1_Relu, Total Raw Memory: 4480 fits into L1 memory 8640. Promoting all kernel arguments to initialized buffers. +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: In1, Size: 4096, Base1: 16268, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Bias, Size: 256, Base1: 20364, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: Scale, Size: 64, Base1: 20620, Base2: 0 +Ker: S28_Conv2d_64x64x1x1_Relu, Arg: ScaleN, Size: 64, Base1: 20684, Base2: 0 +S28_Conv2d_64x64x1x1_Relu For Iter Space: 0 Iteration count: 1, Given L1 Memory: 8640, Used L1 Memory: 4480, Reusable Memory: 4160, Used L2 Memory: 0 +================================================================================================= + +Pool => W: 5, Pad:[0,0] => Wo: 1 + => H: 25, Pad:[0,0] => Ho: 1 +OverlapP: 23 +TileCons: 2 +UsedIn : [5 x 25] + PoolKerName: KerParPoolNxMStrideSxSy_SQ8 +Nb Oper : 8000 + +==== Process Tiling For User Kernel: S29_AveragePool_25x5 ======================= +S29_AveragePool_25x5 Partition[0] Size = 16271 (Min: 250, Max: 16155), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: Out, Dim=1 + In Dim: 25, TileOverlap: 23, Ratio: 2.000000 + Out Dim: 1, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S29_AveragePool_25x5, Total Raw Memory: 8076 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S29_AveragePool_25x5, Arg: In, Size: 8000, Base1: 0, Base2: 0 +Ker: S29_AveragePool_25x5, Arg: Out, Size: 64, Base1: 8000, Base2: 0 +Ker: S29_AveragePool_25x5, Arg: Infos, Size: 12, Base1: 8064, Base2: 0 +S29_AveragePool_25x5 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 8076, Reusable Memory: 40660, Used L2 Memory: 0 +================================================================================================= + +Linear Layer S32_Linear_12x64x1x1, Linear: InDim: 64, OutDim: 12, Activation: None +Linear Kernel: KerParLinearLayerFullFeatB32_SQ8 + +==== Process Tiling For User Kernel: S32_Linear_12x64x1x1 ======================= +S32_Linear_12x64x1x1 Partition[0] Size = 1791 (Min: 0, Max: 1875), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=1 + +Kernel: S32_Linear_12x64x1x1, Total Raw Memory: 928 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S32_Linear_12x64x1x1, Arg: In, Size: 64, Base1: 0, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Filter, Size: 768, Base1: 64, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Bias, Size: 48, Base1: 832, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Out, Size: 12, Base1: 880, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Scale, Size: 12, Base1: 892, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: ScaleN, Size: 12, Base1: 904, Base2: 0 +Ker: S32_Linear_12x64x1x1, Arg: Infos, Size: 12, Base1: 916, Base2: 0 +S32_Linear_12x64x1x1 For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 928, Reusable Memory: 47808, Used L2 Memory: 0 +================================================================================================= + + +==== Process Tiling For User Kernel: S33_SoftMax ======================= + S33_SoftMax Partition[0] Size = 51 (Min: 8, Max: 63), Fraction: 1.00, Giving: 48736 Bytes out of 48736 Bytes + +Reference object: In, Dim=12 + In Dim: 12, TileOverlap: 0, Ratio: 1.000000 + Out Dim: 12, TileOverlap: 0, Ratio: 1.000000 + +Kernel: S33_SoftMax, Total Raw Memory: 48 fits into L1 memory 48736. Promoting all kernel arguments to initialized buffers. +Ker: S33_SoftMax, Arg: In, Size: 12, Base1: 0, Base2: 0 +Ker: S33_SoftMax, Arg: Out, Size: 24, Base1: 12, Base2: 0 +Ker: S33_SoftMax, Arg: Infos, Size: 12, Base1: 36, Base2: 0 + S33_SoftMax For Iter Space: 0 Iteration count: 1, Given L1 Memory: 48736, Used L1 Memory: 48, Reusable Memory: 48688, Used L2 Memory: 0 +================================================================================================= + + Symbol: S32_Output[ In] Adding Edge From S32_Linear_12x64x1x1 To S33_SoftMax New + Symbol: S29_Output[ In] Adding Edge From S29_AveragePool_25x5 To S32_Linear_12x64x1x1 New + Symbol: S28_Output[ In] Adding Edge From S28_Conv2d_64x64x1x1_Relu To S29_AveragePool_25x5 New + Symbol: S25_Output[ In] Adding Edge From S25_Conv2d_64x1x3x3_Relu To S28_Conv2d_64x64x1x1_Relu New + Symbol: S22_Output[ In] Adding Edge From S22_Conv2d_64x64x1x1_Relu To S25_Conv2d_64x1x3x3_Relu New + Symbol: S19_Output[ In] Adding Edge From S19_Conv2d_64x1x3x3_Relu To S22_Conv2d_64x64x1x1_Relu New + Symbol: S16_Output[ In] Adding Edge From S16_Conv2d_64x64x1x1_Relu To S19_Conv2d_64x1x3x3_Relu New + Symbol: S13_Output[ In] Adding Edge From S13_Conv2d_64x1x3x3_Relu To S16_Conv2d_64x64x1x1_Relu New + Symbol: S10_Output[ In] Adding Edge From S10_Conv2d_64x64x1x1_Relu To S13_Conv2d_64x1x3x3_Relu New + Symbol: S7_Output[ In] Adding Edge From S7_Conv2d_64x1x3x3_Relu To S10_Conv2d_64x64x1x1_Relu New + Symbol: S4_Output[ In] Adding Edge From S4_Conv2d_64x1x10x4_Relu To S7_Conv2d_64x1x3x3_Relu New + Symbol: Output_1[ Out] Adding Edge From S33_SoftMax To __GraphExit__ New + Symbol: S33_Infos[ In] Adding Edge From __GraphEntry__ To S33_SoftMax New + Symbol: S32_Infos[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 New + Symbol: S32_Mul_shift[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: S32_Mul_scale[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: Dscnnfc1matmul_bias[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: Dscnnfc1weights_quantfakequant[ In] Adding Edge From __GraphEntry__ To S32_Linear_12x64x1x1 Exists + Symbol: S29_Infos[ In] Adding Edge From __GraphEntry__ To S29_AveragePool_25x5 New + Symbol: S28_Infos[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu New + Symbol: S28_Mul_shift[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: S28_Mul_scale[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_4pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S28_Conv2d_64x64x1x1_Relu Exists + Symbol: S25_Infos[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu New + Symbol: S25_Mul_shift[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: S25_Mul_scale[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_4dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S25_Conv2d_64x1x3x3_Relu Exists + Symbol: S22_Infos[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu New + Symbol: S22_Mul_shift[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: S22_Mul_scale[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_3pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S22_Conv2d_64x64x1x1_Relu Exists + Symbol: S19_Infos[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu New + Symbol: S19_Mul_shift[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: S19_Mul_scale[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_3dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S19_Conv2d_64x1x3x3_Relu Exists + Symbol: S16_Infos[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu New + Symbol: S16_Mul_shift[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: S16_Mul_scale[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_2pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S16_Conv2d_64x64x1x1_Relu Exists + Symbol: S13_Infos[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu New + Symbol: S13_Mul_shift[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: S13_Mul_scale[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_2dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S13_Conv2d_64x1x3x3_Relu Exists + Symbol: S10_Infos[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu New + Symbol: S10_Mul_shift[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: S10_Mul_scale[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convconv2d_fo[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: Dscnnconv_ds_1pw_convweights_q[ In] Adding Edge From __GraphEntry__ To S10_Conv2d_64x64x1x1_Relu Exists + Symbol: S7_Infos[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu New + Symbol: S7_Mul_shift[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: S7_Mul_scale[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convdepthwise[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: Dscnnconv_ds_1dw_convweights_q[ In] Adding Edge From __GraphEntry__ To S7_Conv2d_64x1x3x3_Relu Exists + Symbol: S4_Infos[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu New + Symbol: S4_Mul_shift[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: S4_Mul_scale[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: Dscnnconv_1conv2d_fold_bias[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: Dscnnconv_1weights_quantfakequ[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists + Symbol: Input_1[ In] Adding Edge From __GraphEntry__ To S4_Conv2d_64x1x10x4_Relu Exists +After Dynamic Allocation, TopL3: 0, TopL2: 24000 => Alloc: OK + +After Const Allocation, TopL3: 0, TopL2: 49685 => Alloc: OK + +[FULL] Remapping [24000 .. 49684] to [0 .. 25684] Align compensation: 3 +[PART] Remapping [0 .. 23999] to [25688 .. 49687] Align compensation: 0 +[PART] Remapping [49685 .. 349999] to [49688 .. 350002] Align compensation: 1 +Symbol allocation for graph KWS_ds_cnn_s_quantCNN is sucessfull, L2: 49685 out of 350000, L3: 0 out of 6388608 +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph structure: + +Node 0, Channel 0 0: GraphEntry __GraphEntry__, Operations: 0 + (null) => Input_1 + (null) => Dscnnconv_1weights_quantfakequ + (null) => Dscnnconv_1conv2d_fold_bias + (null) => S4_Mul_scale + (null) => S4_Mul_shift + (null) => S4_Infos + (null) => Dscnnconv_ds_1dw_convweights_q + (null) => Dscnnconv_ds_1dw_convdepthwise + (null) => S7_Mul_scale + (null) => S7_Mul_shift + (null) => S7_Infos + (null) => Dscnnconv_ds_1pw_convweights_q + (null) => Dscnnconv_ds_1pw_convconv2d_fo + (null) => S10_Mul_scale + (null) => S10_Mul_shift + (null) => S10_Infos + (null) => Dscnnconv_ds_2dw_convweights_q + (null) => Dscnnconv_ds_2dw_convdepthwise + (null) => S13_Mul_scale + (null) => S13_Mul_shift + (null) => S13_Infos + (null) => Dscnnconv_ds_2pw_convweights_q + (null) => Dscnnconv_ds_2pw_convconv2d_fo + (null) => S16_Mul_scale + (null) => S16_Mul_shift + (null) => S16_Infos + (null) => Dscnnconv_ds_3dw_convweights_q + (null) => Dscnnconv_ds_3dw_convdepthwise + (null) => S19_Mul_scale + (null) => S19_Mul_shift + (null) => S19_Infos + (null) => Dscnnconv_ds_3pw_convweights_q + (null) => Dscnnconv_ds_3pw_convconv2d_fo + (null) => S22_Mul_scale + (null) => S22_Mul_shift + (null) => S22_Infos + (null) => Dscnnconv_ds_4dw_convweights_q + (null) => Dscnnconv_ds_4dw_convdepthwise + (null) => S25_Mul_scale + (null) => S25_Mul_shift + (null) => S25_Infos + (null) => Dscnnconv_ds_4pw_convweights_q + (null) => Dscnnconv_ds_4pw_convconv2d_fo + (null) => S28_Mul_scale + (null) => S28_Mul_shift + (null) => S28_Infos + (null) => S29_Infos + (null) => Dscnnfc1weights_quantfakequant + (null) => Dscnnfc1matmul_bias + (null) => S32_Mul_scale + (null) => S32_Mul_shift + (null) => S32_Infos + (null) => S33_Infos + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: 2 3 4 5 6 7 8 9 1 10 11 12 + + Living Dynamic Symbols: [Input_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 1, Channel 1 8: UKer S4_Conv2d_64x1x10x4_Relu, Operations: 328000 + I Buff In => Input_1 --L2-- Size: 490, L3_Move: 0, L2_Move: 490, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_1weights_quantfakequ --L2-- Size: 2560, L3_Move: 0, L2_Move: 2560, TileOverhead: 1.000000, L2Buff: 0, Addr: 876 +CI Buff Bias => Dscnnconv_1conv2d_fold_bias --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 492 + O Buff Out => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 3436 +CI Buff Scale => S4_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 748 +CI Buff ScaleN => S4_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 812 +CI Buff Infos => S4_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 43436 + Kernel Memory : L3: 0, L2: 11443 + Kernel Total Memory: 11443, L3 moves: 0, L2 moves: 11443, Move overhead: 1.000000 + Kernel Operations : 328000 [KernelOper/GraphOper: 12.126680%], Move/Operation ratio: [L3: 0.000000, L2: 0.034887] + Successors: 2 + + Living Dynamic Symbols: [Input_1] [S4_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 2, Channel 0 0: UKer S7_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S4_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_1dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_1dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S7_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S7_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S7_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 3 + + Living Dynamic Symbols: [S4_Output] [S7_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 3, Channel 0 0: UKer S10_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S7_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_1pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_1pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S10_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S10_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S10_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S10_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 4 + + Living Dynamic Symbols: [S7_Output] [S10_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 4, Channel 0 0: UKer S13_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S10_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_2dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_2dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S13_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S13_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S13_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S13_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 5 + + Living Dynamic Symbols: [S10_Output] [S13_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 5, Channel 0 0: UKer S16_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S13_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_2pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_2pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S16_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S16_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S16_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S16_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 6 + + Living Dynamic Symbols: [S13_Output] [S16_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 6, Channel 0 0: UKer S19_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S16_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_3dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_3dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S19_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S19_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S19_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S19_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 7 + + Living Dynamic Symbols: [S16_Output] [S19_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 7, Channel 0 0: UKer S22_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S19_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_3pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_3pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S22_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S22_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S22_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S22_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 8 + + Living Dynamic Symbols: [S19_Output] [S22_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 8, Channel 0 0: UKer S25_Conv2d_64x1x3x3_Relu, Operations: 80000 + I In => S22_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnconv_ds_4dw_convweights_q --L2-- Size: 576, L3_Move: 0, L2_Move: 576, TileOverhead: 1.000000, L2Buff: 0, Addr: 10384 +CI Buff Bias => Dscnnconv_ds_4dw_convdepthwise --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 10000 + O Out => S25_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 10960 +CI Buff Scale => S25_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10256 +CI Buff ScaleN => S25_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 10320 +CI Buff Infos => S25_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 40960 + Kernel Memory : L3: 0, L2: 16969 + Kernel Total Memory: 16969, L3 moves: 0, L2 moves: 16969, Move overhead: 1.000000 + Kernel Operations : 80000 [KernelOper/GraphOper: 2.957727%], Move/Operation ratio: [L3: 0.000000, L2: 0.212113] + Successors: 9 + + Living Dynamic Symbols: [S22_Output] [S25_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 9, Channel 0 0: UKer S28_Conv2d_64x64x1x1_Relu, Operations: 512000 + I Buff In2 => S25_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 256 +CI Buff In1 => Dscnnconv_ds_4pw_convweights_q --L2-- Size: 4096, L3_Move: 0, L2_Move: 4096, TileOverhead: 1.000000, L2Buff: 0, Addr: 16268 +CI Buff Bias => Dscnnconv_ds_4pw_convconv2d_fo --L2-- Size: 256, L3_Move: 0, L2_Move: 256, TileOverhead: 1.000000, L2Buff: 0, Addr: 20364 + O Buff Out => S28_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 8256 +CI Buff Scale => S28_Mul_scale --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20620 +CI Buff ScaleN => S28_Mul_shift --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 20684 +CI Buff Infos => S28_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 16256 + Kernel Memory : L3: 0, L2: 20489 + Kernel Total Memory: 20489, L3 moves: 0, L2 moves: 20489, Move overhead: 1.000000 + Kernel Operations : 512000 [KernelOper/GraphOper: 18.929450%], Move/Operation ratio: [L3: 0.000000, L2: 0.040018] + Successors: 10 + + Living Dynamic Symbols: [S25_Output] [S28_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 10, Channel 0 0: UKer S29_AveragePool_25x5, Operations: 8000 + I Buff In => S28_Output --L2-- Size: 8000, L3_Move: 0, L2_Move: 8000, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => S29_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 8000 +CI Buff Infos => S29_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 8064 + Kernel Memory : L3: 0, L2: 8073 + Kernel Total Memory: 8073, L3 moves: 0, L2 moves: 8073, Move overhead: 1.000000 + Kernel Operations : 8000 [KernelOper/GraphOper: 0.295773%], Move/Operation ratio: [L3: 0.000000, L2: 1.009125] + Successors: 11 + + Living Dynamic Symbols: [S28_Output] [S29_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 11, Channel 0 0: UKer S32_Linear_12x64x1x1, Operations: 768 + I Buff In => S29_Output --L2-- Size: 64, L3_Move: 0, L2_Move: 64, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 +CI Buff Filter => Dscnnfc1weights_quantfakequant --L2-- Size: 768, L3_Move: 0, L2_Move: 768, TileOverhead: 1.000000, L2Buff: 0, Addr: 64 +CI Buff Bias => Dscnnfc1matmul_bias --L2-- Size: 48, L3_Move: 0, L2_Move: 48, TileOverhead: 1.000000, L2Buff: 0, Addr: 832 + O Buff Out => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 880 +CI Buff Scale => S32_Mul_scale --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 892 +CI Buff ScaleN => S32_Mul_shift --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 904 +CI Buff Infos => S32_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 916 + Kernel Memory : L3: 0, L2: 925 + Kernel Total Memory: 925, L3 moves: 0, L2 moves: 925, Move overhead: 1.000000 + Kernel Operations : 768 [KernelOper/GraphOper: 0.028394%], Move/Operation ratio: [L3: 0.000000, L2: 1.204427] + Successors: 12 + + Living Dynamic Symbols: [S29_Output] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 12, Channel 0 0: UKer S33_SoftMax, Operations: 12 + I Buff In => S32_Output --L2-- Size: 12, L3_Move: 0, L2_Move: 12, TileOverhead: 1.000000, L2Buff: 0, Addr: 0 + O Buff Out => Output_1 --L2-- Size: 24, L3_Move: 0, L2_Move: 24, TileOverhead: 1.000000, L2Buff: 0, Addr: 12 +CI Buff Infos => S33_Infos --L2-- Size: 9, L3_Move: 0, L2_Move: 9, TileOverhead: 1.000000, L2Buff: 0, Addr: 36 + Kernel Memory : L3: 0, L2: 45 + Kernel Total Memory: 45, L3 moves: 0, L2 moves: 45, Move overhead: 1.000000 + Kernel Operations : 12 [KernelOper/GraphOper: 0.000444%], Move/Operation ratio: [L3: 0.000000, L2: 3.750000] + Successors: 13 + + Living Dynamic Symbols: [Output_1] [S32_Output] + +------------------------------------------------------------------------------------------------------------------------------------------------ +Node 13, Channel 0 0: GraphExit __GraphExit__, Operations: 0 + (null) => Output_1 + Kernel Memory : L3: 0, L2: 0 + Kernel Total Memory: 0, L3 moves: 0, L2 moves: 0, Move overhead: 1.000000 + Kernel Operations : 0 [KernelOper/GraphOper: 0.000000%], Move/Operation ratio: [L3: 0.000000, L2: 0.000000] + Successors: + + Living Dynamic Symbols: [Output_1] + +------------------------------------------------------------------------------------------------------------------------------------------------ + Graph nodes max local memory : L3: 0, L2: 20489 + Graph nodes min global memory: L3: 0, L2: 20492 + Graph sum of kernel arguments size: 170318, L3 moves: 0, L2 moves: 170318, Move overhead: 1.000000 + Graph total operations: 2704780 + + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Memory bandwidth report: + + Sum of All Kernel's arguments size: 170318, Total L3_Move: 0, Total L2_Move: 170318, Tiling Overhead Average: 1.000000 + +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Total minimum memory requirement report: + + L3 Memory L2 Memory + Dynamic 0 16000 + Const 0 4492 + Total 0 20492 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph symbols allocation: + + Input_1 Externally allocated + Dscnnconv_1weights_quantfakequ INSTALL: HyperFlash[ 0: 13]@ 16384 LOAD: L2[ 0: 13]@ 16384 EXEC: L2[ 0: 13]@ 16384 , Size: 2560 + Dscnnconv_1conv2d_fold_bias INSTALL: HyperFlash[ 0: 13]@ 22016 LOAD: L2[ 0: 13]@ 22016 EXEC: L2[ 0: 13]@ 22016 , Size: 256 + S4_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24320 LOAD: L2[ 0: 13]@ 24320 EXEC: L2[ 0: 13]@ 24320 , Size: 64 + S4_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24384 LOAD: L2[ 0: 13]@ 24384 EXEC: L2[ 0: 13]@ 24384 , Size: 64 + S4_Infos INSTALL: HyperFlash[ 0: 13]@ 25520 LOAD: L2[ 0: 13]@ 25520 EXEC: L2[ 0: 13]@ 25520 , Size: 9 + Dscnnconv_ds_1dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 19712 LOAD: L2[ 0: 13]@ 19712 EXEC: L2[ 0: 13]@ 19712 , Size: 576 + Dscnnconv_ds_1dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 22272 LOAD: L2[ 0: 13]@ 22272 EXEC: L2[ 0: 13]@ 22272 , Size: 256 + S7_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24448 LOAD: L2[ 0: 13]@ 24448 EXEC: L2[ 0: 13]@ 24448 , Size: 64 + S7_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24512 LOAD: L2[ 0: 13]@ 24512 EXEC: L2[ 0: 13]@ 24512 , Size: 64 + S7_Infos INSTALL: HyperFlash[ 0: 13]@ 25532 LOAD: L2[ 0: 13]@ 25532 EXEC: L2[ 0: 13]@ 25532 , Size: 9 + Dscnnconv_ds_1pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 0 LOAD: L2[ 0: 13]@ 0 EXEC: L2[ 0: 13]@ 0 , Size: 4096 + Dscnnconv_ds_1pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 22528 LOAD: L2[ 0: 13]@ 22528 EXEC: L2[ 0: 13]@ 22528 , Size: 256 + S10_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24576 LOAD: L2[ 0: 13]@ 24576 EXEC: L2[ 0: 13]@ 24576 , Size: 64 + S10_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24640 LOAD: L2[ 0: 13]@ 24640 EXEC: L2[ 0: 13]@ 24640 , Size: 64 + S10_Infos INSTALL: HyperFlash[ 0: 13]@ 25544 LOAD: L2[ 0: 13]@ 25544 EXEC: L2[ 0: 13]@ 25544 , Size: 9 + Dscnnconv_ds_2dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 20288 LOAD: L2[ 0: 13]@ 20288 EXEC: L2[ 0: 13]@ 20288 , Size: 576 + Dscnnconv_ds_2dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 22784 LOAD: L2[ 0: 13]@ 22784 EXEC: L2[ 0: 13]@ 22784 , Size: 256 + S13_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24704 LOAD: L2[ 0: 13]@ 24704 EXEC: L2[ 0: 13]@ 24704 , Size: 64 + S13_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24768 LOAD: L2[ 0: 13]@ 24768 EXEC: L2[ 0: 13]@ 24768 , Size: 64 + S13_Infos INSTALL: HyperFlash[ 0: 13]@ 25556 LOAD: L2[ 0: 13]@ 25556 EXEC: L2[ 0: 13]@ 25556 , Size: 9 + Dscnnconv_ds_2pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 4096 LOAD: L2[ 0: 13]@ 4096 EXEC: L2[ 0: 13]@ 4096 , Size: 4096 + Dscnnconv_ds_2pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 23040 LOAD: L2[ 0: 13]@ 23040 EXEC: L2[ 0: 13]@ 23040 , Size: 256 + S16_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24832 LOAD: L2[ 0: 13]@ 24832 EXEC: L2[ 0: 13]@ 24832 , Size: 64 + S16_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 24896 LOAD: L2[ 0: 13]@ 24896 EXEC: L2[ 0: 13]@ 24896 , Size: 64 + S16_Infos INSTALL: HyperFlash[ 0: 13]@ 25568 LOAD: L2[ 0: 13]@ 25568 EXEC: L2[ 0: 13]@ 25568 , Size: 9 + Dscnnconv_ds_3dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 20864 LOAD: L2[ 0: 13]@ 20864 EXEC: L2[ 0: 13]@ 20864 , Size: 576 + Dscnnconv_ds_3dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 23296 LOAD: L2[ 0: 13]@ 23296 EXEC: L2[ 0: 13]@ 23296 , Size: 256 + S19_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 24960 LOAD: L2[ 0: 13]@ 24960 EXEC: L2[ 0: 13]@ 24960 , Size: 64 + S19_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25024 LOAD: L2[ 0: 13]@ 25024 EXEC: L2[ 0: 13]@ 25024 , Size: 64 + S19_Infos INSTALL: HyperFlash[ 0: 13]@ 25580 LOAD: L2[ 0: 13]@ 25580 EXEC: L2[ 0: 13]@ 25580 , Size: 9 + Dscnnconv_ds_3pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 8192 LOAD: L2[ 0: 13]@ 8192 EXEC: L2[ 0: 13]@ 8192 , Size: 4096 + Dscnnconv_ds_3pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 23552 LOAD: L2[ 0: 13]@ 23552 EXEC: L2[ 0: 13]@ 23552 , Size: 256 + S22_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25088 LOAD: L2[ 0: 13]@ 25088 EXEC: L2[ 0: 13]@ 25088 , Size: 64 + S22_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25152 LOAD: L2[ 0: 13]@ 25152 EXEC: L2[ 0: 13]@ 25152 , Size: 64 + S22_Infos INSTALL: HyperFlash[ 0: 13]@ 25592 LOAD: L2[ 0: 13]@ 25592 EXEC: L2[ 0: 13]@ 25592 , Size: 9 + Dscnnconv_ds_4dw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 21440 LOAD: L2[ 0: 13]@ 21440 EXEC: L2[ 0: 13]@ 21440 , Size: 576 + Dscnnconv_ds_4dw_convdepthwise INSTALL: HyperFlash[ 0: 13]@ 23808 LOAD: L2[ 0: 13]@ 23808 EXEC: L2[ 0: 13]@ 23808 , Size: 256 + S25_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25216 LOAD: L2[ 0: 13]@ 25216 EXEC: L2[ 0: 13]@ 25216 , Size: 64 + S25_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25280 LOAD: L2[ 0: 13]@ 25280 EXEC: L2[ 0: 13]@ 25280 , Size: 64 + S25_Infos INSTALL: HyperFlash[ 0: 13]@ 25604 LOAD: L2[ 0: 13]@ 25604 EXEC: L2[ 0: 13]@ 25604 , Size: 9 + Dscnnconv_ds_4pw_convweights_q INSTALL: HyperFlash[ 0: 13]@ 12288 LOAD: L2[ 0: 13]@ 12288 EXEC: L2[ 0: 13]@ 12288 , Size: 4096 + Dscnnconv_ds_4pw_convconv2d_fo INSTALL: HyperFlash[ 0: 13]@ 24064 LOAD: L2[ 0: 13]@ 24064 EXEC: L2[ 0: 13]@ 24064 , Size: 256 + S28_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25344 LOAD: L2[ 0: 13]@ 25344 EXEC: L2[ 0: 13]@ 25344 , Size: 64 + S28_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25408 LOAD: L2[ 0: 13]@ 25408 EXEC: L2[ 0: 13]@ 25408 , Size: 64 + S28_Infos INSTALL: HyperFlash[ 0: 13]@ 25616 LOAD: L2[ 0: 13]@ 25616 EXEC: L2[ 0: 13]@ 25616 , Size: 9 + S29_Infos INSTALL: HyperFlash[ 0: 13]@ 25628 LOAD: L2[ 0: 13]@ 25628 EXEC: L2[ 0: 13]@ 25628 , Size: 9 + Dscnnfc1weights_quantfakequant INSTALL: HyperFlash[ 0: 13]@ 18944 LOAD: L2[ 0: 13]@ 18944 EXEC: L2[ 0: 13]@ 18944 , Size: 768 + Dscnnfc1matmul_bias INSTALL: HyperFlash[ 0: 13]@ 25472 LOAD: L2[ 0: 13]@ 25472 EXEC: L2[ 0: 13]@ 25472 , Size: 48 + S32_Mul_scale INSTALL: HyperFlash[ 0: 13]@ 25640 LOAD: L2[ 0: 13]@ 25640 EXEC: L2[ 0: 13]@ 25640 , Size: 12 + S32_Mul_shift INSTALL: HyperFlash[ 0: 13]@ 25652 LOAD: L2[ 0: 13]@ 25652 EXEC: L2[ 0: 13]@ 25652 , Size: 12 + S32_Infos INSTALL: HyperFlash[ 0: 13]@ 25664 LOAD: L2[ 0: 13]@ 25664 EXEC: L2[ 0: 13]@ 25664 , Size: 9 + S33_Infos INSTALL: HyperFlash[ 0: 13]@ 25676 LOAD: L2[ 0: 13]@ 25676 EXEC: L2[ 0: 13]@ 25676 , Size: 9 + Output_1 Externally allocated + S4_Output EXEC: L2[ 1: 2]@ 33688 , Size: 8000 + S7_Output EXEC: L2[ 2: 3]@ 25688 , Size: 8000 + S10_Output EXEC: L2[ 3: 4]@ 33688 , Size: 8000 + S13_Output EXEC: L2[ 4: 5]@ 25688 , Size: 8000 + S16_Output EXEC: L2[ 5: 6]@ 33688 , Size: 8000 + S19_Output EXEC: L2[ 6: 7]@ 25688 , Size: 8000 + S22_Output EXEC: L2[ 7: 8]@ 33688 , Size: 8000 + S25_Output EXEC: L2[ 8: 9]@ 41688 , Size: 8000 + S28_Output EXEC: L2[ 9: 10]@ 25688 , Size: 8000 + S29_Output EXEC: L2[ 10: 11]@ 33688 , Size: 64 + S32_Output EXEC: L2[ 11: 12]@ 25688 , Size: 12 +------------------------------------------------------------------------------------------------------------------------------------------------ + +------------------------------------------------------------------------------------------------------------------------------------------------ +Graph stacked tensors +------------------------------------------------------------------------------------------------------------------------------------------------ + +Generating Code For User Kernel: S4_Conv2d_64x1x10x4_Relu +Generating Code For User Kernel: S7_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S10_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S13_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S16_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S19_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S22_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S25_Conv2d_64x1x3x3_Relu +Generating Code For User Kernel: S28_Conv2d_64x64x1x1_Relu +Generating Code For User Kernel: S29_AveragePool_25x5 +Generating Code For User Kernel: S32_Linear_12x64x1x1 +Generating Code For User Kernel: S33_SoftMax +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1weights_quantfakequ.tensor: 2560 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_1conv2d_fold_bias.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S4_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S7_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_1pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S10_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S13_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_2pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S16_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S19_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_3pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S22_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convweights_q.tensor: 576 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4dw_convdepthwise.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S25_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convweights_q.tensor: 4096 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnconv_ds_4pw_convconv2d_fo.tensor: 64 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_scale.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Mul_shift.tensor: 64 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S28_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S29_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1weights_quantfakequant.tensor: 768 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/Dscnnfc1matmul_bias.tensor: 12 Word items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_scale.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Mul_shift.tensor: 12 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S32_Infos.tensor: 9 Byte items +Loading coefficient file ./BUILD_MODEL_SQ8_EMUL/tensors/S33_Infos.tensor: 9 Byte items +Flash image KWS_ds_cnn_s_quant_L3_Flash_Const.dat (size 25688) for device AT_MEM_L3_HFLASH successfuly generated + +Shared L1 Memory size (Bytes) : Given: 48736, Used: 43448 +L2 Memory size (Bytes) : Given: 350000, Used: 49685 +L3 Memory size (Bytes) : Given: 6388608, Used: 0 + +L3 Memory bandwidth for 1 graph run : 0 Bytes +L2 Memory bandwidth for 1 graph run : 170318 Bytes +Sum of all Kernels arguments size : 170318 Bytes +Tiling Bandwith overhead : 1.000000 Move/KerArgSize +Sum of baseline bandwidth : 4248224 Bytes +Percentage of baseline BW for L2 : 4.00916 % +Percentage of baseline BW for L3 : 0 % +Sum of all Kernels operations : 2704780 Operations +Total amount of flash coefficients : 25688 Bytes + +Basic kernels library : CNN_BasicKernels_SQ8.h + : KWS_ds_cnn_s_quant.h +Output Directory : BUILD_MODEL_SQ8_EMUL + +The following files have been generated: + KWS_ds_cnn_s_quantKernels.c Generated C code for the user kernels and the user kernels groups + KWS_ds_cnn_s_quantKernels.h Header file for the generated C code + KWS_ds_cnn_s_quant_L3_Flash_Const.dat Flash content for Graph constants +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c main_emulation.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/main_emulation.d -o BUILD_EMUL/main_emulation.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.d -o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.d -o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -c /home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.c -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -MD -MF BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.d -o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o +gcc -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -MMD -MP -DSMALL -DWITH_MFCC -g -O3 -D__EMUL__ -DAT_INPUT_HEIGHT=49 -DAT_INPUT_WIDTH=10 -DAT_INPUT_COLORS= -I. -I/home/marco-gwt/GWT/AutotilerV2/Emulation -I/home/marco-gwt/GWT/AutotilerV2/Autotiler -I/home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries -I/home/marco-gwt/GWT/AutotilerV2/DSP_Libraries -I/home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8 -IBUILD_MODEL_SQ8_EMUL -I/home/marco-gwt/GWT/gap_sdk/libs/gap_lib/include -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -I/home/marco-gwt/GWT/AutotilerV2/DSP_Generators -I/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL -o kws_ds_cnn_emul BUILD_EMUL/main_emulation.o BUILD_EMUL/BUILD_MODEL_SQ8_EMUL/KWS_ds_cnn_s_quantKernels.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/img_io/ImgIO.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/SSD_BasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/Generators/BilinearResizes/ResizeBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries/CNN_CopyBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_AT_Misc.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/math_funcs.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Activation_HWC_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Bias_Linear_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Pooling_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_Conv_DW_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_MatAlgebra_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/CNN_SoftMax_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/CNN_Libraries_SQ8/RNN_SQ8.o BUILD_EMUL//home/marco-gwt/GWT/gap_sdk/libs/gap_lib/wav_io/wavIO.o BUILD_EMUL//home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/BUILD_MFCC_MODEL/MFCCKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/TwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/RFFTTwiddlesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/LUT_Tables/SwapTablesDef.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/MfccBasicKernels.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/FFT_Library.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/CmplxFunctions.o BUILD_EMUL//home/marco-gwt/GWT/AutotilerV2/DSP_Libraries/PreProcessing.o -lm +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +W0608 13:27:02.940144 140090316728128 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +W0608 13:27:02.940568 140090316728128 module_wrapper.py:139] From utils/test_accuracy_emul.py:95: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +W0608 13:27:02.940791 140090316728128 module_wrapper.py:139] From utils/test_accuracy_emul.py:96: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead. + +2021-06-08 13:27:02.941798: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA +2021-06-08 13:27:02.948856: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2096060000 Hz +2021-06-08 13:27:02.949153: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55ac7f249e70 initialized for platform Host (this does not guarantee that XLA will be used). Devices: +2021-06-08 13:27:02.949194: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2021-06-08 13:27:02.951213: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/marco-gwt/GWT/gap_sdk/install/workstation/lib +2021-06-08 13:27:02.951284: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: UNKNOWN ERROR (303) +2021-06-08 13:27:02.951314: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ubuntu): /proc/driver/nvidia/version does not exist +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +W0608 13:28:36.446930 140090316728128 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:347: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +WARNING:tensorflow:From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +W0608 13:28:36.449008 140090316728128 module_wrapper.py:139] From /home/marco-gwt/GWT/NN_menu/starters/keyword_spotting/utils/input_data.py:348: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +WARNING:tensorflow:From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +W0608 13:28:36.622331 140090316728128 module_wrapper.py:139] From utils/test_accuracy_emul.py:111: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead. + +INFO:tensorflow:Validation set size:4445 +I0608 13:28:36.622596 140090316728128 test_accuracy_emul.py:111] Validation set size:4445 +INFO:tensorflow:Test set size:4890 +I0608 13:30:17.813777 140090316728128 test_accuracy_emul.py:157] Test set size:4890 +rm: cannot remove 'test.pgm': No such file or directory +make[1]: Leaving directory '/home/marco-gwt/GWT/NN_menu/starters/keyword_spotting' +{'desired_samples': 16000, 'window_size_samples': 640, 'window_stride_samples': 320, 'spectrogram_length': 49, 'dct_coefficient_count': 10, 'fingerprint_width': 10, 'fingerprint_size': 490, 'label_count': 12, 'sample_rate': 16000, 'preprocess': 'mfcc', 'average_window_width': -1, 'use_power': False} +Pred/Tot: 89/ 100 Accuracy: 89.00% +Pred/Tot: 182/ 200 Accuracy: 91.00% +Pred/Tot: 272/ 300 Accuracy: 90.67% +Pred/Tot: 355/ 400 Accuracy: 88.75% +Pred/Tot: 445/ 500 Accuracy: 89.00% +Pred/Tot: 534/ 600 Accuracy: 89.00% +Pred/Tot: 624/ 700 Accuracy: 89.14% +Pred/Tot: 712/ 800 Accuracy: 89.00% +Pred/Tot: 801/ 900 Accuracy: 89.00% +Pred/Tot: 896/1000 Accuracy: 89.60% +Pred/Tot: 987/1100 Accuracy: 89.73% +Pred/Tot: 1078/1200 Accuracy: 89.83% +Pred/Tot: 1165/1300 Accuracy: 89.62% +Pred/Tot: 1255/1400 Accuracy: 89.64% +Pred/Tot: 1336/1500 Accuracy: 89.07% +Pred/Tot: 1430/1600 Accuracy: 89.38% +Pred/Tot: 1519/1700 Accuracy: 89.35% +Pred/Tot: 1606/1800 Accuracy: 89.22% +Pred/Tot: 1694/1900 Accuracy: 89.16% +Pred/Tot: 1781/2000 Accuracy: 89.05% +Pred/Tot: 1867/2100 Accuracy: 88.90% +Pred/Tot: 1957/2200 Accuracy: 88.95% +Pred/Tot: 2048/2300 Accuracy: 89.04% +Pred/Tot: 2137/2400 Accuracy: 89.04% +Pred/Tot: 2223/2500 Accuracy: 88.92% +Pred/Tot: 2313/2600 Accuracy: 88.96% +Pred/Tot: 2405/2700 Accuracy: 89.07% +Pred/Tot: 2497/2800 Accuracy: 89.18% +Pred/Tot: 2583/2900 Accuracy: 89.07% +Pred/Tot: 2665/3000 Accuracy: 88.83% +Pred/Tot: 2756/3100 Accuracy: 88.90% +Pred/Tot: 2849/3200 Accuracy: 89.03% +Pred/Tot: 2942/3300 Accuracy: 89.15% +Pred/Tot: 3027/3400 Accuracy: 89.03% +Pred/Tot: 3115/3500 Accuracy: 89.00% +Pred/Tot: 3205/3600 Accuracy: 89.03% +Pred/Tot: 3290/3700 Accuracy: 88.92% +Pred/Tot: 3383/3800 Accuracy: 89.03% +Pred/Tot: 3470/3900 Accuracy: 88.97% +Pred/Tot: 3558/4000 Accuracy: 88.95% +Pred/Tot: 3648/4100 Accuracy: 88.98% +Pred/Tot: 3738/4200 Accuracy: 89.00% +Pred/Tot: 3826/4300 Accuracy: 88.98% +Pred/Tot: 3915/4400 Accuracy: 88.98% + +FINAL VALIDATION ACCURACY: +Pred/Tot: 3956/4444 Accuracy: 89.02% + +Confusion matrix: +[[371 0 0 0 0 0 0 0 0 0 0 0] + [ 2 272 4 4 6 12 11 24 11 1 7 17] + [ 1 3 376 4 0 2 9 1 0 0 0 1] + [ 1 11 4 323 0 23 1 0 0 0 0 43] + [ 0 6 0 0 325 2 3 1 1 4 8 0] + [ 1 8 1 7 0 348 2 0 1 0 3 6] + [ 0 8 8 3 2 1 318 10 0 0 1 1] + [ 0 7 0 1 0 0 4 347 1 0 0 3] + [ 2 12 0 0 5 0 0 1 331 10 0 2] + [ 1 4 0 0 39 1 2 0 15 303 2 6] + [ 3 7 0 0 14 7 3 0 0 2 311 3] + [ 3 16 1 3 1 12 0 0 1 1 3 331]] +Pred/Tot: 86/ 100 Accuracy: 86.00% +Pred/Tot: 178/ 200 Accuracy: 89.00% +Pred/Tot: 270/ 300 Accuracy: 90.00% +Pred/Tot: 355/ 400 Accuracy: 88.75% +Pred/Tot: 439/ 500 Accuracy: 87.80% +Pred/Tot: 529/ 600 Accuracy: 88.17% +Pred/Tot: 620/ 700 Accuracy: 88.57% +Pred/Tot: 711/ 800 Accuracy: 88.88% +Pred/Tot: 803/ 900 Accuracy: 89.22% +Pred/Tot: 896/1000 Accuracy: 89.60% +Pred/Tot: 981/1100 Accuracy: 89.18% +Pred/Tot: 1075/1200 Accuracy: 89.58% +Pred/Tot: 1161/1300 Accuracy: 89.31% +Pred/Tot: 1250/1400 Accuracy: 89.29% +Pred/Tot: 1337/1500 Accuracy: 89.13% +Pred/Tot: 1431/1600 Accuracy: 89.44% +Pred/Tot: 1521/1700 Accuracy: 89.47% +Pred/Tot: 1615/1800 Accuracy: 89.72% +Pred/Tot: 1703/1900 Accuracy: 89.63% +Pred/Tot: 1794/2000 Accuracy: 89.70% +Pred/Tot: 1885/2100 Accuracy: 89.76% +Pred/Tot: 1979/2200 Accuracy: 89.95% +Pred/Tot: 2071/2300 Accuracy: 90.04% +Pred/Tot: 2160/2400 Accuracy: 90.00% +Pred/Tot: 2248/2500 Accuracy: 89.92% +Pred/Tot: 2336/2600 Accuracy: 89.85% +Pred/Tot: 2428/2700 Accuracy: 89.93% +Pred/Tot: 2516/2800 Accuracy: 89.86% +Pred/Tot: 2604/2900 Accuracy: 89.79% +Pred/Tot: 2689/3000 Accuracy: 89.63% +Pred/Tot: 2782/3100 Accuracy: 89.74% +Pred/Tot: 2874/3200 Accuracy: 89.81% +Pred/Tot: 2967/3300 Accuracy: 89.91% +Pred/Tot: 3056/3400 Accuracy: 89.88% +Pred/Tot: 3144/3500 Accuracy: 89.83% +Pred/Tot: 3238/3600 Accuracy: 89.94% +Pred/Tot: 3320/3700 Accuracy: 89.73% +Pred/Tot: 3413/3800 Accuracy: 89.82% +Pred/Tot: 3504/3900 Accuracy: 89.85% +Pred/Tot: 3586/4000 Accuracy: 89.65% +Pred/Tot: 3676/4100 Accuracy: 89.66% +Pred/Tot: 3769/4200 Accuracy: 89.74% +Pred/Tot: 3854/4300 Accuracy: 89.63% +Pred/Tot: 3946/4400 Accuracy: 89.68% +Pred/Tot: 4037/4500 Accuracy: 89.71% +Pred/Tot: 4124/4600 Accuracy: 89.65% +Pred/Tot: 4210/4700 Accuracy: 89.57% +Pred/Tot: 4299/4800 Accuracy: 89.56% + +FINAL TESTING ACCURACY: +Pred/Tot: 4381/4889 Accuracy: 89.61% + +Confusion matrix: +[[408 0 0 0 0 0 0 0 0 0 0 0] + [ 0 327 3 5 4 8 8 19 10 2 4 18] + [ 1 11 395 2 0 1 8 1 0 0 0 0] + [ 1 13 5 320 0 23 2 1 0 0 0 40] + [ 0 6 0 1 390 0 2 1 5 4 8 8] + [ 1 12 3 2 0 362 6 0 5 2 2 11] + [ 2 3 9 1 1 0 381 12 0 0 2 1] + [ 0 5 0 1 1 0 6 380 0 0 1 2] + [ 0 15 0 0 9 6 1 1 360 3 0 1] + [ 1 10 1 0 44 0 2 0 9 318 2 15] + [ 0 2 0 0 14 7 2 0 0 3 375 8] + [ 0 8 0 12 2 9 3 1 0 1 1 365]] diff --git a/common/model_decl.mk b/common/model_decl.mk index 3a6ed31..5c50217 100644 --- a/common/model_decl.mk +++ b/common/model_decl.mk @@ -10,7 +10,7 @@ MODEL_PREFIX?=GapFlow MODEL_PYTHON=python3 -TRAINED_TFLITE_MODEL=model/$(MODEL_PREFIX).tflite +MODEL_PATH = $(MODEL_BUILD)/$(MODEL_PREFIX).tflite MODEL_COMMON ?= common MODEL_COMMON_INC ?= $(GAP_SDK_HOME)/libs/gap_lib/include MODEL_COMMON_SRC ?= $(GAP_SDK_HOME)/libs/gap_lib/img_io @@ -43,7 +43,6 @@ $(info script $(NNTOOL_SCRIPT)) ifndef NNTOOL_SCRIPT NNTOOL_SCRIPT=model/nntool_script endif -IMAGES = images RM=rm -f NNTOOL=nntool diff --git a/common/model_rules.mk b/common/model_rules.mk index 5c3a517..5815b9e 100644 --- a/common/model_rules.mk +++ b/common/model_rules.mk @@ -16,7 +16,7 @@ endif USE_DISP=1 ifdef USE_DISP - SDL_FLAGS= -lSDL2 -lSDL2_ttf -DAT_DISPLAY + SDL_FLAGS= -lSDL2 -lSDL2_ttf else SDL_FLAGS= endif @@ -37,7 +37,7 @@ endif $(MODEL_BUILD): mkdir $(MODEL_BUILD) -$(MODEL_TFLITE): $(TRAINED_TFLITE_MODEL) | $(MODEL_BUILD) +$(MODEL_PATH): $(TRAINED_MODEL) | $(MODEL_BUILD) cp $< $@ # Creates an NNTOOL state file by running the commands in the script @@ -45,19 +45,24 @@ $(MODEL_TFLITE): $(TRAINED_TFLITE_MODEL) | $(MODEL_BUILD) # The commands: # Adjust the model to match AutoTiler tensor order # Fuse nodes together to match fused AutoTiler generators +# Quantize the graph if not already done with tflite quantization # Save the graph state files +$(MODEL_STATE): $(MODEL_PATH) $(IMAGES) $(NNTOOL_SCRIPT) | $(MODEL_BUILD) + echo "GENERATING NNTOOL STATE FILE" + $(NNTOOL) -s $(NNTOOL_SCRIPT) $(MODEL_PATH) $(NNTOOL_EXTRA_FLAGS) + +nntool_state: $(MODEL_STATE) + # Runs NNTOOL with its state file to generate the autotiler model code -$(MODEL_BUILD)/$(MODEL_SRC): $(MODEL_TFLITE) | $(MODEL_BUILD) $(SAMPLES) - echo "GENERATING AUTOTILER MODEL $(MODEL_BUILD)" - sed -e "s|MODEL_SRC|$(MODEL_SRC)|g" -e "s|TENSORS_DIR|$(TENSORS_DIR)|g" -e "s|MODEL_BUILD|$(MODEL_BUILD)|g" -e "s|GRAPH_DUMP|$(NNTOOL_SET_GRAPH_DUMP)|g" -e "s|LARGE_OPT|$(LARGE_OPT)|g" \ - $(NNTOOL_SCRIPT_PARAMETRIC) > $(NNTOOL_SCRIPT) - $(NNTOOL) -s $(NNTOOL_SCRIPT) $< $(NNTOOL_EXTRA_FLAGS) +$(MODEL_BUILD)/$(MODEL_SRC) $(MODEL_EXPRESSIONS): $(MODEL_STATE) $(MODEL_PATH) | $(MODEL_BUILD) + echo "GENERATING AUTOTILER MODEL" + $(NNTOOL) -g -M $(MODEL_BUILD) -m $(MODEL_SRC) -T $(TENSORS_DIR) -H $(MODEL_HEADER) $(MODEL_GENFLAGS_EXTRA) $< nntool_gen: $(MODEL_BUILD)/$(MODEL_SRC) # Build the code generator from the model code -$(MODEL_GEN_EXE): $(CNN_GEN) $(MODEL_BUILD)/$(MODEL_SRC) $(EXTRA_GENERATOR_SRC) +$(MODEL_GEN_EXE): $(CNN_GEN) $(MODEL_BUILD)/$(MODEL_SRC) $(EXTRA_GENERATOR_SRC) | $(MODEL_BUILD) echo "COMPILING AUTOTILER MODEL" gcc -g -o $(MODEL_GEN_EXE) -I. -I$(TILER_INC) -I$(TILER_EMU_INC) $(CNN_GEN_INCLUDE) $(CNN_LIB_INCLUDE) $^ $(TILER_LIB) $(SDL_FLAGS) @@ -71,11 +76,13 @@ $(MODEL_GEN_C): $(MODEL_GEN_EXE) # A phony target to simplify including this in the main Makefile model: $(MODEL_GEN_C) -clean_model: +clean_at_model: $(RM) $(MODEL_GEN_EXE) - $(RM) -rf $(MODEL_BUILD) $(RM) $(MODEL_BUILD)/*.dat +clean_model: + $(RM) -rf $(MODEL_BUILD) + clean_train: $(RM) -rf $(MODEL_TRAIN_BUILD) diff --git a/emul.mk b/emul.mk index 7409e2c..a337c26 100644 --- a/emul.mk +++ b/emul.mk @@ -41,12 +41,10 @@ RM=rm -f QUANT_BITS=8 BUILD_DIR=BUILD -NNTOOL_SCRIPT_PARAMETRIC=model/nntool_script_params -ifeq ($(DUMP_TENSORS), 1) - NNTOOL_SET_GRAPH_DUMP = set graph_dump_tensor 7 -endif +#MODEL_NE16 ?= 0 +NNTOOL_SCRIPT=model/nntool_script +MODEL_SUFFIX = _SQ$(QUANT_BITS)_EMUL NNTOOL_EXTRA_FLAGS = -q -MODEL_SUFFIX = _$(QUANT_BITS)BIT_EMUL CLUSTER_STACK_SIZE=4096 CLUSTER_SLAVE_STACK_SIZE=1024 @@ -56,30 +54,27 @@ MODEL_L2_MEMORY=350000 MODEL_L3_MEMORY=6388608 MODEL_SIZE_CFLAGS = -DAT_INPUT_HEIGHT=$(AT_INPUT_HEIGHT) -DAT_INPUT_WIDTH=$(AT_INPUT_WIDTH) -DAT_INPUT_COLORS=$(AT_INPUT_COLORS) -include common/model_decl.mk -include mfcc_model.mk ifeq ($(USE_POWER), 1) # override the tflite model name to the one which expects power MFCC -> more efficient - TRAINED_TFLITE_MODEL=model/$(MODEL_PREFIX)_power.tflite + TRAINED_MODEL=model/$(MODEL_PREFIX)_power.tflite +else + TRAINED_MODEL=model/$(MODEL_PREFIX).tflite endif +include common/model_decl.mk +include mfcc_model.mk MODEL_GEN_EXTRA_FLAGS= -f $(MODEL_BUILD) CC = gcc SRCS = main_emulation.c $(MODEL_GEN_C) $(MODEL_COMMON_SRCS) $(CNN_LIB) -SRCS += $(GAP_LIB_PATH)/wav_io/wavIO.c $(MFCC_KER_SRCS) $(MFCCBUILD_DIR)/MFCCKernels.c +SRCS += $(GAP_LIB_PATH)/wav_io/wavIO.c $(MFCCBUILD_DIR)/MFCCKernels.c $(TILER_DSP_KERNEL_PATH)/LUT_Tables/TwiddlesDef.c $(TILER_DSP_KERNEL_PATH)/LUT_Tables/RFFTTwiddlesDef.c $(TILER_DSP_KERNEL_PATH)/LUT_Tables/SwapTablesDef.c +SRCS += $(TILER_DSP_KERNEL_PATH)/MfccBasicKernels.c $(TILER_DSP_KERNEL_PATH)/FFT_Library.c $(TILER_DSP_KERNEL_PATH)/CmplxFunctions.c $(TILER_DSP_KERNEL_PATH)/PreProcessing.c INCLUDES = -I. -I$(TILER_EMU_INC) -I$(TILER_INC) $(CNN_LIB_INCLUDE) -I$(MODEL_BUILD) -I$(GAP_SDK_HOME)/libs/gap_lib/include INCLUDES += -I$(MFCCBUILD_DIR) -I$(MFCC_GENERATOR) -I$(MFCCBUILD_DIR) ifeq ($(WITH_MFCC), 1) CFLAGS += -DWITH_MFCC endif -ifeq ($(USE_POWER), 0) - CFLAGS += -DUSE_ABS -endif -ifeq ($(USE_HIGH_PREC), 1) - CFLAGS += -DHIGH_PREC_FFT -endif -CFLAGS += -g -O3 -D__EMUL__ $(MODEL_SIZE_CFLAGS) +CFLAGS += -g -O3 -D__EMUL__ $(MODEL_SIZE_CFLAGS) LFLAGS = LIBS = -lm @@ -92,7 +87,7 @@ MAIN = kws_ds_cnn_emul generate_samples: python utils/generate_samples_images.py --dct_coefficient_count $(DCT_COUNT) --window_size_ms $(FRAME_SIZE_ms) --window_stride_ms $(FRAME_STEP_ms) -all: model mfcc_model $(MAIN) +all: model $(MFCCBUILD_DIR)/MFCCKernels.c $(MAIN) $(OBJS) : $(BUILD_DIR)/%.o : %.c @mkdir -p $(dir $@) @@ -101,7 +96,7 @@ $(OBJS) : $(BUILD_DIR)/%.o : %.c $(MAIN): $(OBJS) $(CC) $(CFLAGS) -MMD -MP $(CFLAGS) $(INCLUDES) -o $(MAIN) $(OBJS) $(LFLAGS) $(LIBS) -clean: clean_model clean_mfcc_model +clean: $(RM) -r $(BUILD_DIR) $(RM) $(MAIN) diff --git a/main_emulation.c b/main_emulation.c index 31edd69..66413d1 100644 --- a/main_emulation.c +++ b/main_emulation.c @@ -18,15 +18,15 @@ #include "Gap.h" #ifdef SMALL #include "KWS_ds_cnn_s_quantKernels.h" - #include "MFCC_params_SMALL.h" + #include "MFCC_params.h" #endif #ifdef MEDIUM #include "KWS_ds_cnn_m_quantKernels.h" - #include "MFCC_params_MEDIUM.h" + #include "MFCC_params.h" #endif #ifdef LARGE #include "KWS_ds_cnn_l_quantKernels.h" - #include "MFCC_params_LARGE.h" + #include "MFCC_params.h" #endif #ifdef WITH_MFCC #include "gaplib/wavIO.h" @@ -39,13 +39,14 @@ #define WAV_BUFFER_SIZE 16000 // Something more than 1sec@16kHz #define NUM_CLASSES 12 - -/* - DCT_NORMALIZATION -> np.sqrt(2/(N_DCT))*0.5 - NNTOOL_INPUT_SCALE_FLOAT -> 1.9372712 - - SCALE = DCT_NORMALIZATION * 2**(-QDCT) / NNTOOL_INPUT_SCALE_FLOAT -*/ +#define N_FRAME 49 +#define NORM 3 +#define MFCC_Q 15-NORM-7 + +//DCT_NORMALIZATION -> np.sqrt(2/(N_DCT))*0.5 +//NNTOOL_INPUT_SCALE_FLOAT -> 1.9372712 +// SCALE = DCT_NORMALIZATION*DCT_SCALE/NNTOOL_INPUT_SCALE_FLOAT +// DCT_SCALE = 2**(-MFCC_Q) #define INPUT_SCALE 236 #define INPUT_SCALEN 17 @@ -66,7 +67,11 @@ int rec_digit; static void RunMFCC(){ L1_Memory = __PREFIX(_L1_Memory); PRINTF("Runnning MFCC\n"); - MFCC(inSig, mfcc_features, 0, TwiddlesLUT, SwapLUT, WindowLUT, MFCC_FilterBank, MFCC_Coeffs, 5, DCT_Coeff); + #if (DATA_TYPE==1) //HIGH PRECISION 32BITS FFT + MFCC(inSig, mfcc_features, TwiddlesLUTR2, SwapTableR2, WindowLUT, MFCC_FilterBank, MFCC_Coeffs, NORM, DCT_Coeff); + #else //LOW PRECISION 16BITS FFT + MFCC(inSig, mfcc_features, TwiddlesLUTR2, RFFTTwiddlesLUT, SwapTableR2, WindowLUT, MFCC_FilterBank, MFCC_Coeffs, NORM, DCT_Coeff); + #endif } static void Runkws() diff --git a/main_with_mfcc.c b/main_with_mfcc.c index 4a9d37f..42acde5 100644 --- a/main_with_mfcc.c +++ b/main_with_mfcc.c @@ -11,17 +11,17 @@ #include "Gap.h" #ifdef SMALL #include "KWS_ds_cnn_s_quantKernels.h" - #include "MFCC_params_SMALL.h" + #include "MFCC_params.h" #define L1_SIZE _KWS_ds_cnn_s_quant_L1_Memory_SIZE #endif #ifdef MEDIUM #include "KWS_ds_cnn_m_quantKernels.h" - #include "MFCC_params_MEDIUM.h" + #include "MFCC_params.h" #define L1_SIZE _KWS_ds_cnn_m_quant_L1_Memory_SIZE #endif #ifdef LARGE #include "KWS_ds_cnn_l_quantKernels.h" - #include "MFCC_params_LARGE.h" + #include "MFCC_params.h" #define L1_SIZE _KWS_ds_cnn_l_quant_L1_Memory_SIZE #endif #include "gaplib/wavIO.h" @@ -29,13 +29,16 @@ #include "LUT.def" #include "MFCC_FB.def" -#define WAV_BUFFER_SIZE 16000 // 1sec@16kHz +#define WAV_BUFFER_SIZE 16000 // Something more than 1sec@16kHz #define NUM_CLASSES 12 +#define N_FRAME 49 +#define NORM 3 +#define MFCC_Q 15-NORM-7 -//DCT_NORMALIZATION -> np.sqrt(1/(N_DCT))*0.5 +//DCT_NORMALIZATION -> np.sqrt(2/(N_DCT))*0.5 //NNTOOL_INPUT_SCALE_FLOAT -> 1.9372712 -// SCALE = DCT_NORMALIZATION*NNTOOL_INPUT_SCALE_FLOAT/DCT_SCALE -// MFCC -> Q(10-Norm) Norm=5 +// SCALE = DCT_NORMALIZATION*DCT_SCALE/NNTOOL_INPUT_SCALE_FLOAT +// DCT_SCALE = 2**(-MFCC_Q) #define INPUT_SCALE 236 #define INPUT_SCALEN 17 @@ -143,7 +146,11 @@ static void RunMFCC(){ start = gap_cl_readhwtimer(); #endif // run inference on inSig[0:WAV_BUFFER_SIZE] and inSig[WAV_BUFFER_SIZE/2:WAV_BUFER_SIZE*3/2] alternately - MFCC(inSig, mfcc_features, 0, TwiddlesLUT, SwapLUT, WindowLUT, MFCC_FilterBank, MFCC_Coeffs, 5, DCT_Coeff); + #if (DATA_TYPE==1) //HIGH PRECISION 32BITS FFT + MFCC(inSig, mfcc_features, TwiddlesLUTR2, SwapTableR2, WindowLUT, MFCC_FilterBank, MFCC_Coeffs, NORM, DCT_Coeff); + #else //LOW PRECISION 16BITS FFT + MFCC(inSig, mfcc_features, TwiddlesLUTR2, RFFTTwiddlesLUT, SwapTableR2, WindowLUT, MFCC_FilterBank, MFCC_Coeffs, NORM, DCT_Coeff); + #endif #ifdef PERF elapsed = gap_cl_readhwtimer() - start; total_cyc += elapsed; diff --git a/mfcc_model.mk b/mfcc_model.mk index 4387c5a..1220603 100644 --- a/mfcc_model.mk +++ b/mfcc_model.mk @@ -1,13 +1,15 @@ # User Test #------------------------------------------ -MFCC_GENERATOR ?= $(TILER_GENERATOR_PATH)/MFCC +MFCC_GENERATOR ?= $(TILER_DSP_GENERATOR_PATH) MFCCBUILD_DIR ?= $(CURDIR)/BUILD_MFCC_MODEL MFCC_MODEL_GEN = $(MFCCBUILD_DIR)/GenMFCC -MFCC_SRCG += $(MFCC_GENERATOR)/MfccGenerator.c -MFCC_KER_SRCS = $(MFCC_GENERATOR)/MfccBasicKernels.c $(MFCC_GENERATOR)/FFTLib.c - -# Everything bellow is not application specific -TABLE_CFLAGS=-lm +FFT_LUT = $(MFCCBUILD_DIR)/LUT.def +MFCC_LUT = $(MFCCBUILD_DIR)/MFCC_FB.def +ifeq ($(USE_HIGH_PREC), 1) + MFCC_PARAMS_JSON = MfccConfig_HighPrec.json +else + MFCC_PARAMS_JSON = MfccConfig.json +endif #SDL_FLAGS= -lSDL2 -lSDL2_ttf -DAT_DISPLAY ifdef MODEL_L1_MEMORY @@ -20,53 +22,19 @@ ifdef MODEL_L3_MEMORY MODEL_GEN_EXTRA_FLAGS += --L3 $(MODEL_L3_MEMORY) endif -SMALL ?= 0 -MEDIUM ?= 0 -LARGE ?= 0 -ifeq ($(SMALL), 1) -SIZE_DEF += -DSMALL -NN_SIZE = SMALL -else -ifeq ($(MEDIUM), 1) -SIZE_DEF += -DMEDIUM -NN_SIZE = MEDIUM -else -ifeq ($(LARGE), 1) -SIZE_DEF += -DLARGE -NN_SIZE = LARGE -else -$(error You must set to 1 one of SMALL, MEDIUM, LARGE to select a network) -endif -endif -endif - -EXTRA_FLAGS = -ifeq ($(USE_POWER), 1) - EXTRA_FLAGS += --use_power -endif -ifeq ($(USE_HIGH_PREC), 1) - EXTRA_FLAGS += --use_high_prec -endif - $(MFCCBUILD_DIR): mkdir $(MFCCBUILD_DIR) -$(MFCCBUILD_DIR)/LUT.def: $(MFCCBUILD_DIR) - python3 $(MFCC_GENERATOR)/GenLUT.py --fft_lut_file $(MFCCBUILD_DIR)/LUT.def --mfcc_bf_lut_file $(MFCCBUILD_DIR)/MFCC_FB.def \ - --sample_rate 16000 --frame_size $(FRAME_SIZE) --frame_step $(FRAME_STEP) --n_frame $(AT_INPUT_HEIGHT) \ - --n_fft 1024 --n_dct 40 --mfcc_bank_cnt 40 --fmin 20 --fmax 4000 --mfcc_bank_cnt 40 --preempfactor 0.0 \ - --use_tf_mfcc --save_params_header MFCC_params_$(NN_SIZE).h $(EXTRA_FLAGS) - # Build the code generator from the model code -$(MFCC_MODEL_GEN): $(MFCCBUILD_DIR)/LUT.def $(MFCCBUILD_DIR) - gcc -g -o $(MFCC_MODEL_GEN) -I. -I$(MFCC_GENERATOR) -I$(TILER_INC) -I$(TILER_EMU_INC) MFCCmodel.c $(MFCC_SRCG) $(TILER_LIB) $(TABLE_CFLAGS) $(SIZE_DEF) +$(MFCC_MODEL_GEN): $(MFCCBUILD_DIR) + gcc -g -o $(MFCC_MODEL_GEN) -I. -I$(MFCCBUILD_DIR) -I$(TILER_DSP_GENERATOR_PATH) -I$(TILER_INC) -I$(TILER_EMU_INC) $(CURDIR)/MFCCmodel.c $(TILER_DSP_GENERATOR_PATH)/DSP_Generators.c $(TILER_LIB) -DUSE_POWER=$(USE_POWER) -lm +$(MFCC_LUT): $(MFCCBUILD_DIR) + python $(TILER_MFCC_GEN_LUT_SCRIPT) --fft_lut_file $(FFT_LUT) --mfcc_bf_lut_file $(MFCC_LUT) --params_json $(MFCC_PARAMS_JSON) --save_params_header MFCC_params.h # Run the code generator kernel code -$(MFCCBUILD_DIR)/MFCCKernels.c: $(MFCC_MODEL_GEN) +$(MFCCBUILD_DIR)/MFCCKernels.c: $(MFCC_LUT) $(MFCC_MODEL_GEN) $(MFCC_MODEL_GEN) -o $(MFCCBUILD_DIR) -c $(MFCCBUILD_DIR) $(MODEL_GEN_EXTRA_FLAGS) -mfcc_model: $(MFCCBUILD_DIR)/MFCCKernels.c - -clean_mfcc_model: +clean_mfcc_code: rm -rf $(MFCCBUILD_DIR) diff --git a/model/nntool_script_params b/model/nntool_script similarity index 56% rename from model/nntool_script_params rename to model/nntool_script index 0d244e4..40605e3 100644 --- a/model/nntool_script_params +++ b/model/nntool_script @@ -1,15 +1,10 @@ set debug true adjust fusions --scale8 -show -qshow -LARGE_OPT -set l3_ram_ext_managed false -set graph_reorder_constant_in true +#show +#qshow set graph_produce_node_names true set graph_produce_operinfos true set graph_monitor_cycles true set graph_const_exec_from_flash false -GRAPH_DUMP -gen MODEL_SRC -T TENSORS_DIR -M MODEL_BUILD -t save_state diff --git a/run_test_accuracy.sh b/run_test_accuracy.sh index 245464f..7f60b23 100755 --- a/run_test_accuracy.sh +++ b/run_test_accuracy.sh @@ -1,14 +1,14 @@ -make test_accuracy SMALL=1 USE_HIGH_PREC=0 USE_POWER=1 > log_test_small_power.txt -make test_accuracy SMALL=1 USE_HIGH_PREC=1 USE_POWER=1 > log_test_small_hp_power.txt -make test_accuracy SMALL=1 USE_HIGH_PREC=0 USE_POWER=0 > log_test_small_spectr.txt -make test_accuracy SMALL=1 USE_HIGH_PREC=1 USE_POWER=0 > log_test_small_hp_spectr.txt +make test_accuracy SMALL=1 USE_HIGH_PREC=0 USE_POWER=1 &> accuracy_log/log_test_small_power_v2.txt +make test_accuracy SMALL=1 USE_HIGH_PREC=1 USE_POWER=0 &> accuracy_log/log_test_small_spectr_v2.txt +make test_accuracy SMALL=1 USE_HIGH_PREC=0 USE_POWER=1 &> accuracy_log/log_test_small_power_v2.txt +make test_accuracy SMALL=1 USE_HIGH_PREC=1 USE_POWER=0 &> accuracy_log/log_test_small_spectr_v2.txt -make test_accuracy MEDIUM=1 USE_HIGH_PREC=0 USE_POWER=1 > log_test_medium_power.txt -make test_accuracy MEDIUM=1 USE_HIGH_PREC=1 USE_POWER=1 > log_test_medium_hp_power.txt -make test_accuracy MEDIUM=1 USE_HIGH_PREC=0 USE_POWER=0 > log_test_medium_spectr.txt -make test_accuracy MEDIUM=1 USE_HIGH_PREC=1 USE_POWER=0 > log_test_medium_hp_spectr.txt +make test_accuracy MEDIUM=1 USE_HIGH_PREC=0 USE_POWER=1 &> accuracy_log/log_test_medium_power_v2.txt +make test_accuracy MEDIUM=1 USE_HIGH_PREC=1 USE_POWER=0 &> accuracy_log/log_test_medium_spectr_v2.txt +make test_accuracy MEDIUM=1 USE_HIGH_PREC=0 USE_POWER=1 &> accuracy_log/log_test_medium_power_v2.txt +make test_accuracy MEDIUM=1 USE_HIGH_PREC=1 USE_POWER=0 &> accuracy_log/log_test_medium_spectr_v2.txt -make test_accuracy LARGE=1 USE_HIGH_PREC=0 USE_POWER=1 > log_test_large_power.txt -make test_accuracy LARGE=1 USE_HIGH_PREC=1 USE_POWER=1 > log_test_large_hp_power.txt -make test_accuracy LARGE=1 USE_HIGH_PREC=0 USE_POWER=0 > log_test_large_spectr.txt -make test_accuracy LARGE=1 USE_HIGH_PREC=1 USE_POWER=0 > log_test_large_hp_spectr.txt \ No newline at end of file +make test_accuracy LARGE=1 USE_HIGH_PREC=0 USE_POWER=1 &> accuracy_log/log_test_large_power_v2.txt +make test_accuracy LARGE=1 USE_HIGH_PREC=1 USE_POWER=1 &> accuracy_log/log_test_large_hp_power_v2.txt +make test_accuracy LARGE=1 USE_HIGH_PREC=0 USE_POWER=0 &> accuracy_log/log_test_large_spectr_v2.txt +make test_accuracy LARGE=1 USE_HIGH_PREC=1 USE_POWER=0 &> accuracy_log/log_test_large_hp_spectr_v2.txt \ No newline at end of file diff --git a/utils/test_accuracy_emul.py b/utils/test_accuracy_emul.py index f72fccf..9573e89 100644 --- a/utils/test_accuracy_emul.py +++ b/utils/test_accuracy_emul.py @@ -81,7 +81,7 @@ def main(_): large = int("_l_" in FLAGS.tflite_model) if not small and not medium and not large: raise ValueError("You must select one of the models in model dir") - compile_command = 'make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL={} MEDIUM={} LARGE={} WITH_MFCC={} USE_POWER={} USE_HIGH_PREC={}'.format(small, medium, large, 1 if FLAGS.test_with_wav == True else 0, 1 if FLAGS.use_power_spectrogram == True else 0, FLAGS.use_high_prec) + compile_command = 'make -f emul.mk clean_model clean all DUMP_TENSORS=0 SMALL={} MEDIUM={} LARGE={} WITH_MFCC={} USE_POWER={}'.format(small, medium, large, 1 if FLAGS.test_with_wav == True else 0, 1 if FLAGS.use_power_spectrogram == True else 0) print(compile_command) stream = os.popen(compile_command) for line in stream.readlines(): @@ -305,10 +305,6 @@ def main(_): '--use_power_spectrogram', type=int, default=1) - parser.add_argument( - '--use_high_prec', - type=int, - default=0) FLAGS, unparsed = parser.parse_known_args()