diff --git a/hls4ml/backends/fpga/fpga_backend.py b/hls4ml/backends/fpga/fpga_backend.py
index fbfed71c5..b20fdf122 100644
--- a/hls4ml/backends/fpga/fpga_backend.py
+++ b/hls4ml/backends/fpga/fpga_backend.py
@@ -94,7 +94,7 @@ def __init__(self, name):
             attrs.append(ConfigurableAttribute('reuse_factor', default=1, description=descriptions.reuse_factor))
             self.attribute_map[layer] = attrs
 
-        # seperable is kind of special because it is effectively two layers that will be split
+        # separable is kind of special because it is effectively two layers that will be split
         for layer in (SeparableConv1D, SeparableConv2D):
             attrs = self.attribute_map.get(layer, [])
             attrs.append(TypeAttribute('depthwise_accum'))
diff --git a/hls4ml/backends/oneapi/passes/convolution_templates.py b/hls4ml/backends/oneapi/passes/convolution_templates.py
index 17154559d..64d9e4222 100644
--- a/hls4ml/backends/oneapi/passes/convolution_templates.py
+++ b/hls4ml/backends/oneapi/passes/convolution_templates.py
@@ -1,7 +1,7 @@
 from hls4ml.backends.backend import get_backend
 from hls4ml.backends.oneapi.oneapi_template import StreamFunctionCallTemplate, TaskSequenceTemplate
 from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate
-from hls4ml.model.layers import Conv1D, Conv2D, Conv2DBatchnorm
+from hls4ml.model.layers import Conv1D, Conv2D, Conv2DBatchnorm, DepthwiseConv1D, DepthwiseConv2D
 
 # TODO - Dilation rate ?
 
@@ -70,9 +70,20 @@
 conv1d_include_list = ['nnet_utils/nnet_conv1d.h', 'nnet_utils/nnet_conv1d_stream.h']
 
 
+depthconv1d_function_template = (
+    'nnet::depthwise_conv_1d_{data_format}<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {b});'
+)
+depthconv1d_include_list = [
+    'nnet_utils/nnet_conv1d.h',
+    'nnet_utils/nnet_conv1d_resource.h',
+    'nnet_utils/nnet_depthconv1d.h',
+    'nnet_utils/nnet_depthconv1d_resource.h',
+]
+
+
 class Conv1DConfigTemplate(LayerConfigTemplate):
     def __init__(self):
-        super().__init__(Conv1D)
+        super().__init__((Conv1D, DepthwiseConv1D))
         self.template = conv1d_config_template
         self.mult_template = conv_mult_config_template
 
@@ -137,6 +148,12 @@ def format(self, node):
         return self.template.format(**params)
 
 
+class DepthwiseConv1DFunctionTemplate(Conv1DFunctionTemplate):
+    def __init__(self):
+        super(Conv1DFunctionTemplate, self).__init__(DepthwiseConv1D, include_header=depthconv1d_include_list)
+        self.template = depthconv1d_function_template
+
+
 ''' 2D Conv '''
 conv2d_config_template = """struct config{index} : nnet::conv2d_config {{
     static const unsigned in_height = {in_height};
@@ -183,7 +200,7 @@ def format(self, node):
 
 class Conv2DConfigTemplate(LayerConfigTemplate):
     def __init__(self):
-        super().__init__((Conv2D, Conv2DBatchnorm))
+        super().__init__((Conv2D, Conv2DBatchnorm, DepthwiseConv2D))
         self.template = conv2d_config_template
         self.mult_template = conv_mult_config_template
 
@@ -233,3 +250,20 @@ def format(self, node):
             raise RuntimeError('channels_first not supported on oneAPI')
         params['data_format'] = 'cl'
         return self.template.format(**params)
+
+
+depthconv2d_function_template = (
+    'nnet::depthwise_conv_2d_{data_format}<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {b});'
+)
+depthconv2d_include_list = [
+    'nnet_utils/nnet_conv2d.h',
+    'nnet_utils/nnet_conv2d_resource.h',
+    'nnet_utils/nnet_depthconv2d.h',
+    'nnet_utils/nnet_depthconv2d_resource.h',
+]
+
+
+class DepthwiseConv2DFunctionTemplate(Conv2DFunctionTemplate):
+    def __init__(self):
+        super(Conv2DFunctionTemplate, self).__init__(DepthwiseConv2D, include_header=depthconv2d_include_list)
+        self.template = depthconv2d_function_template
diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py
index a745eceba..7e9325ccd 100644
--- a/hls4ml/model/optimizer/__init__.py
+++ b/hls4ml/model/optimizer/__init__.py
@@ -59,7 +59,7 @@
     'convert',
     [
         'channels_last_converter',
-        'seperable_to_depthwise_and_conv',
+        'separable_to_depthwise_and_conv',
        'remove_transpose_before_flatten',
         'remove_nop_transpose',
         'remove_single_channel_transpose',
diff --git a/hls4ml/model/optimizer/passes/seperable_to_dw_conv.py b/hls4ml/model/optimizer/passes/seperable_to_dw_conv.py
index 38eef1e7d..10840ec41 100644
--- a/hls4ml/model/optimizer/passes/seperable_to_dw_conv.py
+++ b/hls4ml/model/optimizer/passes/seperable_to_dw_conv.py
@@ -1,5 +1,5 @@
 """
-This optimizer converts a seperable convolution to a depthwise followed by a regular convolution.
+This optimizer converts a separable convolution to a depthwise followed by a regular convolution.
 For backends with a custom pointwise implementations the regular convolution will subsequently
 be converted to a pointwise convolution by a different optimizer.
 """
@@ -10,8 +10,8 @@
 from hls4ml.model.optimizer import OptimizerPass
 
 
-class SeperableToDepthwiseAndConv(OptimizerPass):
-    """Convert Seperable to DepthwiseConv + Conv (potentially later Pointwise)"""
+class SeparableToDepthwiseAndConv(OptimizerPass):
+    """Convert Separable to DepthwiseConv + Conv (potentially later Pointwise)"""
 
     _dw_attributes = (
         'in_width',
@@ -70,7 +70,7 @@ def transform(self, model, node):
         model.config.parse_name_config(dw_name, dw_layer_config)
 
         # creating the attributes
-        dw_attributes = {k: node.attributes[k] for k in SeperableToDepthwiseAndConv._dw_attributes if k in node.attributes}
+        dw_attributes = {k: node.attributes[k] for k in SeparableToDepthwiseAndConv._dw_attributes if k in node.attributes}
 
         dw_attributes['n_filt'] = dw_attributes['n_chan'] * dw_attributes['depth_multiplier']
         dw_attributes['use_bias'] = False
@@ -100,7 +100,7 @@ def transform(self, model, node):
         model.config.parse_name_config(pw_name, pw_layer_config)
 
         # creating the attributes
-        pw_attributes = {k: node.attributes[k] for k in SeperableToDepthwiseAndConv._pw_attributes if k in node.attributes}
+        pw_attributes = {k: node.attributes[k] for k in SeparableToDepthwiseAndConv._pw_attributes if k in node.attributes}
         pw_attributes['filt_width'] = 1
         pw_attributes['filt_height'] = 1
         pw_attributes['stride_width'] = 1
diff --git a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv1d.h b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv1d.h
new file mode 100644
index 000000000..d2c774fcf
--- /dev/null
+++ b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv1d.h
@@ -0,0 +1,19 @@
+#ifndef NNET_DEPTH_CONV1D_H_
+#define NNET_DEPTH_CONV1D_H_
+
+#include "nnet_common.h"
+#include "nnet_conv1d.h"
+#include "nnet_depthconv1d_resource.h"
+
+namespace nnet {
+
+template <class data_T, class res_T, typename CONFIG_T>
+void depthwise_conv_1d_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights,
+                          const typename CONFIG_T::bias_t &biases) {
+
+    depthwise_conv_1d_resource_cl<data_T, res_T, CONFIG_T>(data, res, weights, biases);
+}
+
+} // namespace nnet
+
+#endif
diff --git a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv1d_resource.h b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv1d_resource.h
new file mode 100644
index 000000000..c06b6b14e
--- /dev/null
+++ b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv1d_resource.h
@@ -0,0 +1,60 @@
+#ifndef NNET_DEPTH_CONV1D_LATENCY_H_
+#define NNET_DEPTH_CONV1D_LATENCY_H_
+
+#include "nnet_common.h"
+#include "nnet_conv1d_resource.h"
+#include "nnet_mult.h"
+
+namespace nnet {
+
+template <class data_T, class res_T, typename CONFIG_T>
+void depthwise_conv_1d_resource_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights,
+                                   const typename CONFIG_T::bias_t &biases) {
+
+    int depth_multiplier = CONFIG_T::n_filt / CONFIG_T::n_chan;
+    [[intel::fpga_register]] int res_idx = 0;
+
+    [[intel::fpga_register]] typename CONFIG_T::accum_t acc[CONFIG_T::out_width * CONFIG_T::n_filt];
+
+DM_LOOP:
+    #pragma unroll
+    for (int dm = 0; dm < depth_multiplier; dm++) {
+
+    WIDTH_LOOP:
+        #pragma unroll
+        for (int w = 0; w < CONFIG_T::out_width; w++) {
+
+        CHAN_LOOP:
+            #pragma unroll
+            for (int c = 0; c < CONFIG_T::n_chan; c++) {
+
+                res_idx = (w * CONFIG_T::n_filt) + (c * depth_multiplier) + dm;
+
+                acc[res_idx] = biases[c * depth_multiplier + dm];
+
+            KERNEL_W_LOOP:
+                #pragma unroll
+                for (int kw = 0; kw < CONFIG_T::filt_width; kw++) {
+
+                    int w_in = w * CONFIG_T::stride_width + kw - CONFIG_T::pad_left;
+
+                    if ((w_in >= 0) && (w_in < CONFIG_T::in_width)) {
+
+                        acc[res_idx] += CONFIG_T::mult_config::
+                            template product<typename data_T::value_type, typename CONFIG_T::weight_t::value_type>::product(
+                                data[(w_in)*CONFIG_T::n_chan + c],
+                                weights[(dm * CONFIG_T::filt_width * CONFIG_T::n_chan) + (kw * CONFIG_T::n_chan) + c]);
+                    }
+                }
+            }
+        }
+    }
+
+RESULT:
+    #pragma unroll
+    for (int ires = 0; ires < CONFIG_T::out_width * CONFIG_T::n_filt; ires++) {
+        res[ires] = cast<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(acc[ires]);
+    }
+}
+} // namespace nnet
+#endif
diff --git a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv2d.h b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv2d.h
new file mode 100644
index 000000000..87dc1805d
--- /dev/null
+++ b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv2d.h
@@ -0,0 +1,19 @@
+#ifndef NNET_DEPTH_CONV2D_H_
+#define NNET_DEPTH_CONV2D_H_
+
+#include "nnet_common.h"
+#include "nnet_conv2d.h"
+#include "nnet_depthconv2d_resource.h"
+
+namespace nnet {
+
+template <class data_T, class res_T, typename CONFIG_T>
+void depthwise_conv_2d_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights,
+                          const typename CONFIG_T::bias_t &biases) {
+
+    depthwise_conv_2d_resource_cl<data_T, res_T, CONFIG_T>(data, res, weights, biases);
+}
+
+} // namespace nnet
+
+#endif
diff --git a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv2d_resource.h b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv2d_resource.h
new file mode 100644
index 000000000..91ddc28f6
--- /dev/null
+++ b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv2d_resource.h
@@ -0,0 +1,76 @@
+#ifndef NNET_SEPARABLE_CONV2D_LATENCY_H_
+#define NNET_SEPARABLE_CONV2D_LATENCY_H_
+
+#include "nnet_common.h"
+#include "nnet_conv2d_resource.h"
+#include "nnet_mult.h"
+
+namespace nnet {
+
+template <class data_T, class res_T, typename CONFIG_T>
+void depthwise_conv_2d_resource_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights,
+                                   const typename CONFIG_T::bias_t &biases) {
+
+    int depth_multiplier = CONFIG_T::n_filt / CONFIG_T::n_chan;
+    [[intel::fpga_register]] int res_idx = 0;
+
+    [[intel::fpga_register]] typename CONFIG_T::accum_t acc[CONFIG_T::out_width * CONFIG_T::out_height * CONFIG_T::n_filt];
+
+DM_LOOP:
+    #pragma unroll
+    for (int dm = 0; dm < depth_multiplier; dm++) {
+
+    HEIGHT_LOOP:
+        #pragma unroll
+        for (int h = 0; h < CONFIG_T::out_height; h++) {
+        WIDTH_LOOP:
+            #pragma unroll
+            for (int w = 0; w < CONFIG_T::out_width; w++) {
+
+            CHAN_LOOP:
+                #pragma unroll
+                for (int c = 0; c < CONFIG_T::n_chan; c++) {
+
+                    res_idx =
+                        (h * CONFIG_T::out_width * CONFIG_T::n_filt) + (w * CONFIG_T::n_filt) + (c * depth_multiplier) + dm;
+
+                    acc[res_idx] = biases[c * depth_multiplier + dm];
+
+                KERNEL_H_LOOP:
+                    #pragma unroll
+                    for (int kh = 0; kh < CONFIG_T::filt_height; kh++) {
+                    KERNEL_W_LOOP:
+                        #pragma unroll
+                        for (int kw = 0; kw < CONFIG_T::filt_width; kw++) {
+
+                            int h_in = h * CONFIG_T::stride_height + kh - CONFIG_T::pad_top;
+                            int w_in = w * CONFIG_T::stride_width + kw - CONFIG_T::pad_left;
+
+                            if ((h_in >= 0) && (h_in < CONFIG_T::in_height) && (w_in >= 0) && (w_in < CONFIG_T::in_width)) {
+
+                                acc[res_idx] +=
+                                    CONFIG_T::mult_config::template product<typename data_T::value_type,
+                                                                            typename CONFIG_T::weight_t::value_type>::
+                                        product(
+                                            data[(h_in)*CONFIG_T::in_width * CONFIG_T::n_chan + (w_in)*CONFIG_T::n_chan + c],
+                                            weights[(dm * CONFIG_T::filt_height * CONFIG_T::filt_width * CONFIG_T::n_chan) +
+                                                    (kh * CONFIG_T::filt_width * CONFIG_T::n_chan) +
+                                                    (kw * CONFIG_T::n_chan) + c]);
+
+                                ;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+RESULT:
+    #pragma unroll
+    for (int ires = 0; ires < CONFIG_T::out_width * CONFIG_T::out_height * CONFIG_T::n_filt; ires++) {
+        res[ires] = cast<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(acc[ires]);
+    }
+}
+} // namespace nnet
+#endif
diff --git a/test/pytest/test_depthconv1d.py b/test/pytest/test_depthconv1d.py
index 3734815af..85c8e2ac4 100644
--- a/test/pytest/test_depthconv1d.py
+++ b/test/pytest/test_depthconv1d.py
@@ -23,6 +23,7 @@
 @pytest.mark.parametrize(
     'backend, io_type',
     [
+        ('oneAPI', 'io_parallel'),
         ('Vivado', 'io_parallel'),
         ('Vitis', 'io_parallel'),
         ('Vivado', 'io_stream'),
diff --git a/test/pytest/test_depthconv2d.py b/test/pytest/test_depthconv2d.py
index 9178edf36..4832cb1ae 100644
--- a/test/pytest/test_depthconv2d.py
+++ b/test/pytest/test_depthconv2d.py
@@ -24,6 +24,7 @@
 @pytest.mark.parametrize(
     'backend, io_type',
     [
+        ('oneAPI', 'io_parallel'),
         ('Vivado', 'io_parallel'),
         ('Vitis', 'io_parallel'),
         ('Vivado', 'io_stream'),
diff --git a/test/pytest/test_sepconv1d.py b/test/pytest/test_sepconv1d.py
index 64312e993..aef24db04 100644
--- a/test/pytest/test_sepconv1d.py
+++ b/test/pytest/test_sepconv1d.py
@@ -23,6 +23,7 @@
 @pytest.mark.parametrize(
     'backend, io_type',
     [
+        ('oneAPI', 'io_parallel'),
         ('Vivado', 'io_parallel'),
         ('Vitis', 'io_parallel'),
         ('Vivado', 'io_stream'),
diff --git a/test/pytest/test_sepconv2d.py b/test/pytest/test_sepconv2d.py
index 4732c7c7f..1d056f15c 100644
--- a/test/pytest/test_sepconv2d.py
+++ b/test/pytest/test_sepconv2d.py
@@ -23,6 +23,7 @@
 @pytest.mark.parametrize(
     'backend, io_type',
     [
+        ('oneAPI', 'io_parallel'),
         ('Vivado', 'io_parallel'),
         ('Vitis', 'io_parallel'),
         ('Vivado', 'io_stream'),
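
Usage note (not part of the patch): the new oneAPI io_parallel path can be exercised with a short standalone script. The sketch below mirrors the ('oneAPI', 'io_parallel') parametrization added to test_sepconv2d.py; the input shape, filter count, output directory, and tolerance are illustrative assumptions, and hls_model.compile() requires a local oneAPI (icpx) toolchain.

import numpy as np
from tensorflow.keras.layers import Input, SeparableConv2D
from tensorflow.keras.models import Model

import hls4ml

# Single SeparableConv2D model; the separable_to_depthwise_and_conv pass splits it
# into a DepthwiseConv2D followed by a pointwise Conv2D in the hls4ml model graph.
inp = Input(shape=(8, 8, 3))
out = SeparableConv2D(4, (3, 3), padding='same')(inp)
keras_model = Model(inp, out)

config = hls4ml.utils.config_from_keras_model(keras_model, granularity='name', backend='oneAPI')
hls_model = hls4ml.converters.convert_from_keras_model(
    keras_model,
    hls_config=config,
    backend='oneAPI',
    io_type='io_parallel',
    output_dir='sepconv2d_oneapi_prj',  # illustrative project directory
)
hls_model.compile()

x = np.random.rand(10, 8, 8, 3)
y_keras = keras_model.predict(x)
y_hls = hls_model.predict(x)
np.testing.assert_allclose(y_hls.ravel(), y_keras.ravel(), rtol=0, atol=0.05)  # loose tolerance, illustrative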