
Commit 08d34d6

[ET-VK] Using push constants for conv2d dw.
Pull Request resolved: #7928. This diff moves the shader parameters of the depthwise (dw) conv2d shaders in ExecuTorch's Vulkan backend from uniform buffers (UBOs) to push constants, improving memory usage. ghstack-source-id: 263027545 @exported-using-ghexport Differential Revision: [D68493849](https://our.internmc.facebook.com/intern/diff/D68493849/)
1 parent 81f9aed commit 08d34d6
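
For background on where the memory saving comes from: with UBOs each small parameter set is staged in a uniform buffer and bound through a descriptor set, whereas push constants are recorded straight into the command buffer at dispatch time. The sketch below is illustrative only, using raw Vulkan rather than ExecuTorch's ComputeGraph/DispatchNode API (record_push_constants is a hypothetical helper, not a function from this repo).

#include <vulkan/vulkan.h>

// With UBOs, each parameter set has to live in a VkBuffer that is allocated,
// filled, and bound through a descriptor set. With push constants the bytes
// are recorded directly into the command buffer, so no buffer memory or
// descriptors are needed for these small per-dispatch values.
void record_push_constants(
    VkCommandBuffer cmd,
    VkPipelineLayout layout,
    const void* data,
    uint32_t size) {
  vkCmdPushConstants(
      cmd, layout, VK_SHADER_STAGE_COMPUTE_BIT, /*offset=*/0, size, data);
}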

File tree: 3 files changed, +67 −59 lines

backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl

Lines changed: 15 additions & 6 deletions

@@ -32,11 +32,20 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, "texture3d")}
 ${layout_declare_tensor(1, "r", "t_in", DTYPE, "texture3d")}
 ${layout_declare_tensor(2, "r", "t_kernel", DTYPE, "texture2d")}
 ${layout_declare_tensor(3, "r", "t_bias", DTYPE, "texture2d")}
-${layout_declare_ubo(4, "ivec3", "out_limits")}
-${layout_declare_ubo(5, "ivec4", "in_sizes")}
-${layout_declare_ubo(6, "ivec2", "kernel_size", "ivec2", "stride", "ivec2", "padding", "ivec2", "dilation")}
-${layout_declare_ubo(7, "ivec2", "overlay_region", "int", "in_group_size")}
-${layout_declare_ubo(8, "float", "out_min", "float", "out_max")}
+
+layout(push_constant) uniform restrict Block {
+  ivec4 out_limits;
+  ivec4 in_sizes;
+  ivec2 kernel_size;
+  ivec2 stride;
+  ivec2 padding;
+  ivec2 dilation;
+  ivec2 overlay_region;
+  int in_group_size;
+  int dummy_padding;
+  float out_min;
+  float out_max;
+};
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
@@ -127,7 +136,7 @@ void main() {
   const ivec3 out_pos = pos_shared[offset_pos_index(gl_LocalInvocationIndex)];
   for (int y = 0; y < BATCH_SIZE_Y; y++) {
     for (int x = 0; x < BATCH_SIZE_X; x++) {
-      if (any(greaterThanEqual(ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), out_limits))) {
+      if (any(greaterThanEqual(ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), out_limits.xyz))) {
         continue;
       }
       imageStore(t_out, ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), op(sum[y][x], out_min, out_max));

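The new int dummy_padding member is worth a note: on the host side, extra_params (an ivec2 plus an int) is pushed padded out to sizeof(utils::ivec4) — see the third PushConstantDataInfo argument in Convolution.cpp below — so the shader block appears to need a 4-byte filler before out_min/out_max to keep offsets in sync. Below is a standalone check of that reading, under the assumption that push_constant blocks use std430 layout (the Vulkan GLSL default); the struct name, offsets, and comments are mine, not ExecuTorch's.

#include <cstddef>
#include <cstdint>

// Hypothetical host-side mirror of the shader's push constant Block.
// Under std430 rules, ivec4 members are 16-byte aligned, ivec2 members
// 8-byte aligned, and int/float 4-byte aligned, so the C++ offsets below
// should match the GLSL offsets.
struct Conv2dDwPushBlock {
  int32_t out_limits[4];      // offset  0
  int32_t in_sizes[4];        // offset 16
  int32_t kernel_size[2];     // offset 32
  int32_t stride[2];          // offset 40
  int32_t padding[2];         // offset 48
  int32_t dilation[2];        // offset 56
  int32_t overlay_region[2];  // offset 64
  int32_t in_group_size;      // offset 72
  int32_t dummy_padding;      // offset 76: fills extra_params out to 16 bytes
  float out_min;              // offset 80
  float out_max;              // offset 84
};

// Without dummy_padding, out_min would land at offset 76 while the host
// writes it at 80 (extra_params is pushed padded to sizeof(utils::ivec4)).
static_assert(offsetof(Conv2dDwPushBlock, out_min) == 80, "layout mismatch");
static_assert(sizeof(Conv2dDwPushBlock) == 88, "expected 88-byte block");
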
backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_sned_output_tile.glsl

Lines changed: 14 additions & 5 deletions

@@ -24,11 +24,20 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, "texture3d")}
 ${layout_declare_tensor(1, "r", "t_in", DTYPE, "texture3d")}
 ${layout_declare_tensor(2, "r", "t_kernel", DTYPE, "texture2d")}
 ${layout_declare_tensor(3, "r", "t_bias", DTYPE, "texture2d")}
-${layout_declare_ubo(4, "ivec3", "out_limits")}
-${layout_declare_ubo(5, "ivec4", "in_sizes")}
-${layout_declare_ubo(6, "ivec2", "kernel_size", "ivec2", "stride", "ivec2", "padding", "ivec2", "dilation")}
-${layout_declare_ubo(7, "ivec2", "overlay_region", "int", "in_group_size")}
-${layout_declare_ubo(8, "float", "out_min", "float", "out_max")}
+
+layout(push_constant) uniform restrict Block {
+  ivec4 out_limits;
+  ivec4 in_sizes;
+  ivec2 kernel_size;
+  ivec2 stride;
+  ivec2 padding;
+  ivec2 dilation;
+  ivec2 overlay_region;
+  int in_group_size;
+  int dummy_padding;
+  float out_min;
+  float out_max;
+};
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

backends/vulkan/runtime/graph/ops/impl/Convolution.cpp

Lines changed: 38 additions & 48 deletions

@@ -407,7 +407,9 @@ void add_conv2d_node(
     wg_size = {wg_size[0] * wg_size[1] * wg_size[2], 1, 1};
   }
 
-  if (method == Conv2dMethod::Pointwise) {
+  vkapi::ParamsBindList param_buffers;
+  std::vector<PushConstantDataInfo> push_constants;
+  if (method == Conv2dMethod::Pointwise || method == Conv2dMethod::Depthwise) {
     const utils::ivec4 kernel_param_size_stride = {
         kernel_params.kernel_size[0],
         kernel_params.kernel_size[1],
@@ -420,55 +422,43 @@ void add_conv2d_node(
         kernel_params.dilation[0],
         kernel_params.dilation[1]};
 
-    graph.execute_nodes().emplace_back(new DispatchNode(
-        graph,
-        shader,
-        wg_size,
-        graph.create_local_wg_size(wg_size),
-        // Inputs and Outputs
-        {{out, vkapi::MemoryAccessType::WRITE},
-         {{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
-        // Shader params buffers
-        {},
-        // Specialization Constants
-        {},
-        // Resizing Logic
-        resize_conv2d_node,
-        {weight_data, stride, padding, dilation, transposed, output_padding},
-        {
-            graph.logical_limits_pc_of(out),
-            graph.sizes_pc_of(in),
-            PushConstantDataInfo(
-                &kernel_param_size_stride, sizeof(kernel_param_size_stride)),
-            PushConstantDataInfo(
-                &kernel_param_pad_dial, sizeof(kernel_param_pad_dial)),
-            PushConstantDataInfo(
-                &extra_params, sizeof(extra_params), sizeof(utils::ivec4)),
-            PushConstantDataInfo(&out_params, sizeof(out_params)),
-        }));
+    push_constants = {
+        graph.logical_limits_pc_of(out),
+        graph.sizes_pc_of(in),
+        PushConstantDataInfo(
+            &kernel_param_size_stride, sizeof(kernel_param_size_stride)),
+        PushConstantDataInfo(
+            &kernel_param_pad_dial, sizeof(kernel_param_pad_dial)),
+        PushConstantDataInfo(
+            &extra_params, sizeof(extra_params), sizeof(utils::ivec4)),
+        PushConstantDataInfo(&out_params, sizeof(out_params)),
+    };
   } else {
-    graph.execute_nodes().emplace_back(new DispatchNode(
-        graph,
-        shader,
-        wg_size,
-        graph.create_local_wg_size(wg_size),
-        // Inputs and Outputs
-        {{out, vkapi::MemoryAccessType::WRITE},
-         {{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
-        // Shader params buffers
-        {
-            t_out->logical_limits_ubo(),
-            t_in->sizes_ubo(),
-            graph.create_params_buffer(kernel_params),
-            graph.create_params_buffer(extra_params),
-            graph.create_params_buffer(out_params),
-        },
-        // Specialization Constants
-        {},
-        // Resizing Logic
-        resize_conv2d_node,
-        {weight_data, stride, padding, dilation, transposed, output_padding}));
+    param_buffers = {
+        t_out->logical_limits_ubo(),
+        t_in->sizes_ubo(),
+        graph.create_params_buffer(kernel_params),
+        graph.create_params_buffer(extra_params),
+        graph.create_params_buffer(out_params),
+    };
   }
+
+  graph.execute_nodes().emplace_back(new DispatchNode(
+      graph,
+      shader,
+      wg_size,
+      graph.create_local_wg_size(wg_size),
+      // Inputs and Outputs
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
+      // Shader params buffers
+      param_buffers,
+      // Specialization Constants
+      {},
+      // Resizing Logic
+      resize_conv2d_node,
+      {weight_data, stride, padding, dilation, transposed, output_padding},
+      push_constants));
 }
 
 void add_conv1d_node(

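One constraint the push constant approach has to respect: the compute pipeline's layout must declare a push-constant range covering the block, and Vulkan only guarantees a maxPushConstantsSize of at least 128 bytes, which the roughly 88-byte block here fits within. ExecuTorch's Vulkan runtime builds pipeline layouts from the compiled shaders, so none of this is hand-written in the diff; the sketch below just shows what the raw Vulkan declaration looks like, with hypothetical names and a single descriptor set layout assumed.

#include <vulkan/vulkan.h>

// Illustrative only: creates a compute pipeline layout that declares the
// push constant range needed for an 88-byte block like the one added in
// this diff. ExecuTorch derives this from the shader internally.
VkResult create_layout_with_push_range(
    VkDevice device,
    VkDescriptorSetLayout set_layout,
    VkPipelineLayout* out_layout) {
  const VkPushConstantRange range{
      VK_SHADER_STAGE_COMPUTE_BIT, /*offset=*/0, /*size=*/88};

  VkPipelineLayoutCreateInfo info{};
  info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
  info.setLayoutCount = 1;
  info.pSetLayouts = &set_layout;
  info.pushConstantRangeCount = 1;
  info.pPushConstantRanges = &range;

  // Devices guarantee maxPushConstantsSize >= 128 bytes, so 88 bytes fits
  // without querying limits.
  return vkCreatePipelineLayout(device, &info, nullptr, out_layout);
}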