Skip to content

Commit

Permalink
Merge branch 'Tencent:master' into arm-conv-unified-elempack-4
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui authored Oct 7, 2023
2 parents 2be95bf + 54a9a56 commit 39f7199
Show file tree
Hide file tree
Showing 9 changed files with 198 additions and 7 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/release-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ jobs:
brew uninstall --ignore-dependencies libomp
- name: Build wheels
uses: pypa/cibuildwheel@v2.15.0
uses: pypa/cibuildwheel@v2.16.2
env:
CIBW_ARCHS_MACOS: ${{ matrix.arch }}
CIBW_ARCHS_LINUX: ${{ matrix.arch }}
Expand Down Expand Up @@ -124,7 +124,7 @@ jobs:
platforms: all

- name: Build wheels
uses: pypa/cibuildwheel@v2.15.0
uses: pypa/cibuildwheel@v2.16.2
env:
CIBW_ARCHS_LINUX: ${{ matrix.arch }}
CIBW_BUILD: ${{ matrix.build }}
Expand Down
85 changes: 85 additions & 0 deletions benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1631,7 +1631,92 @@ cooling_down = 1
vision_transformer min = 6605.19 max = 6606.66 avg = 6605.73
FastestDet min = 52.11 max = 52.97 avg = 52.61
```
### Raspberry Pi 5 Broadcom BCM2712, Cortex-A76 (ARMv8) (2.4GHz x 4)
```
pi@raspberrypi:~/ncnn/benchmark $ ./benchncnn 10 4 0 -1 1
loop_count = 10
num_threads = 4
powersave = 0
gpu_device = -1
cooling_down = 1
squeezenet min = 8.56 max = 8.65 avg = 8.61
squeezenet_int8 min = 11.65 max = 12.64 avg = 11.94
mobilenet min = 11.32 max = 13.46 avg = 11.75
mobilenet_int8 min = 11.30 max = 11.60 avg = 11.45
mobilenet_v2 min = 13.57 max = 13.77 avg = 13.63
mobilenet_v3 min = 9.18 max = 10.52 avg = 9.48
shufflenet min = 4.56 max = 6.19 avg = 5.98
shufflenet_v2 min = 5.04 max = 5.13 avg = 5.09
mnasnet min = 8.27 max = 9.86 avg = 8.65
proxylessnasnet min = 9.36 max = 11.18 avg = 9.62
efficientnet_b0 min = 14.77 max = 14.96 avg = 14.87
efficientnetv2_b0 min = 19.91 max = 20.11 avg = 19.99
regnety_400m min = 11.91 max = 12.10 avg = 11.96
blazeface min = 2.26 max = 2.29 avg = 2.28
googlenet min = 32.80 max = 33.17 avg = 32.97
googlenet_int8 min = 32.63 max = 32.99 avg = 32.78
resnet18 min = 23.95 max = 24.21 avg = 24.12
resnet18_int8 min = 32.50 max = 32.79 avg = 32.68
alexnet min = 25.31 max = 25.75 avg = 25.51
vgg16 min = 162.19 max = 165.08 avg = 163.75
vgg16_int8 min = 187.46 max = 191.21 avg = 189.09
resnet50 min = 55.95 max = 56.61 avg = 56.29
resnet50_int8 min = 73.34 max = 73.97 avg = 73.59
squeezenet_ssd min = 40.48 max = 41.39 avg = 40.92
squeezenet_ssd_int8 min = 45.67 max = 46.35 avg = 46.06
mobilenet_ssd min = 31.15 max = 31.73 avg = 31.48
mobilenet_ssd_int8 min = 31.09 max = 31.44 avg = 31.27
mobilenet_yolo min = 71.51 max = 72.38 avg = 71.95
mobilenetv2_yolov3 min = 47.86 max = 48.41 avg = 48.04
yolov4-tiny min = 55.95 max = 56.51 avg = 56.19
nanodet_m min = 14.26 max = 14.68 avg = 14.48
yolo-fastest-1.1 min = 6.48 max = 8.10 avg = 7.30
yolo-fastestv2 min = 6.03 max = 7.33 avg = 7.04
vision_transformer min = 613.62 max = 637.97 avg = 629.51
FastestDet min = 6.53 max = 6.66 avg = 6.59
pi@raspberrypi:~/ncnn/benchmark $ ./benchncnn 10 1 0 -1 1
loop_count = 10
num_threads = 1
powersave = 0
gpu_device = -1
cooling_down = 1
squeezenet min = 13.18 max = 13.27 avg = 13.22
squeezenet_int8 min = 15.69 max = 15.93 avg = 15.78
mobilenet min = 21.42 max = 21.55 avg = 21.46
mobilenet_int8 min = 14.92 max = 20.91 avg = 17.34
mobilenet_v2 min = 18.56 max = 23.06 avg = 19.24
mobilenet_v3 min = 13.16 max = 13.33 avg = 13.25
shufflenet min = 7.25 max = 11.14 avg = 8.43
shufflenet_v2 min = 7.17 max = 11.15 avg = 7.70
mnasnet min = 13.89 max = 13.94 avg = 13.91
proxylessnasnet min = 17.01 max = 17.26 avg = 17.07
efficientnet_b0 min = 26.19 max = 26.30 avg = 26.24
efficientnetv2_b0 min = 39.69 max = 40.12 avg = 39.97
regnety_400m min = 17.30 max = 17.44 avg = 17.36
blazeface min = 4.74 max = 4.78 avg = 4.76
googlenet min = 57.64 max = 57.84 avg = 57.72
googlenet_int8 min = 55.80 max = 56.01 avg = 55.93
resnet18 min = 31.90 max = 32.09 avg = 32.00
resnet18_int8 min = 56.92 max = 57.16 avg = 57.01
alexnet min = 39.84 max = 40.12 avg = 39.92
vgg16 min = 208.33 max = 211.06 avg = 209.64
vgg16_int8 min = 437.53 max = 440.55 avg = 439.35
resnet50 min = 95.75 max = 96.68 avg = 96.28
resnet50_int8 min = 116.80 max = 118.01 avg = 117.57
squeezenet_ssd min = 47.75 max = 47.97 avg = 47.86
squeezenet_ssd_int8 min = 61.98 max = 62.90 avg = 62.47
mobilenet_ssd min = 52.83 max = 53.39 avg = 53.07
mobilenet_ssd_int8 min = 46.15 max = 46.60 avg = 46.35
mobilenet_yolo min = 117.68 max = 117.97 avg = 117.81
mobilenetv2_yolov3 min = 67.37 max = 67.67 avg = 67.48
yolov4-tiny min = 73.85 max = 74.35 avg = 74.10
nanodet_m min = 22.78 max = 23.33 avg = 22.96
yolo-fastest-1.1 min = 8.82 max = 8.91 avg = 8.87
yolo-fastestv2 min = 8.18 max = 11.42 avg = 8.59
vision_transformer min = 1267.90 max = 1269.45 avg = 1268.82
FastestDet min = 7.79 max = 11.14 avg = 9.03
```
### Raspberry Pi Zero 2 W Broadcom BCM2710A1, Cortex-A53 (ARMv8) (1.0GHz x 4)

```
Expand Down
9 changes: 5 additions & 4 deletions src/layer/memorydata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ int MemoryData::load_param(const ParamDict& pd)
h = pd.get(1, 0);
d = pd.get(11, 0);
c = pd.get(2, 0);
load_type = pd.get(21, 1);

return 0;
}
Expand All @@ -36,19 +37,19 @@ int MemoryData::load_model(const ModelBin& mb)
{
if (d != 0)
{
data = mb.load(w, h, d, c, 1);
data = mb.load(w, h, d, c, load_type);
}
else if (c != 0)
{
data = mb.load(w, h, c, 1);
data = mb.load(w, h, c, load_type);
}
else if (h != 0)
{
data = mb.load(w, h, 1);
data = mb.load(w, h, load_type);
}
else if (w != 0)
{
data = mb.load(w, 1);
data = mb.load(w, load_type);
}
else // 0 0 0
{
Expand Down
1 change: 1 addition & 0 deletions src/layer/memorydata.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class MemoryData : public Layer
int h;
int d;
int c;
int load_type;

Mat data;
};
Expand Down
2 changes: 1 addition & 1 deletion tests/test_convolution1d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ static int test_convolution1d_0()
const int s = kdsp[i][2];
const int p = kdsp[i][3];
const int b0 = i % 2;
const int b1 = 1 - b1;
const int b1 = 1 - b0;

int ret = 0
|| test_convolution1d(9, 1, 1, k, d, s, p, b0)
Expand Down
1 change: 1 addition & 0 deletions tools/pnnx/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ set(pnnx_pass_level2_SRCS
pass_level2/F_mish.cpp
pass_level2/F_normalize.cpp
pass_level2/F_pad.cpp
pass_level2/F_pairwise_distance.cpp
pass_level2/F_pixel_shuffle.cpp
pass_level2/F_pixel_unshuffle.cpp
pass_level2/F_prelu.cpp
Expand Down
44 changes: 44 additions & 0 deletions tools/pnnx/src/pass_level2/F_pairwise_distance.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2023 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "pass_level2.h"

namespace pnnx {

// Graph rewriter pass for aten::pairwise_distance.
//
// The class supplies two things to its GraphRewriterPass base: the IR pattern
// to look for (match_pattern_graph) and the type string associated with that
// pattern (type_str). How the base class consumes them is defined outside this
// file.
class F_pairwise_distance : public GraphRewriterPass
{
public:
// Returns the pattern graph, as PNNX IR text, that this pass matches.
//
// The pattern consists of:
//   - two pnnx.Input operators producing operands x1 and x2,
//   - three prim::Constant operators producing operands p, eps and keepdim,
//     whose values are written as the placeholders %p, %eps and %keepdim,
//   - one aten::pairwise_distance operator taking x1, x2, p, eps, keepdim and
//     producing out,
//   - one pnnx.Output operator consuming out.
//
// NOTE(review): the "7 6" line presumably declares 7 operators and 6 operands,
// which agrees with the counts listed above -- confirm against the PNNX IR
// format before editing the pattern.
//
// The raw string is returned verbatim, so no comments may be placed inside it.
const char* match_pattern_graph() const
{
return R"PNNXIR(7767517
7 6
pnnx.Input input_0 0 1 x1
pnnx.Input input_1 0 1 x2
prim::Constant op_0 0 1 p value=%p
prim::Constant op_1 0 1 eps value=%eps
prim::Constant op_2 0 1 keepdim value=%keepdim
aten::pairwise_distance op_3 5 1 x1 x2 p eps keepdim out
pnnx.Output output 1 0 out
)PNNXIR";
}

// Returns the type string associated with the matched pattern.
const char* type_str() const
{
return "F.pairwise_distance";
}
};

// Registers the pass globally.
// NOTE(review): the second argument (10) is presumably a priority/ordering
// value -- confirm against the macro definition.
REGISTER_GLOBAL_PNNX_GRAPH_REWRITER_PASS(F_pairwise_distance, 10)

} // namespace pnnx
1 change: 1 addition & 0 deletions tools/pnnx/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ pnnx_add_test(F_max_pool2d)
pnnx_add_test(F_max_pool3d)
pnnx_add_test(F_normalize)
pnnx_add_test(F_pad)
pnnx_add_test(F_pairwise_distance)
pnnx_add_test(F_pixel_shuffle)
pnnx_add_test(F_pixel_unshuffle)
pnnx_add_test(F_prelu)
Expand Down
58 changes: 58 additions & 0 deletions tools/pnnx/tests/test_F_pairwise_distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Tencent is pleased to support the open source community by making ncnn available.
#
# Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
#
# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

import torch
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    """Parameter-free module that calls F.pairwise_distance four ways.

    The four calls cover an explicit p=1 without keepdim, an explicit p=2 with
    keepdim, the all-defaults form, and a non-default eps.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x, y):
        """Return the four pairwise-distance results for x and y as a tuple."""
        dist_p1 = F.pairwise_distance(x, y, p=1, keepdim=False)
        dist_p2_keepdim = F.pairwise_distance(x, y, p=2, keepdim=True)
        dist_default = F.pairwise_distance(x, y)
        dist_eps = F.pairwise_distance(x, y, eps=1e-3)
        return dist_p1, dist_p2_keepdim, dist_default, dist_eps

def test():
    """Round-trip Model through TorchScript and pnnx and compare the outputs.

    Returns True only when every output of the pnnx-generated inference code is
    exactly equal (torch.equal) to the corresponding eager-mode output.
    """
    model = Model()
    model.eval()

    # Fixed seed so the random inputs are reproducible.
    torch.manual_seed(0)
    x = torch.rand(12, 128, 128)
    y = torch.rand(12, 128, 128)

    # Reference results from the eager model.
    ref0, ref1, ref2, ref3 = model(x, y)

    # Trace the model and write the TorchScript file.
    traced = torch.jit.trace(model, (x, y))
    traced.save("test_F_pairwise_distance.pt")

    # Run the pnnx converter on the saved TorchScript file.
    import os
    os.system("../src/pnnx test_F_pairwise_distance.pt inputshape=[12,128,128],[12,128,128]")

    # Import the module written by pnnx and run its inference entry point.
    import test_F_pairwise_distance_pnnx
    out0, out1, out2, out3 = test_F_pairwise_distance_pnnx.test_inference()

    # Compare pairwise, stopping at the first mismatch.
    pairs = ((ref0, out0), (ref1, out1), (ref2, out2), (ref3, out3))
    return all(torch.equal(expected, actual) for expected, actual in pairs)

if __name__ == "__main__":
    # Exit status 0 when the round-trip check passes, 1 otherwise.
    exit(0 if test() else 1)

0 comments on commit 39f7199

Please sign in to comment.