Commit 3413971

Link hifi ops to jarvis
Differential Revision: D68471314
Pull Request resolved: #8045
1 parent: 16889b0

5 files changed: +238 -7 lines

backends/cadence/hifi/operators/op_mean.cpp (+1 -1)

@@ -56,7 +56,7 @@ int prepare_data(
   return num_axis_dims;
 }
 
-Tensor& mean_dim_out(
+Tensor& mean_out(
     RuntimeContext& ctx,
     const Tensor& in,
     optional<ArrayRef<int64_t>> dim_list,

backends/cadence/hifi/operators/op_quantized_relu_out.cpp (+2 -2)

@@ -45,7 +45,7 @@ void quantized_relu_(
   }
 }
 
-void quantized_relu_out(
+void quantized_relu_per_tensor_out(
     KernelRuntimeContext& ctx,
     const Tensor& input,
     const Tensor& in_zero_point,
@@ -100,4 +100,4 @@ void quantized_relu_out(
 } // namespace native
 } // namespace HiFi
 } // namespace impl
-} // namespace cadence
+} // namespace cadence

backends/cadence/hifi/operators/op_remainder.cpp (+1 -2)

@@ -8,15 +8,14 @@
 
 #include <cmath>
 
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
 #include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/kernels/portable/cpu/util/broadcast_util.h>
 #include <executorch/kernels/portable/cpu/util/elementwise_util.h>
 #include <executorch/kernels/portable/cpu/util/functional_util.h>
 #include <executorch/kernels/portable/cpu/util/math_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
-#include "kernels.h"
-
 using executorch::aten::RuntimeContext;
 using executorch::aten::Scalar;
 using executorch::aten::ScalarType;

backends/cadence/hifi/operators/op_softmax.cpp (+2 -2)

@@ -8,11 +8,11 @@
 
 #include <cmath>
 
+#include <executorch/backends/cadence/hifi/kernels/kernels.h>
 #include <executorch/kernels/portable/cpu/util/activation_ops_util.h>
 #include <executorch/kernels/portable/cpu/util/functional_util.h>
 #include <executorch/kernels/portable/cpu/util/reduce_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
-#include "kernels.h"
 
 using executorch::aten::ScalarType;
 using executorch::aten::Tensor;
@@ -24,7 +24,7 @@ namespace impl {
 namespace HiFi {
 namespace native {
 
-Tensor& softmax_out(
+Tensor& _softmax_out(
     KernelRuntimeContext& ctx,
     const Tensor& in,
     int64_t dim,
New file (+232 -0)

/*******************************************************************************
 * Copyright (c) 2018-2024 Cadence Design Systems, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to use this Software with Cadence processor cores only and
 * not with any other processors and platforms, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 ******************************************************************************/

#include "xa_nnlib_common.h"

#include <string.h>

/*
 * Currently only supports up to 5D input tensors.
 * 1/2/3/4 D input tensors will be scaled up to 5D.
 * For example, 2x3 -> 1x1x1x2x3.
 */
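/*
 * Worked example (editor's illustration, not part of the committed source):
 * a 2x3 input with permute_vec = {1, 0} is promoted below to a 1x1x1x2x3
 * input with permute_vec = {0, 1, 2, 4, 3}, and the output is produced
 * as 1x1x1x3x2.
 */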
WORD32 xa_nn_transpose_8_8(WORD8 * __restrict__ p_out
                          ,const WORD32 *const p_out_shape
                          ,const WORD8 * __restrict__ p_inp
                          ,const WORD32 *const p_inp_shape
                          ,const WORD32 * __restrict__ p_permute_vec
                          ,WORD32 num_out_dims
                          ,WORD32 num_inp_dims)
{
  /* NULL pointer checks */
  XA_NNLIB_ARG_CHK_PTR(p_out, -1);
  XA_NNLIB_ARG_CHK_PTR(p_inp, -1);
  XA_NNLIB_ARG_CHK_PTR(p_permute_vec, -1);
  XA_NNLIB_ARG_CHK_PTR(p_out_shape, -1);
  XA_NNLIB_ARG_CHK_PTR(p_inp_shape, -1);

  /* Invalid input checks */
  XA_NNLIB_ARG_CHK_COND(((num_inp_dims <= 0) || (num_inp_dims > 5)), -1);
  XA_NNLIB_ARG_CHK_COND((num_out_dims != num_inp_dims), -1);

  int itr = 0;
  for(itr=0; itr < num_inp_dims; itr++)
  {
    XA_NNLIB_ARG_CHK_COND((p_inp_shape[itr] <= 0), -1);
  }
  for(itr=0; itr < num_out_dims; itr++)
  {
    XA_NNLIB_ARG_CHK_COND((p_out_shape[itr] <= 0), -1);
  }

  /* Output shape provided must be correct based on input
   * shape and permute values */
  for(itr=0; itr < num_out_dims; itr++)
  {
    int output_dim = p_out_shape[itr];
    int expected_dim = p_inp_shape[p_permute_vec[itr]];
    XA_NNLIB_ARG_CHK_COND((output_dim != expected_dim), -1);
  }

  /* Pointer alignment checks */
  XA_NNLIB_ARG_CHK_ALIGN(p_out, sizeof(WORD8), -1);
  XA_NNLIB_ARG_CHK_ALIGN(p_inp, sizeof(WORD8), -1);
  XA_NNLIB_ARG_CHK_ALIGN(p_permute_vec, sizeof(WORD32), -1);
  XA_NNLIB_ARG_CHK_ALIGN(p_out_shape, sizeof(WORD32), -1);
  XA_NNLIB_ARG_CHK_ALIGN(p_inp_shape, sizeof(WORD32), -1);

  /* Shift all dims of size 1 to the outer part */
  int eff_output_shape[5];
  int eff_permute_vec[5];

  for(int i = 0; i < num_out_dims; i++)
  {
    eff_output_shape[i] = p_out_shape[i];
    eff_permute_vec[i] = p_permute_vec[i];
  }

  int one_i = num_out_dims - 1, non_one_i = num_out_dims - 1;
  while(one_i > 0 && non_one_i >= 0)
  {
    while(one_i > 0 && eff_output_shape[one_i] != 1)
    {
      one_i--;
    }
    non_one_i = one_i;
    while(non_one_i >= 0 && eff_output_shape[non_one_i] == 1)
    {
      non_one_i--;
    }
    if(one_i > 0 && non_one_i >= 0)
    {
      int temp;
      /* swap output_shape */
      {
        temp = eff_output_shape[one_i];
        eff_output_shape[one_i] = eff_output_shape[non_one_i];
        eff_output_shape[non_one_i] = temp;
      }
      /* swap permute_vec */
      {
        temp = eff_permute_vec[one_i];
        eff_permute_vec[one_i] = eff_permute_vec[non_one_i];
        eff_permute_vec[non_one_i] = temp;
      }
    }
  }

  /* Promoting lesser dim tensors to 5D tensors.
   * Also updating the permute_vec and shapes as needed for optimization */
  int p_5D_inp_shape[5] = {1, 1, 1, 1, 1};
  int p_5D_out_shape[5] = {1, 1, 1, 1, 1};
  int p_5D_permute_vec[5] = {0, 1, 2, 3, 4};

  /* Check if any inner inp dimension is same in the output */
  int last_dim_same = 1, last_n_same_dim = 0;
  itr = num_inp_dims - 1;
  while(itr >= 0)
  {
    last_n_same_dim = (last_dim_same && (eff_permute_vec[itr] == itr)) ? (last_n_same_dim + 1) : last_n_same_dim;
    last_dim_same = (eff_permute_vec[itr] == itr) ? last_dim_same & 1 : last_dim_same & 0;
    itr--;
  }

  int dims_added = 5 - num_inp_dims;
  itr = num_inp_dims - 1;
  int same_count = last_n_same_dim;
  int count = 4;
  while(itr >= 0)
  {
    p_5D_inp_shape[count] = (same_count > 0) ? p_5D_inp_shape[count]*p_inp_shape[itr] : p_inp_shape[itr];
    p_5D_out_shape[count] = (same_count > 0) ? p_5D_out_shape[count]*eff_output_shape[itr] : eff_output_shape[itr];
    same_count--;
    itr--;
    count = (same_count > 0) ? count : count - 1;
  }

  itr = num_inp_dims - 1;
  same_count = (last_n_same_dim) ? num_inp_dims - (last_n_same_dim - 1) : 0;
  count = 4;
  while(itr >= 0)
  {
    p_5D_permute_vec[count] = (same_count > 0) ? eff_permute_vec[itr-(last_n_same_dim - 1)] + dims_added + last_n_same_dim - 1 : eff_permute_vec[itr] + dims_added;
    same_count--;
    itr--;
    count--;
  }

  int out_dim0, out_dim1, out_dim2, out_dim3, out_dim4;
  int inp_dim1, inp_dim2, inp_dim3, inp_dim4;
  int inp_stride[5];

  out_dim0 = p_5D_out_shape[0];
  out_dim1 = p_5D_out_shape[1];
  out_dim2 = p_5D_out_shape[2];
  out_dim3 = p_5D_out_shape[3];
  out_dim4 = p_5D_out_shape[4];

  inp_dim1 = p_5D_inp_shape[1];
  inp_dim2 = p_5D_inp_shape[2];
  inp_dim3 = p_5D_inp_shape[3];
  inp_dim4 = p_5D_inp_shape[4];

  inp_stride[0] = inp_dim1*inp_dim2*inp_dim3*inp_dim4;
  inp_stride[1] = inp_dim2*inp_dim3*inp_dim4;
  inp_stride[2] = inp_dim3*inp_dim4;
  inp_stride[3] = inp_dim4;
  inp_stride[4] = 1;

  if(last_n_same_dim)
  {
    int itr0, itr1, itr2, itr3;
    WORD8 *p_inp0 = (WORD8*)p_inp;
    for(itr0 = 0; itr0 < out_dim0; itr0++)
    {
      WORD8 *p_inp1 = p_inp0+(itr0*inp_stride[p_5D_permute_vec[0]]);
#pragma loop_count min=1
      for(itr1 = 0; itr1 < out_dim1; itr1++)
      {
        WORD8 *p_inp2 = p_inp1+(itr1*inp_stride[p_5D_permute_vec[1]]);
#pragma loop_count min=1
        for(itr2 = 0; itr2 < out_dim2; itr2++)
        {
          WORD8 *p_inp3 = p_inp2+(itr2*inp_stride[p_5D_permute_vec[2]]);
#pragma loop_count min=1
          for(itr3 = 0; itr3 < out_dim3; itr3++, p_out+=out_dim4)
          {
            WORD8 *p_inp4 = p_inp3+(itr3*inp_stride[p_5D_permute_vec[3]]);
            memcpy(p_out, p_inp4, out_dim4);
          }
        }
      }
    }
  }
  else
  {
    int itr0, itr1, itr2, itr3, itr4;
    WORD8 *p_inp0 = (WORD8*)p_inp;
    for(itr0 = 0; itr0 < out_dim0; itr0++)
    {
      WORD8 *p_inp1 = p_inp0+(itr0*inp_stride[p_5D_permute_vec[0]]);
      for(itr1 = 0; itr1 < out_dim1; itr1++)
      {
        WORD8 *p_inp2 = p_inp1+(itr1*inp_stride[p_5D_permute_vec[1]]);
        for(itr2 = 0; itr2 < out_dim2; itr2++)
        {
          WORD8 *p_inp3 = p_inp2+(itr2*inp_stride[p_5D_permute_vec[2]]);
          for(itr3 = 0; itr3 < out_dim3; itr3++)
          {
            WORD8 *p_inp4 = p_inp3+(itr3*inp_stride[p_5D_permute_vec[3]]);
            for(itr4 = 0; itr4 < out_dim4; itr4++)
            {
              WORD8 d0 = *(p_inp4);
              p_inp4 += inp_stride[p_5D_permute_vec[4]];
              *p_out++ = d0;
            }
          }
        }
      }
    }
  }

  return 0;
}
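For reference, below is a minimal usage sketch of the new kernel (an illustration added for this writeup, not part of the commit). It transposes a 2x3 int8 matrix to 3x2, the case the header comment describes being promoted to 1x1x1x2x3 internally. The WORD8/WORD32 typedefs and the standalone prototype are assumptions standing in for the nnlib headers; a real build would include xa_nnlib_common.h and link against nnlib instead.

#include <stdint.h>
#include <stdio.h>

/* Assumed stand-ins for the nnlib typedefs on HiFi targets. */
typedef int8_t  WORD8;
typedef int32_t WORD32;

/* Prototype matching the kernel added in this commit. */
WORD32 xa_nn_transpose_8_8(WORD8 *p_out, const WORD32 *const p_out_shape,
                           const WORD8 *p_inp, const WORD32 *const p_inp_shape,
                           const WORD32 *p_permute_vec,
                           WORD32 num_out_dims, WORD32 num_inp_dims);

int main(void)
{
  /* 2x3 row-major input: [[1, 2, 3], [4, 5, 6]] */
  const WORD8 inp[6] = {1, 2, 3, 4, 5, 6};
  WORD8 out[6];
  const WORD32 inp_shape[2] = {2, 3};
  const WORD32 out_shape[2] = {3, 2};
  const WORD32 permute_vec[2] = {1, 0}; /* swap the two axes */

  /* Returns 0 on success, -1 on argument errors. */
  if (xa_nn_transpose_8_8(out, out_shape, inp, inp_shape,
                          permute_vec, 2, 2) != 0)
    return 1;

  /* Prints 1 4 2 5 3 6, i.e. the 3x2 result [[1, 4], [2, 5], [3, 6]]. */
  for (int i = 0; i < 6; i++)
    printf("%d ", out[i]);
  return 0;
}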
