Skip to content

Commit

Permalink
WIP: flipped k, but it doesn't help
Browse files (browse the repository at this point in the history)
  • Loading branch information
hunse committed Jun 2, 2016
1 parent 0a39f3d commit d57c0ba
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 25 deletions.
57 changes: 33 additions & 24 deletions nengo_ocl/clra_nonlinearities.py
Original file line number Diff line number Diff line change
Expand Up @@ -1457,18 +1457,17 @@ def plan_conv2d(queue, X, Y, filters, biases, shape_in, shape_out,
__global ${type} *y
)
{
const int j = get_global_id(0);
const int i = get_global_id(1);
const int k = get_global_id(2);
const int j = get_global_id(1);
const int i = get_global_id(2);
const int k = get_global_id(0);
const int ij = i*${nyj} + j;
const int ijk = k*${nyi*nyj} + i*${nyj} + j;
const int tj = get_local_id(0);
const int ti = get_local_id(1);
const int tk = get_local_id(2);
const int lsizej = get_local_size(0);
const int lsizei = get_local_size(1);
const int lsizek = get_local_size(2);
const int tj = get_local_id(1);
const int ti = get_local_id(2);
const int tk = get_local_id(0);
const int lsizej = get_local_size(1);
const int lsizei = get_local_size(2);
const int lsizek = get_local_size(0);
const int lsizeij = lsizei*lsizej;
const int tij = tj + ti*lsizej;
Expand All @@ -1480,15 +1479,16 @@ def plan_conv2d(queue, X, Y, filters, biases, shape_in, shape_out,
const int i0 = (i - ti)*${sti} - ${pi};
__local ${type} patch[${nipatch}][${njpatch}];
% if conv:
__local ${type} filter[${nf_per}][${si*sj}];
f += k*${nc*si*sj};
__local ${type} filter[${si*sj}][${nf_per}];
f += k;
% else:
f += k*${nc*si*sj*nyi*nyj} + ij;
f += ij*${nf} + k;
% endif
x += ${xstart};
y += ${ystart};
${type} out = b[ijk];
const int out_ijk = k*${nyi*nyj} + i*${nyj} + j;
${type} out = b[out_ijk];
for (int c = 0; c < ${nc}; c++) {
Expand All @@ -1507,28 +1507,28 @@ def plan_conv2d(queue, X, Y, filters, biases, shape_in, shape_out,
% if conv:
// load filters
__global const ${type} *fc = f + c*${si*sj};
__global const ${type} *fc = f + c*${si*sj*nf};
for (int kij = tij; kij < ${si*sj}; kij += lsizeij)
filter[tk][kij] = fc[kij];
filter[kij][tk] = fc[kij*${nf}];
% else:
__global const ${type} *filter = f + c*${si*sj*nyi*nyj};
__global const ${type} *filter = f + c*${si*sj*nyi*nyj*nf};
% endif
barrier(CLK_LOCAL_MEM_FENCE);
for (int ii = 0; ii < ${si}; ii++)
for (int jj = 0; jj < ${sj}; jj++)
out += patch[${sti}*ti+ii][${stj}*tj+jj]
% if conv:
* filter[tk][ii*${sj}+jj];
* filter[ii*${sj}+jj][tk];
% else:
* filter[(ii*${sj}+jj)*${nyi*nyj}];
* filter[(ii*${sj}+jj)*${nyi*nyj*nf}];
% endif
barrier(CLK_LOCAL_MEM_FENCE);
}
if (i < ${nyi} && j < ${nyj} && k < ${nf})
y[ijk] = out;
y[out_ijk] = out;
}
"""

Expand All @@ -1542,10 +1542,19 @@ def plan_conv2d(queue, X, Y, filters, biases, shape_in, shape_out,
# max_group = get_mwgs(queue, cap=256)
assert max_group >= 32

lsize = (6, 6, 4)
gsize = (round_up(nyj, lsize[0]),
round_up(nyi, lsize[1]),
round_up(nf, lsize[2]))
# # lsize = (8, 8, 4)
# lsize = (4, 4, 16)
# gsize = (round_up(nyj, lsize[0]),
# round_up(nyi, lsize[1]),
# round_up(nf, lsize[2]))

lsize = (16, 4, 4)
# lsize = (32, 4, 4)
# lsize = (32, 4, 1)
gsize = (round_up(nf, lsize[0]),
round_up(nyj, lsize[1]),
round_up(nyi, lsize[2]),
)

# lsize0 = min(nyj, 32)
# lsize1 = min(max_group // lsize0, nyi, 32)
Expand Down
2 changes: 1 addition & 1 deletion nengo_ocl/simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,7 @@ def _plan_Conv2d(self, ops):
X = self.all_data.getitem_device(self.sidx[op.input])
Y = self.all_data.getitem_device(self.sidx[op.output])
ftrans = np.asarray(np.transpose(
f, (0, 1, 2, 3) if conv else (0, 3, 4, 5, 1, 2)), order='C')
f, (1, 2, 3, 0) if conv else (3, 4, 5, 1, 2, 0)), order='C')
F = self.Array(ftrans.ravel())
B = self.Array((np.zeros(p.shape_out) + b).ravel())
plans.append(plan_conv2d(
Expand Down

0 comments on commit d57c0ba

Please sign in to comment.