Skip to content

Commit 98cf6f1

Browse files
committed
Added SpatialAdaptiveMaxPooling
1 parent e8fadc6 commit 98cf6f1

6 files changed

+361
-2
lines changed

SpatialAdaptiveMaxPooling.lua

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
-- Declare the class under the name 'nn.SpatialAdaptiveMaxPooling', passing
-- 'nn.Module' as the second class name. The second return value is kept as
-- `parent` and is used by the constructor to run the base initialisation.
local SpatialAdaptiveMaxPooling, parent = torch.class('nn.SpatialAdaptiveMaxPooling', 'nn.Module')
2+
3+
--- Build an adaptive max-pooling module with a fixed output size.
-- @param W stored as `self.W`; the C backend reads this field as the output width
-- @param H stored as `self.H`; the C backend reads this field as the output height
function SpatialAdaptiveMaxPooling:__init(W, H)
   parent.__init(self)

   -- Tensor handed to the backend, which resizes and fills it during
   -- updateOutput and reads it back during updateGradInput.
   self.indices = torch.Tensor()

   self.W, self.H = W, H
end
11+
12+
--- Forward pass: delegates to the backend routine found on the input tensor.
-- @param input tensor forwarded unchanged to the backend
-- @return `self.output`
function SpatialAdaptiveMaxPooling:updateOutput(input)
   -- The backend functions are looked up through the tensor's `nn` table.
   local backend = input.nn
   backend.SpatialAdaptiveMaxPooling_updateOutput(self, input)
   return self.output
end
16+
17+
--- Backward pass: delegates to the backend routine found on the input tensor.
-- @param input tensor forwarded unchanged to the backend
-- @param gradOutput tensor forwarded unchanged to the backend
-- @return `self.gradInput`
function SpatialAdaptiveMaxPooling:updateGradInput(input, gradOutput)
   -- The backend functions are looked up through the tensor's `nn` table.
   local backend = input.nn
   backend.SpatialAdaptiveMaxPooling_updateGradInput(self, input, gradOutput)
   return self.gradInput
end
21+
22+
--- Shrink the module's buffers: `gradInput`, `output` and `indices` are each
-- resized with no arguments and their storage is resized to 0.
function SpatialAdaptiveMaxPooling:empty()
   -- Same order as before: gradInput, then output, then indices.
   for _, field in ipairs({'gradInput', 'output', 'indices'}) do
      local buffer = self[field]
      buffer:resize()
      buffer:storage():resize(0)
   end
end

doc/convolution.md

+26-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ A convolution is an integral that expresses the amount of overlap of one functio
1313
* [SpatialSubSampling](#nn.SpatialSubSampling) : a 2D sub-sampling over an input image ;
1414
* [SpatialMaxPooling](#nn.SpatialMaxPooling) : a 2D max-pooling operation over an input image ;
1515
* [SpatialAveragePooling](#nn.SpatialAveragePooling) : a 2D average-pooling operation over an input image ;
16+
* [SpatialAdaptiveMaxPooling](#nn.SpatialAdaptiveMaxPooling) : a 2D max-pooling operation which adapts its parameters dynamically such that the output is of fixed size ;
1617
* [SpatialLPPooling](#nn.SpatialLPPooling) : computes the `p` norm in a convolutional manner on a set of input images ;
1718
* [SpatialConvolutionMap](#nn.SpatialConvolutionMap) : a 2D convolution that uses a generic connection table ;
1819
* [SpatialZeroPadding](#nn.SpatialZeroPadding) : pads a feature map with a specified number of zeros ;
@@ -368,6 +369,29 @@ Applies 2D average-pooling operation in `kWxkH` regions by step size
368369
`dWxdH` steps. The number of output features is equal to the number of
369370
input planes.
370371

372+
<a name="nn.SpatialAdaptiveMaxPooling"/>
373+
### SpatialAdaptiveMaxPooling ###
374+
375+
```lua
376+
module = nn.SpatialAdaptiveMaxPooling(W, H)
377+
```
378+
379+
Applies a 2D max-pooling operation to an image such that the output is of
380+
size `WxH`, for any input size. The number of output features is equal
381+
to the number of input planes.
382+
383+
For an output of dimensions `(owidth,oheight)`, the indexes of the pooling
384+
region `(j,i)` in the input image of dimensions `(iwidth,iheight)` are
385+
given by:
386+
387+
```
388+
x_j_start = floor((j /owidth) * iwidth)
389+
x_j_end = ceil(((j+1)/owidth) * iwidth)
390+
391+
y_i_start = floor((i /oheight) * iheight)
392+
y_i_end = ceil(((i+1)/oheight) * iheight)
393+
```
394+
371395
<a name="nn.SpatialSubSampling"/>
372396
### SpatialSubSampling ###
373397

@@ -419,8 +443,8 @@ Applies a 2D up-sampling over an input image composed of several input planes. T
419443
`forward(input)` is expected to be a 3D or 4D tensor (i.e. for 4D: `nBatchPlane x nInputPlane x height x width`). The number of output planes will be the same. The v dimension is assumed to be the second last dimension (i.e. for 4D it will be the 3rd dim), and the u dimension is assumed to be the last dimension.
420444

421445
The parameters are the following:
422-
* `scale`: The upscale ratio. Must be a positive integer
423-
446+
* `scale`: The upscale ratio. Must be a positive integer
447+
424448
The up-scaling method is simple nearest neighbor, i.e.:
425449

426450
```lua

generic/SpatialAdaptiveMaxPooling.c

+269
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
#ifndef TH_GENERIC_FILE
2+
#define TH_GENERIC_FILE "generic/SpatialAdaptiveMaxPooling.c"
3+
#else
4+
5+
/*
 * Adaptive max-pooling forward pass over one frame of `nslices` planes.
 *
 * input_p   read as nslices planes of iheight rows by iwidth columns
 * output_p  written as nslices planes of oheight by owidth; each cell
 *           receives the maximum of its pooling region
 * indx_p    same layout as output_p; receives the 1-based column of the
 *           maximum, relative to the left edge of the region
 * indy_p    same layout as output_p; receives the 1-based row of the
 *           maximum, relative to the top edge of the region
 *
 * Output row i pools input rows [floor(i/oheight*iheight),
 * ceil((i+1)/oheight*iheight)); columns are handled the same way.
 *
 * The running maximum starts at -HUGE_VAL and the recorded position starts
 * at the first cell of the region. A region holding only values that never
 * compare greater than the seed (for example -inf or NaN) therefore still
 * yields an in-range index pair (1,1). The previous -FLT_MAX / -1 seed left
 * such regions with an index of 0, which the backward pass turned into a
 * write outside the region, and it also ignored `double` inputs below
 * -FLT_MAX.
 */
static void nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(real *input_p, real *output_p,
                                                              real *indx_p, real *indy_p,
                                                              long nslices,
                                                              long iwidth, long iheight,
                                                              long owidth, long oheight)
{
  long k;
#pragma omp parallel for private(k)
  for (k = 0; k < nslices; k++)
  {
    /* loop over output */
    long i, j;
    for (i = 0; i < oheight; i++)
    {
      /* The start expression must stay identical to the one used by the
         backward pass, which recomputes it to turn the stored relative
         indices back into absolute positions. */
      int y_start = (int)floor((float)i / oheight * iheight);
      int y_end   = (int)ceil((float)(i + 1) / oheight * iheight);
      int kH = y_end - y_start;

      for (j = 0; j < owidth; j++)
      {
        int x_start = (int)floor((float)j / owidth * iwidth);
        int x_end   = (int)ceil((float)(j + 1) / owidth * iwidth);
        int kW = x_end - x_start;

        /* top-left corner of the pooling region, and the output cell */
        real *ip = input_p + k*iwidth*iheight + y_start*iwidth + x_start;
        long o = k*owidth*oheight + i*owidth + j;

        /* compute local max, remembering its (row, column) in the region */
        real maxval = -HUGE_VAL;
        int maxy = 0;
        int maxx = 0;
        int x, y;
        for (y = 0; y < kH; y++)
        {
          for (x = 0; x < kW; x++)
          {
            real val = ip[y*iwidth + x];
            if (val > maxval)
            {
              maxval = val;
              maxy = y;
              maxx = x;
            }
          }
        }

        /* set output to local max */
        output_p[o] = maxval;

        /* store location of max (x,y), 1-based */
        indy_p[o] = maxy + 1;
        indx_p[o] = maxx + 1;
      }
    }
  }
}
65+
66+
/*
 * Lua-callable forward pass.
 *
 * Stack: 1 = the module (fields "H", "W", "indices", "output" are read),
 *        2 = the input tensor, which must be 3D or 4D (4D = batch mode).
 *
 * Resizes `output` to (nslices, H, W) or (nbatch, nslices, H, W), and
 * `indices` to the same shape with a leading dimension of 2. The first half
 * of `indices` is handed to the frame routine as the row indices and the
 * second half as the column indices.
 *
 * Returns 1, the result count reported to Lua.
 */
static int nn_(SpatialAdaptiveMaxPooling_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  long oheight = luaT_getfieldcheckint(L, 1, "H");
  long owidth = luaT_getfieldcheckint(L, 1, "W");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  int batched;      /* non-zero when the input carries a leading batch dimension */
  int plane_dim;    /* index of the plane dimension; height and width follow it */
  long nbatch;
  long nslices;
  long iheight;
  long iwidth;
  long in_frame;    /* number of input elements in one frame */
  long out_frame;   /* number of output elements in one frame */

  real *input_data;
  real *output_data;
  real *indices_data;

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");

  batched = (input->nDimension == 4);
  plane_dim = batched ? 1 : 0;

  /* sizes */
  nbatch = batched ? input->size[0] : 1;
  nslices = input->size[plane_dim];
  iheight = input->size[plane_dim + 1];
  iwidth = input->size[plane_dim + 2];

  in_frame = nslices*iwidth*iheight;
  out_frame = nslices*owidth*oheight;

  /* work on a contiguous copy/reference of the input; released below */
  input = THTensor_(newContiguous)(input);

  if (batched)
  {
    long p;

    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
    /* indices will contain i,j locations for each output point */
    THTensor_(resize5d)(indices, 2, nbatch, nslices, oheight, owidth);

    /* data pointers are fetched only after the resizes above */
    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data + p*in_frame,
                                                        output_data + p*out_frame,
                                                        indices_data + (p + nbatch)*out_frame,
                                                        indices_data + p*out_frame,
                                                        nslices,
                                                        iwidth, iheight,
                                                        owidth, oheight);
    }
  }
  else
  {
    THTensor_(resize3d)(output, nslices, oheight, owidth);
    /* indices will contain i,j locations for each output point */
    THTensor_(resize4d)(indices, 2, nslices, oheight, owidth);

    /* data pointers are fetched only after the resizes above */
    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data,
                                                      output_data,
                                                      indices_data + out_frame,
                                                      indices_data,
                                                      nslices,
                                                      iwidth, iheight,
                                                      owidth, oheight);
  }

  /* cleanup */
  THTensor_(free)(input);
  return 1;
}
146+
147+
148+
149+
/*
 * Adaptive max-pooling backward pass over one frame of `nslices` planes.
 *
 * For every output cell, the stored 1-based (row, column) of the maximum is
 * turned back into an absolute input position by adding the start of the
 * cell's pooling region, and the cell's gradient is added to that position.
 *
 * gradInput_p   nslices planes of iheight by iwidth; accumulated into (+=),
 *               so the caller is responsible for its initial contents
 * gradOutput_p  nslices planes of oheight by owidth
 * indx_p/indy_p same layout as gradOutput_p; column/row indices relative to
 *               the region start
 */
static void nn_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(real *gradInput_p, real *gradOutput_p,
                                                                 real *indx_p, real *indy_p,
                                                                 long nslices,
                                                                 long iwidth, long iheight,
                                                                 long owidth, long oheight)
{
  long plane;
#pragma omp parallel for private(plane)
  for (plane = 0; plane < nslices; plane++)
  {
    /* per-plane views of the four buffers */
    real *gin = gradInput_p + plane*iwidth*iheight;
    real *gout = gradOutput_p + plane*owidth*oheight;
    real *col_idx = indx_p + plane*owidth*oheight;
    real *row_idx = indy_p + plane*owidth*oheight;

    long oy, ox;
    for (oy = 0; oy < oheight; oy++)
    {
      /* first input row of the region feeding output row oy */
      int row0 = (int)floor((float) oy / oheight * iheight);
      for (ox = 0; ox < owidth; ox++)
      {
        /* first input column of the region feeding output column ox */
        int col0 = (int)floor((float) ox / owidth * iwidth);

        /* absolute position of the max */
        long src_row = row_idx[oy*owidth + ox] - 1 + row0;
        long src_col = col_idx[oy*owidth + ox] - 1 + col0;

        /* route the gradient to it */
        gin[src_row*iwidth + src_col] += gout[oy*owidth + ox];
      }
    }
  }
}
182+
183+
/*
 * Lua-callable backward pass.
 *
 * Stack: 1 = the module (fields "indices" and "gradInput" are read),
 *        2 = the input tensor (3D, or 4D in batch mode),
 *        3 = gradOutput, with the same number of dimensions as the input.
 *
 * Resizes `gradInput` like `input`, zeroes it, then lets the frame routine
 * add each output gradient at the position recorded in `indices`. The first
 * half of `indices` is passed as the row indices and the second half as the
 * column indices.
 *
 * Both arguments are validated before anything is allocated, so a failed
 * check cannot leak the contiguous gradOutput, and a tensor of unexpected
 * rank can no longer cause an out-of-range read of `size[]`. Sizes are held
 * in `long` so that the offset arithmetic below is done in `long`, matching
 * the forward pass.
 *
 * Returns 1, the result count reported to Lua.
 */
static int nn_(SpatialAdaptiveMaxPooling_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  int dimw = 2;
  int dimh = 1;
  long nbatch = 1;
  long nslices;
  long iheight;
  long iwidth;
  long oheight;
  long owidth;
  real *gradInput_data;
  real *gradOutput_data;
  real *indices_data;

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor expected");
  luaL_argcheck(L, gradOutput->nDimension == input->nDimension, 3, "gradOutput must have as many dimensions as input");

  /* get contiguous gradOutput; released below */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->nDimension == 4) {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size[dimh-1];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  oheight = gradOutput->size[dimh];
  owidth = gradOutput->size[dimw];

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);
  indices_data = THTensor_(data)(indices);

  /* backprop */
  if (input->nDimension == 3)
  {
    nn_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
                                                         indices_data+nslices*owidth*oheight, indices_data,
                                                         nslices,
                                                         iwidth, iheight,
                                                         owidth, oheight);
  }
  else
  {
    long p;
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      nn_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight,
                                                           indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight,
                                                           nslices,
                                                           iwidth, iheight,
                                                           owidth, oheight);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);

  return 1;
}
254+
255+
/* Registration table: maps the Lua-visible names
   "SpatialAdaptiveMaxPooling_updateOutput" and
   "SpatialAdaptiveMaxPooling_updateGradInput" to the two entry points defined
   in this file. The {NULL, NULL} entry terminates the table. */
static const struct luaL_Reg nn_(SpatialAdaptiveMaxPooling__) [] = {
256+
{"SpatialAdaptiveMaxPooling_updateOutput", nn_(SpatialAdaptiveMaxPooling_updateOutput)},
257+
{"SpatialAdaptiveMaxPooling_updateGradInput", nn_(SpatialAdaptiveMaxPooling_updateGradInput)},
258+
{NULL, NULL}
259+
};
260+
261+
/* Module initialiser for this tensor type: pushes the metatable of
   torch_Tensor, registers the function table above through
   luaT_registeratname with the name "nn", then pops the metatable so the
   Lua stack is left as it was found.
   NOTE(review): presumably this is what makes the functions reachable from
   Lua as `input.nn.SpatialAdaptiveMaxPooling_*` - confirm against luaT. */
static void nn_(SpatialAdaptiveMaxPooling_init)(lua_State *L)
262+
{
263+
luaT_pushmetatable(L, torch_Tensor);
264+
luaT_registeratname(L, nn_(SpatialAdaptiveMaxPooling__), "nn");
265+
lua_pop(L,1);
266+
}
267+
268+
#endif
269+

init.c

+5
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@
101101
#include "generic/SpatialAveragePooling.c"
102102
#include "THGenerateFloatTypes.h"
103103

104+
#include "generic/SpatialAdaptiveMaxPooling.c"
105+
#include "THGenerateFloatTypes.h"
106+
104107
#include "generic/VolumetricConvolution.c"
105108
#include "THGenerateFloatTypes.h"
106109

@@ -159,6 +162,7 @@ int luaopen_libnn(lua_State *L)
159162
nn_FloatSpatialSubSampling_init(L);
160163
nn_FloatSpatialMaxPooling_init(L);
161164
nn_FloatSpatialAveragePooling_init(L);
165+
nn_FloatSpatialAdaptiveMaxPooling_init(L);
162166
nn_FloatVolumetricConvolution_init(L);
163167
nn_FloatVolumetricMaxPooling_init(L);
164168
nn_FloatMultiMarginCriterion_init(L);
@@ -198,6 +202,7 @@ int luaopen_libnn(lua_State *L)
198202
nn_DoubleSpatialSubSampling_init(L);
199203
nn_DoubleSpatialMaxPooling_init(L);
200204
nn_DoubleSpatialAveragePooling_init(L);
205+
nn_DoubleSpatialAdaptiveMaxPooling_init(L);
201206
nn_DoubleVolumetricConvolution_init(L);
202207
nn_DoubleVolumetricMaxPooling_init(L);
203208
nn_DoubleMultiMarginCriterion_init(L);

init.lua

+1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ include('SpatialMaxPooling.lua')
7676
include('SpatialMaxPoolingCUDA.lua')
7777
include('SpatialLPPooling.lua')
7878
include('SpatialAveragePooling.lua')
79+
include('SpatialAdaptiveMaxPooling.lua')
7980
include('TemporalConvolution.lua')
8081
include('TemporalSubSampling.lua')
8182
include('TemporalMaxPooling.lua')

0 commit comments

Comments
 (0)