diff --git a/Normalize.lua b/Normalize.lua
index 24c1d07b4..abdb33ed5 100644
--- a/Normalize.lua
+++ b/Normalize.lua
@@ -1,26 +1,28 @@
 local Normalize, parent = torch.class('nn.Normalize', 'nn.Module')
+Normalize.__version = 2
 
-function Normalize:__init(p,eps)
+function Normalize:__init(p, dim, eps)
   parent.__init(self)
   assert(p,'p-norm not provided')
   assert(p > 0, p..'-norm not supported')
   self.p = p
+  self.dim = dim or -1
   self.eps = eps or 1e-10
+  assert(self.dim % 1 == 0, 'dimension should be an integer')
+  assert(self.dim ~= 0, "dimension can't be 0")
 end
 
 function Normalize:updateOutput(input)
-  assert(input:dim() <= 2, 'only 1d layer supported')
-  local input_size = input:size()
-  if input:dim() == 1 then
-    input = input:view(1,-1)
+  assert(math.abs(self.dim) <= input:dim(),
+    'input has less dimensions than the normalization dimension')
+  local dim = self.dim
+  if dim < 0 then
+    dim = input:dim() + dim + 1
   end
 
-  self._output = self._output or input.new()
   self.norm = self.norm or input.new()
   self.buffer = self.buffer or input.new()
 
-  self._output:resizeAs(input)
-
   if self.p == math.huge then
     -- specialization for the infinity norm
     self._indices = self._indices or
@@ -28,7 +30,7 @@ function Normalize:updateOutput(input)
        torch.CudaTensor() or torch.LongTensor())
 
     self.buffer:abs(input)
-    torch.max(self.norm, self._indices, self.buffer, 2)
+    torch.max(self.norm, self._indices, self.buffer, dim)
     self.norm:add(self.eps)
   else
     self.normp = self.normp or input.new()
@@ -37,41 +39,35 @@ function Normalize:updateOutput(input)
     else
       self.buffer:pow(input,self.p)
     end
-    self.normp:sum(self.buffer,2):add(self.eps)
+    self.normp:sum(self.buffer, dim):add(self.eps)
     self.norm:pow(self.normp,1/self.p)
   end
-  self._output:cdiv(input, self.norm:view(-1,1):expandAs(input))
+  self.output:cdiv(input, self.norm:expandAs(input))
 
-  self.output:view(self._output, input_size)
   return self.output
 end
 
 function Normalize:updateGradInput(input, gradOutput)
-  assert(input:dim() <= 2, 'only 1d layer supported')
-  assert(gradOutput:dim() <= 2, 'only 1d layer supported')
-
-  local input_size = input:size()
-  if input:dim() == 1 then
-    input = input:view(1,-1)
+  assert(math.abs(self.dim) <= input:dim(),
+    'input has less dimensions than the normalization dimension')
+  local dim = self.dim
+  if dim < 0 then
+    dim = input:dim() + dim + 1
   end
 
-  local n = input:size(1) -- batch size
-  local d = input:size(2) -- dimensionality of vectors
-
-  self._gradInput = self._gradInput or input.new()
   self.cross = self.cross or input.new()
   -- compute diagonal term with gradOutput
-  self._gradInput:resize(n,d)
+  self.gradInput:resizeAs(input)
   if self.p == math.huge then
     -- specialization for the inf case
-    self._gradInput:cmul(self.norm:view(n,1,1):expand(n,d,1),gradOutput)
+    self.gradInput:cmul(self.norm:expandAs(gradOutput),gradOutput)
     self.buffer:resizeAs(input):zero()
-    self.cross:resize(n,1)
-    self.cross:gather(input,2,self._indices)
+    self.cross:resizeAs(self.norm)
+    self.cross:gather(input,dim,self._indices)
     self.cross:cdiv(self.norm)
-    self.buffer:scatter(2,self._indices,self.cross)
+    self.buffer:scatter(dim,self._indices,self.cross)
   else
-    self._gradInput:cmul(self.normp:view(n,1):expand(n,d), gradOutput)
+    self.gradInput:cmul(self.normp:expandAs(gradOutput), gradOutput)
     -- small optimizations for different p
     -- buffer = input*|input|^(p-2)
     if self.p % 2 ~= 0 then
@@ -91,17 +87,17 @@ function Normalize:updateGradInput(input, gradOutput)
     end
   end
   -- compute cross term in two steps
-  self.cross:resize(n,1)
+  self.cross:resizeAs(self.norm)
 
   -- instead of having a huge temporary matrix (b1*b2),
   -- do the computations as b1*(b2*gradOutput). This avoids redundant
   -- computation and also a huge buffer of size n*d^2
   self.buffer2 = self.buffer2 or input.new() -- nxd
   self.buffer2:cmul(input, gradOutput)
-  self.cross:sum(self.buffer2, 2)
+  self.cross:sum(self.buffer2, dim)
 
   self.buffer:cmul(self.cross:expandAs(self.buffer))
-  self._gradInput:add(-1, self.buffer)
+  self.gradInput:add(-1, self.buffer)
 
   -- reuse cross buffer for normalization
   if self.p == math.huge then
@@ -109,9 +105,8 @@ function Normalize:updateGradInput(input, gradOutput)
   else
     self.cross:cmul(self.normp,self.norm)
   end
-  self._gradInput:cdiv(self.cross:expand(n,d))
+  self.gradInput:cdiv(self.cross:expandAs(gradOutput))
 
-  self.gradInput:view(self._gradInput, input_size)
   return self.gradInput
 end
 
@@ -119,11 +114,11 @@ function Normalize:__tostring__()
   local s
   -- different prints if the norm is integer
   if self.p % 1 == 0 then
-    s = '%s(%d)'
+    s = '%s(%d,%d)'
   else
-    s = '%s(%f)'
+    s = '%s(%f,%d)'
   end
-  return string.format(s,torch.type(self),self.p)
+  return string.format(s,torch.type(self),self.p, self.dim)
 end
 
 function Normalize:type(type, tensorCache)
@@ -153,3 +148,11 @@ function Normalize:clearState()
   })
   return parent.clearState(self)
 end
+
+function Normalize:read(file, version)
+  parent.read(self, file)
+  if version < 2 then
+    -- version 1 only supported 1D tensors
+    self.dim = -1
+  end
+end
diff --git a/doc/simple.md b/doc/simple.md
index e3b13dbc0..3d05ab1aa 100644
--- a/doc/simple.md
+++ b/doc/simple.md
@@ -1149,11 +1149,11 @@ print(B) -- output
 ## Normalize ##
 
 ```lua
-module = nn.Normalize(p, [eps])
+module = nn.Normalize(p, [dim], [eps])
 ```
-Normalizes the input Tensor to have unit `L_p` norm. The smoothing parameter `eps` prevents division by zero when the input contains all zero elements (default = `1e-10`).
+Normalizes the input Tensor to have unit `L_p` norm over dimension `dim` (by default -1, i.e., the last dimension). The smoothing parameter `eps` prevents division by zero when the input contains all zero elements (default = `1e-10`).
 
-Input can be 1D or 2D (in which case it's considered as in batch mode)
+The `dim` parameter can take both positive and negative values; negative values are counted from the last dimension. Negative dimensions are especially useful if one wants the module to work the same with and without a batch dimension.
 
 ```lua
 A = torch.randn(3, 5)
@@ -1163,6 +1163,14 @@
 B = m:forward(A) -- B is also 3 x 5
 print(torch.norm(B, 2, 2)) -- norms is [1, 1, 1]
 ```
+Here is an example of normalizing the feature maps of a batch of images:
+```lua
+I = torch.randn(2, 3, 2, 2)
+m = nn.Normalize(1, -3) -- the third dimension from the end
+B = m:forward(I)
+print(torch.norm(B, 1, 2)) -- norms over the feature map dimension are all 1
+```
+
 `Normalize` has a specialized implementation for the `inf` norm, which corresponds to the maximum norm.
 ```lua
 A = torch.randn(3,5)
diff --git a/test.lua b/test.lua
index e67a39a84..9d32cef02 100644
--- a/test.lua
+++ b/test.lua
@@ -624,6 +624,21 @@ function nntest.Normalize()
      mytester:assertlt(err, precision, 'error norm '..p..' on state ')
   end
 
+   -- test on different dimensions
+   for _,p in pairs({1,2,3,4,torch.uniform()*math.random(1,10),math.huge}) do
+      local ini = math.random(3,5)
+      local inj = math.random(3,5)
+      local ink = math.random(3,5)
+      local inl = math.random(3,5)
+      local dim = math.random(1,4)
+      local input = torch.Tensor(inl, ink, inj, ini):zero()
+
+      local module = nn.Normalize(p, dim)
+
+      local err = jac.testJacobian(module, input, -2, 2)
+      mytester:assertlt(err, precision, 'error norm '..p..' on state ')
+   end
+
    -- test IO correctness
    local ini = math.random(3,5)
    local inj = math.random(3,5)
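
Below is a minimal usage sketch (not part of the patch; it assumes the updated `nn.Normalize` with its default `dim = -1`) illustrating the batch-invariance point made in the documentation change above: the same module normalizes over the last dimension whether the input is a single vector or a batch of vectors.

```lua
require 'nn'

-- with the default dim = -1, normalization always happens over the last dimension
local m = nn.Normalize(2)

local v = torch.randn(5)        -- a single sample
local batch = torch.randn(3, 5) -- a batch of 3 samples

print(torch.norm(m:forward(v)))           -- ~1
print(torch.norm(m:forward(batch), 2, 2)) -- 3x1 tensor of values ~1
```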