Normalize supports arbitrary dimensions #767

Open

wants to merge 3 commits into base: master

75 changes: 39 additions & 36 deletions Normalize.lua
@@ -1,34 +1,36 @@
 local Normalize, parent = torch.class('nn.Normalize', 'nn.Module')
+Normalize.__version = 2

-function Normalize:__init(p,eps)
+function Normalize:__init(p, dim, eps)
   parent.__init(self)
   assert(p,'p-norm not provided')
   assert(p > 0, p..'-norm not supported')
   self.p = p
+  self.dim = dim or -1
   self.eps = eps or 1e-10
+  assert(self.dim % 1 == 0, 'dimension should be an integer')
+  assert(self.dim ~= 0, "dimension can't be 0")
 end

 function Normalize:updateOutput(input)
-  assert(input:dim() <= 2, 'only 1d layer supported')
-  local input_size = input:size()
-  if input:dim() == 1 then
-    input = input:view(1,-1)
+  assert(math.abs(self.dim) <= input:dim(),
+    'input has less dimensions than the normalization dimension')
+  local dim = self.dim
+  if dim < 0 then
+    dim = input:dim() + dim + 1
   end

-  self._output = self._output or input.new()
   self.norm = self.norm or input.new()
   self.buffer = self.buffer or input.new()

-  self._output:resizeAs(input)
-
   if self.p == math.huge then
     -- specialization for the infinity norm
     self._indices = self._indices or
       (torch.type(self.output) == 'torch.CudaTensor' and
        torch.CudaTensor() or torch.LongTensor())

     self.buffer:abs(input)
-    torch.max(self.norm, self._indices, self.buffer, 2)
+    torch.max(self.norm, self._indices, self.buffer, dim)
     self.norm:add(self.eps)
   else
     self.normp = self.normp or input.new()
@@ -37,41 +39,35 @@ function Normalize:updateOutput(input)
     else
       self.buffer:pow(input,self.p)
     end
-    self.normp:sum(self.buffer,2):add(self.eps)
+    self.normp:sum(self.buffer, dim):add(self.eps)
     self.norm:pow(self.normp,1/self.p)
   end
-  self._output:cdiv(input, self.norm:view(-1,1):expandAs(input))
+  self.output:cdiv(input, self.norm:expandAs(input))

-  self.output:view(self._output, input_size)
   return self.output
 end

 function Normalize:updateGradInput(input, gradOutput)
-  assert(input:dim() <= 2, 'only 1d layer supported')
-  assert(gradOutput:dim() <= 2, 'only 1d layer supported')
-
-  local input_size = input:size()
-  if input:dim() == 1 then
-    input = input:view(1,-1)
+  assert(math.abs(self.dim) <= input:dim(),
+    'input has less dimensions than the normalization dimension')
+  local dim = self.dim
+  if dim < 0 then
+    dim = input:dim() + dim + 1
   end

-  local n = input:size(1) -- batch size
-  local d = input:size(2) -- dimensionality of vectors
-
-  self._gradInput = self._gradInput or input.new()
   self.cross = self.cross or input.new()
   -- compute diagonal term with gradOutput
-  self._gradInput:resize(n,d)
+  self.gradInput:resizeAs(input)
   if self.p == math.huge then
     -- specialization for the inf case
-    self._gradInput:cmul(self.norm:view(n,1,1):expand(n,d,1),gradOutput)
+    self.gradInput:cmul(self.norm:expandAs(gradOutput),gradOutput)
     self.buffer:resizeAs(input):zero()
-    self.cross:resize(n,1)
-    self.cross:gather(input,2,self._indices)
+    self.cross:resizeAs(self.norm)
+    self.cross:gather(input,dim,self._indices)
     self.cross:cdiv(self.norm)
-    self.buffer:scatter(2,self._indices,self.cross)
+    self.buffer:scatter(dim,self._indices,self.cross)
   else
-    self._gradInput:cmul(self.normp:view(n,1):expand(n,d), gradOutput)
+    self.gradInput:cmul(self.normp:expandAs(gradOutput), gradOutput)
     -- small optimizations for different p
     -- buffer = input*|input|^(p-2)
     if self.p % 2 ~= 0 then
@@ -91,39 +87,38 @@ function Normalize:updateGradInput(input, gradOutput)
     end
   end
   -- compute cross term in two steps
-  self.cross:resize(n,1)
+  self.cross:resizeAs(self.norm)

   -- instead of having a huge temporary matrix (b1*b2),
   -- do the computations as b1*(b2*gradOutput). This avoids redundant
   -- computation and also a huge buffer of size n*d^2
   self.buffer2 = self.buffer2 or input.new() -- nxd
   self.buffer2:cmul(input, gradOutput)
-  self.cross:sum(self.buffer2, 2)
+  self.cross:sum(self.buffer2, dim)

   self.buffer:cmul(self.cross:expandAs(self.buffer))
-  self._gradInput:add(-1, self.buffer)
+  self.gradInput:add(-1, self.buffer)

   -- reuse cross buffer for normalization
   if self.p == math.huge then
     self.cross:cmul(self.norm,self.norm)
   else
     self.cross:cmul(self.normp,self.norm)
   end
-  self._gradInput:cdiv(self.cross:expand(n,d))
+  self.gradInput:cdiv(self.cross:expandAs(gradOutput))

-  self.gradInput:view(self._gradInput, input_size)
   return self.gradInput
 end

 function Normalize:__tostring__()
   local s
   -- different prints if the norm is integer
   if self.p % 1 == 0 then
-    s = '%s(%d)'
+    s = '%s(%d,%d)'
   else
-    s = '%s(%f)'
+    s = '%s(%f,%d)'
   end
-  return string.format(s,torch.type(self),self.p)
+  return string.format(s,torch.type(self),self.p, self.dim)
 end

 function Normalize:type(type, tensorCache)
@@ -153,3 +148,11 @@ function Normalize:clearState()
   })
   return parent.clearState(self)
 end
+
+function Normalize:read(file, version)
+  parent.read(self, file)
+  if version < 2 then
+    -- version 1 only supported 1D tensors
+    self.dim = -1
+  end
+end
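
For context (not part of the patch), here is a short usage sketch of the extended API, assuming the patched module above is installed. It illustrates how a negative `dim` is resolved to `input:dim() + dim + 1`, so `-1` always refers to the last dimension regardless of whether a batch dimension is present:

```lua
-- Illustrative usage only; assumes the patched nn.Normalize is installed.
require 'nn'

local x = torch.randn(4, 3, 8)      -- batch x channels x features

-- L2-normalize along the last dimension (dim defaults to -1):
local m1 = nn.Normalize(2)
local y1 = m1:forward(x)
print(y1:norm(2, 3))                -- a 4x3x1 tensor of values close to 1

-- L1-normalize along the channel dimension, counted from the end;
-- dim = -2 is resolved internally as x:dim() + (-2) + 1 = 2:
local m2 = nn.Normalize(1, -2)
local y2 = m2:forward(x)
print(y2:norm(1, 2))                -- a 4x1x8 tensor of values close to 1
```
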
14 changes: 11 additions & 3 deletions doc/simple.md
@@ -1149,11 +1149,11 @@ print(B) -- output
 ## Normalize ##

 ```lua
-module = nn.Normalize(p, [eps])
+module = nn.Normalize(p, [dim], [eps])
 ```
-Normalizes the input Tensor to have unit `L_p` norm. The smoothing parameter `eps` prevents division by zero when the input contains all zero elements (default = `1e-10`).
+Normalizes the input Tensor to have unit `L_p` norm over dimension `dim` (by default `-1`, i.e., the last dimension). The smoothing parameter `eps` prevents division by zero when the input contains all zero elements (default = `1e-10`).

-Input can be 1D or 2D (in which case it's considered as in batch mode)
+The `dim` parameter can take both positive and negative values (in which case it is counted from the end). Negative dimensions are especially useful if one wants the layer to be invariant to the presence of a batch dimension.

 ```lua
 A = torch.randn(3, 5)
@@ -1163,6 +1163,14 @@ B = m:forward(A) -- B is also 3 x 5
 print(torch.norm(B, 2, 2)) -- norms is [1, 1, 1]
 ```

+Here is an example of normalizing the feature maps of an image:
+```lua
+I = torch.randn(2, 3, 2, 2)
+m = nn.Normalize(1, -3) -- normalize over the third dimension from the end
+B = m:forward(I)
+print(torch.norm(B, 1, 2))
+```
+
 `Normalize` has a specialized implementation for the `inf` norm, which corresponds to the maximum norm.
 ```lua
 A = torch.randn(3,5)
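
To complement the documentation changes, here is a minimal sketch (again assuming the patched module) that combines the `inf`-norm specialization with an explicit `dim`; the choice of `dim = 1` is purely illustrative:

```lua
-- Minimal sketch: with p = math.huge each slice along `dim` is divided
-- by its maximum absolute value.
require 'nn'

local A = torch.randn(3, 5)
local m = nn.Normalize(math.huge, 1)   -- normalize each column (dimension 1)
local B = m:forward(A)
local colMax = torch.abs(B):max(1)     -- per-column maximum absolute value
print(colMax)                          -- each entry is close to 1
```
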
15 changes: 15 additions & 0 deletions test.lua
@@ -624,6 +624,21 @@ function nntest.Normalize()
     mytester:assertlt(err, precision, 'error norm '..p..' on state ')
   end

+  -- test on different dimensions
+  for _,p in pairs({1,2,3,4,torch.uniform()*math.random(1,10),math.huge}) do
+    local ini = math.random(3,5)
+    local inj = math.random(3,5)
+    local ink = math.random(3,5)
+    local inl = math.random(3,5)
+    local dim = math.random(1,4)
+    local input = torch.Tensor(inl, ink, inj, ini):zero()
+
+    local module = nn.Normalize(p, dim)
+
+    local err = jac.testJacobian(module, input, -2, 2)
+    mytester:assertlt(err, precision, 'error norm '..p..' on state ')
+  end
+
   -- test IO correctness
   local ini = math.random(3,5)
   local inj = math.random(3,5)
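
The added test draws a random 4D input and a random normalization dimension, then compares the module's analytic gradient against a numerical Jacobian. For intuition, here is a rough standalone sketch of that kind of finite-difference check; it is not the nn test harness (the real test relies on `nn.Jacobian` via `jac.testJacobian` and `mytester`), and `checkGrad` is an ad-hoc name:

```lua
-- Rough finite-difference gradient check for the patched nn.Normalize.
require 'nn'

local function checkGrad(module, input, eps)
  eps = eps or 1e-6
  local output = module:forward(input)
  local gradOutput = output:clone():normal()             -- random projection
  local analytic = module:backward(input, gradOutput):clone()

  local numeric = torch.Tensor():resizeAs(input)
  local flatIn  = input:view(input:nElement())           -- shares storage
  local flatNum = numeric:view(numeric:nElement())
  for i = 1, flatIn:nElement() do
    local orig = flatIn[i]
    flatIn[i] = orig + eps
    local fplus  = torch.cmul(module:forward(input), gradOutput):sum()
    flatIn[i] = orig - eps
    local fminus = torch.cmul(module:forward(input), gradOutput):sum()
    flatIn[i] = orig
    flatNum[i] = (fplus - fminus) / (2 * eps)
  end
  return (analytic - numeric):abs():max()                -- should be tiny
end

local m = nn.Normalize(2, 2)        -- L2 norm over dimension 2 of a 3D input
local x = torch.randn(4, 3, 5)
print(checkGrad(m, x))              -- expect something on the order of 1e-8
```
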