
Commit 20f4bcc

Normalize supports arbitrary dimensions
Assert that dim is integer in Normalize
1 parent 9cffea5 commit 20f4bcc
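
In short, the constructor gains a `dim` argument, `nn.Normalize(p, dim, eps)`, with `dim` defaulting to `-1` (the last dimension). A minimal usage sketch of the new behaviour (illustration only, not part of the commit):

```lua
require 'nn'

-- default: unit L2 norm over the last dimension (dim = -1)
local m = nn.Normalize(2)
local B = m:forward(torch.randn(3, 5))
print(torch.norm(B, 2, 2))   -- each row of B now has unit L2 norm

-- normalize over an explicit dimension, e.g. dimension 2 of a 4D batch
local m2 = nn.Normalize(1, 2)
local C = m2:forward(torch.randn(2, 3, 4, 4))
print(m2)                    -- the printed form now includes the dimension, e.g. nn.Normalize(1,2)
```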

File tree

3 files changed (+59, -39 lines changed)

Normalize.lua (+33, -36)
@@ -1,34 +1,35 @@
 local Normalize, parent = torch.class('nn.Normalize', 'nn.Module')
 
-function Normalize:__init(p,eps)
+function Normalize:__init(p, dim, eps)
   parent.__init(self)
   assert(p,'p-norm not provided')
   assert(p > 0, p..'-norm not supported')
   self.p = p
+  self.dim = dim or -1
   self.eps = eps or 1e-10
 end
 
 function Normalize:updateOutput(input)
-  assert(input:dim() <= 2, 'only 1d layer supported')
-  local input_size = input:size()
-  if input:dim() == 1 then
-    input = input:view(1,-1)
+  self.dim = self.dim or -1
+  assert(math.abs(self.dim) <= input:dim(),
+    'input has less dimensions than the normalization dimension')
+  assert(self.dim % 1 == 0, 'dimension should be an integer')
+  local dim = self.dim
+  if dim < 0 then
+    dim = input:dim() + dim + 1
   end
 
-  self._output = self._output or input.new()
   self.norm = self.norm or input.new()
   self.buffer = self.buffer or input.new()
 
-  self._output:resizeAs(input)
-
   if self.p == math.huge then
     -- specialization for the infinity norm
     self._indices = self._indices or
      (torch.type(self.output) == 'torch.CudaTensor' and
       torch.CudaTensor() or torch.LongTensor())
 
     self.buffer:abs(input)
-    torch.max(self.norm, self._indices, self.buffer, 2)
+    torch.max(self.norm, self._indices, self.buffer, dim)
     self.norm:add(self.eps)
   else
     self.normp = self.normp or input.new()
@@ -37,41 +38,37 @@ function Normalize:updateOutput(input)
     else
       self.buffer:pow(input,self.p)
     end
-    self.normp:sum(self.buffer,2):add(self.eps)
+    self.normp:sum(self.buffer, dim):add(self.eps)
     self.norm:pow(self.normp,1/self.p)
   end
-  self._output:cdiv(input, self.norm:view(-1,1):expandAs(input))
+  self.output:cdiv(input, self.norm:expandAs(input))
 
-  self.output:view(self._output, input_size)
   return self.output
 end
 
 function Normalize:updateGradInput(input, gradOutput)
-  assert(input:dim() <= 2, 'only 1d layer supported')
-  assert(gradOutput:dim() <= 2, 'only 1d layer supported')
-
-  local input_size = input:size()
-  if input:dim() == 1 then
-    input = input:view(1,-1)
+  self.dim = self.dim or -1
+  assert(math.abs(self.dim) <= input:dim(),
+    'input has less dimensions than the normalization dimension')
+  assert(self.dim % 1 == 0, 'dimension should be an integer')
+  local dim = self.dim
+  if dim < 0 then
+    dim = input:dim() + dim + 1
   end
 
-  local n = input:size(1) -- batch size
-  local d = input:size(2) -- dimensionality of vectors
-
-  self._gradInput = self._gradInput or input.new()
   self.cross = self.cross or input.new()
   -- compute diagonal term with gradOutput
-  self._gradInput:resize(n,d)
+  self.gradInput:resizeAs(input)
   if self.p == math.huge then
     -- specialization for the inf case
-    self._gradInput:cmul(self.norm:view(n,1,1):expand(n,d,1),gradOutput)
+    self.gradInput:cmul(self.norm:expandAs(gradOutput),gradOutput)
     self.buffer:resizeAs(input):zero()
-    self.cross:resize(n,1)
-    self.cross:gather(input,2,self._indices)
+    self.cross:resizeAs(self.norm)
+    self.cross:gather(input,dim,self._indices)
     self.cross:cdiv(self.norm)
-    self.buffer:scatter(2,self._indices,self.cross)
+    self.buffer:scatter(dim,self._indices,self.cross)
   else
-    self._gradInput:cmul(self.normp:view(n,1):expand(n,d), gradOutput)
+    self.gradInput:cmul(self.normp:expandAs(gradOutput), gradOutput)
     -- small optimizations for different p
     -- buffer = input*|input|^(p-2)
     if self.p % 2 ~= 0 then
@@ -91,39 +88,39 @@ function Normalize:updateGradInput(input, gradOutput)
     end
   end
   -- compute cross term in two steps
-  self.cross:resize(n,1)
+  self.cross:resizeAs(self.norm)
 
   -- instead of having a huge temporary matrix (b1*b2),
   -- do the computations as b1*(b2*gradOutput). This avoids redundant
   -- computation and also a huge buffer of size n*d^2
   self.buffer2 = self.buffer2 or input.new() -- nxd
   self.buffer2:cmul(input, gradOutput)
-  self.cross:sum(self.buffer2, 2)
+  self.cross:sum(self.buffer2, dim)
 
   self.buffer:cmul(self.cross:expandAs(self.buffer))
-  self._gradInput:add(-1, self.buffer)
+  self.gradInput:add(-1, self.buffer)
 
   -- reuse cross buffer for normalization
   if self.p == math.huge then
     self.cross:cmul(self.norm,self.norm)
   else
     self.cross:cmul(self.normp,self.norm)
   end
-  self._gradInput:cdiv(self.cross:expand(n,d))
+  self.gradInput:cdiv(self.cross:expandAs(gradOutput))
 
-  self.gradInput:view(self._gradInput, input_size)
   return self.gradInput
 end
 
 function Normalize:__tostring__()
   local s
   -- different prints if the norm is integer
   if self.p % 1 == 0 then
-    s = '%s(%d)'
+    s = '%s(%d,%d)'
   else
-    s = '%s(%f)'
+    s = '%s(%f,%d)'
   end
-  return string.format(s,torch.type(self),self.p)
+  local dim = self.dim or -1
+  return string.format(s,torch.type(self),self.p, dim)
 end
 
 function Normalize:type(type, tensorCache)
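
The core of the change is the dimension bookkeeping at the top of `updateOutput` and `updateGradInput`: a negative `dim` is mapped to a positive one via `dim = input:dim() + dim + 1`, after asserting that `dim` is an integer and within range. A standalone sketch of that mapping (the helper name is hypothetical, for illustration only):

```lua
-- How nn.Normalize resolves a (possibly negative) normalization dimension
-- against a tensor with nDim dimensions: -1 is the last dimension, -2 the
-- one before it, and so on.
local function resolveDim(dim, nDim)
  assert(math.abs(dim) <= nDim,
    'input has less dimensions than the normalization dimension')
  assert(dim % 1 == 0, 'dimension should be an integer')
  if dim < 0 then
    dim = nDim + dim + 1
  end
  return dim
end

print(resolveDim(-1, 2))  -- 2: last dimension of a 2D batch
print(resolveDim(-3, 4))  -- 2: feature-map dimension of a 4D image batch
```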

doc/simple.md (+11, -3)
@@ -1149,11 +1149,11 @@ print(B) -- output
 ## Normalize ##
 
 ```lua
-module = nn.Normalize(p, [eps])
+module = nn.Normalize(p, [dim], [eps])
 ```
-Normalizes the input Tensor to have unit `L_p` norm. The smoothing parameter `eps` prevents division by zero when the input contains all zero elements (default = `1e-10`).
+Normalizes the input Tensor to have unit `L_p` norm over dimension `dim` (by default -1, i.e., the last dimension). The smoothing parameter `eps` prevents division by zero when the input contains all zero elements (default = `1e-10`).
 
-Input can be 1D or 2D (in which case it's considered as in batch mode)
+The `dim` parameter can take both positive and negative values (in the latter case it is counted from the end). Negative dimensions are especially useful when one wants to be invariant to batch mode.
 
 ```lua
 A = torch.randn(3, 5)
@@ -1163,6 +1163,14 @@ B = m:forward(A) -- B is also 3 x 5
 print(torch.norm(B, 2, 2)) -- norms is [1, 1, 1]
 ```
 
+Here is an example of normalizing the feature maps of an image:
+```lua
+I = torch.randn(2, 3, 2, 2)
+m = nn.Normalize(1, -3) -- normalize over the third dimension from the end (the feature maps)
+B = m:forward(I)
+print(torch.norm(B, 1, 2))
+```
+
 `Normalize` has a specialized implementation for the `inf` norm, which corresponds to the maximum norm.
 ```lua
 A = torch.randn(3,5)
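
To illustrate the batch-invariance point made in the docs above (a sketch, not part of the committed documentation): with the default `dim = -1`, the same module normalizes a single vector and a batch of vectors consistently.

```lua
require 'nn'

local m = nn.Normalize(2)              -- dim defaults to -1, the last dimension

local v = torch.randn(5)               -- a single sample
local V = torch.randn(3, 5)            -- a batch of samples

print(torch.norm(m:forward(v), 2))     -- ~1: the vector is normalized as a whole
print(torch.norm(m:forward(V), 2, 2))  -- ~[1, 1, 1]: each row is normalized
```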

test.lua (+15)
@@ -624,6 +624,21 @@ function nntest.Normalize()
     mytester:assertlt(err, precision, 'error norm '..p..' on state ')
   end
 
+  -- test on different dimensions
+  for _,p in pairs({1,2,3,4,torch.uniform()*math.random(1,10),math.huge}) do
+    local ini = math.random(3,5)
+    local inj = math.random(3,5)
+    local ink = math.random(3,5)
+    local inl = math.random(3,5)
+    local dim = math.random(1,4)
+    local input = torch.Tensor(inl, ink, inj, ini):zero()
+
+    local module = nn.Normalize(p, dim)
+
+    local err = jac.testJacobian(module, input, -2, 2)
+    mytester:assertlt(err, precision, 'error norm '..p..' on state ')
+  end
+
   -- test IO correctness
   local ini = math.random(3,5)
   local inj = math.random(3,5)
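
The added test drives the Jacobian checker over random positive dims; as an extra sanity check (a sketch, not part of the commit), one can also verify directly that the forward pass yields unit p-norms along the chosen dimension:

```lua
require 'nn'

local p, dim = 2, 3
local input = torch.randn(4, 3, 5, 6)
local module = nn.Normalize(p, dim)
local output = module:forward(input)

-- every slice along `dim` should now have (approximately) unit L_p norm
local norms = torch.norm(output, p, dim)
print((norms - 1):abs():max())  -- close to 0
```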
