
Commit 67d8fb4

Initial commit (0 parents)

File tree

4 files changed: +344 lines, -0 lines

README.md

+3
@@ -0,0 +1,3 @@
# my_blstm
<center><img src="./blstm.png" width=400></center>
Quick and dirty bidirectional LSTM layer in Python.
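A minimal usage sketch of the BLSTMLayer defined in my_blstm.py below, assuming both my_blstm.py and my_lstm.py are on the import path; the input string and sizes are illustrative and mirror rudimentary_test():

# Encode a string as one-hot column vectors, push it through a randomly
# initialized BLSTMLayer, and decode the output back into printable characters.
import numpy as np
from numpy.random import random
from my_blstm import BLSTMLayer, gen_bag_hashtable, make_wordvector, make_string

s = "a quick test sentence"                  # illustrative input
v = make_wordvector(s, gen_bag_hashtable())  # shape (len(printable), len(s))
n_in, T = v.shape
n_hidden = 100
n_out = n_in

n_lstm = 4*n_in*n_hidden + 4*n_hidden**2 + 4*n_hidden + n_hidden**2
n_params = 2*n_lstm + 2*n_hidden*n_out + n_out
params = 1e-5*(2*random((n_params,)) - 1.)   # small random weights

blstm = BLSTMLayer(n_in, n_hidden, n_out, params, 0.001)
y = blstm.gen_sequence(v)                    # shape (n_out, T)
print(make_string(y))                        # decodes each column to a character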

blstm.png

36.7 KB

my_blstm.py

+143
@@ -0,0 +1,143 @@
#! /usr/bin/env python
"""
file: my_blstm.py
author: thomas wood ([email protected])
description: Quick and dirty Bi-directional LSTM layer in Python.
"""
import numpy as np
from numpy import tanh
from numpy.random import random
from string import printable
from my_lstm import gen_bag_hashtable, \
    make_wordvector, \
    make_string, \
    LSTMLayer


def activation(z, method="tanh"):
    """
    Defaults to "tanh".
    Probably shouldn't ever neglect to use that, but whatever.
    """
    if method == "tanh":
        return tanh(z)
    elif method == "linear":
        return z
    elif method == "sigmoid":
        return 1./(1.+np.exp(-z))


def gen_bag_hashtable():
    N = len(printable)
    table = {}
    for k in range(N):
        table[printable[k]] = k
    return table


def make_wordvector(s, table):
    N = len(printable)
    L = len(s)
    a = np.zeros((N, L))
    for k in range(L):
        a[table[s[k]], k] = 1
    return a


def make_string(x):
    s = []
    for k in range(x.shape[1]):
        s.append(printable[np.argmax(x[:, k])])
    return ''.join(s)


class BLSTMLayer:
    def __init__(self, n_in, n_hidden, n_out, params, eps):
        """
        The number of parameters in a single LSTM layer is

        n_lstm =
              4*n_in*n_hidden   # four W_{i,c,f,o} input weight matrices
            + 4*n_hidden**2     # four U_* recurrent weight matrices
            + 4*n_hidden        # four b_* bias vectors
            + n_hidden**2       # one V_o matrix of weights

        We use two matrices of size n_hidden*n_out along with
        a bias vector of size n_out to compute the output of
        the BLSTMLayer for a given input sequence, so the total
        number of parameters is

        n_total_params = 2*n_lstm + 2*n_hidden*n_out + n_out
        """
        self.n_in = n_in
        self.n_hidden = n_hidden
        self.n_out = n_out

        n_lstm = 4*n_in*n_hidden + \
            4*n_hidden**2 + \
            4*n_hidden + \
            n_hidden**2

        # slice 'em and dice 'em
        ind_fwd = n_lstm  # forward parameter index
        self.forward_params = params[:ind_fwd]  # don't reshape
        ind_back = ind_fwd + n_lstm  # backward parameter index
        self.backward_params = params[ind_fwd:ind_back]  # don't reshape
        ind_W = ind_back + 2*n_hidden*n_out  # output weights
        self.W = params[ind_back:ind_W].reshape((n_out, 2*n_hidden))
        self.bias = params[ind_W:]  # output bias

    def gen_sequence(self, X):
        n_in = self.n_in
        n_hidden = self.n_hidden
        n_out = self.n_out
        T = X.shape[1]  # length of the input sequence

        # Big matrix of forward and backward hidden state values.
        # We are going to use two LSTMs to populate this matrix.
        H = np.zeros((2*n_hidden, T))

        # A single LSTMLayer for stepping forward.
        # TODO: look into stacking multiple LSTM layers inside the
        # bidirectional framework, but first things first.
        lstmFwd = LSTMLayer(n_in, n_hidden, self.forward_params, eps=0.0)
        # An LSTMLayer for stepping backward.
        lstmBack = LSTMLayer(n_in, n_hidden, self.backward_params, eps=0.0)

        for k in range(T):
            # FORWARD: calculate forward hidden state values
            H[:n_hidden, k] = lstmFwd.step(X[:, k])
            # BACKWARD: calculate backward hidden state values, storing the
            # state for position T-1-k in column T-1-k so that the forward
            # and backward states for the same position sit in the same column
            H[n_hidden:, T-1-k] = lstmBack.step(X[:, T-1-k])

        return activation(np.dot(self.W, H), method="linear")
def rudimentary_test():
    s = """0 a is the quick fox who jumped over the lazy brown dog's new sentence."""
    table = gen_bag_hashtable()

    v = make_wordvector(s, table)

    n_in, T = v.shape
    n_out = n_in
    n_hidden = 100  # Learn a more complex representation?
    eps1 = 0.00001
    eps2 = 0.001

    n_lstm = 4*n_in*n_hidden + \
        4*n_hidden**2 + \
        4*n_hidden + \
        n_hidden**2

    n_params = 2*n_lstm + 2*n_hidden*n_out + n_out

    params1 = eps1*(2*random((n_params,))-1.)

    blstm = BLSTMLayer(n_in, n_hidden, n_in, params1, eps2)

    y1 = blstm.gen_sequence(v)

    s1 = make_string(y1)
    print(s1)


if __name__ == "__main__":
    rudimentary_test()
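The parameter bookkeeping in BLSTMLayer.__init__ can be verified with a short calculation; a sketch for the sizes used in rudimentary_test() above (n_in = n_out = len(printable) = 100, n_hidden = 100), included only as a worked check:

# Worked check of the BLSTMLayer parameter count and slicing for
# n_in = n_out = 100 and n_hidden = 100.
n_in, n_hidden, n_out = 100, 100, 100

n_lstm = (4*n_in*n_hidden   # W_i, W_c, W_f, W_o
          + 4*n_hidden**2   # U_i, U_c, U_f, U_o
          + 4*n_hidden      # b_i, b_c, b_f, b_o
          + n_hidden**2)    # V_o
assert n_lstm == 90400

n_total = 2*n_lstm + 2*n_hidden*n_out + n_out  # two LSTMs + output weights + bias
assert n_total == 200900

# The slices taken in __init__ then tile the params vector exactly:
#   [0, 90400)        forward LSTM parameters
#   [90400, 180800)   backward LSTM parameters
#   [180800, 200800)  output weight matrix W, reshaped to (100, 200)
#   [200800, 200900)  output bias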

my_lstm.py

+198
@@ -0,0 +1,198 @@
#! /usr/bin/env python
"""
File: my_lstm.py

Author: Thomas Wood ([email protected])

Description: a quick and dirty lstm layer based on the description of lstm
networks at http://deeplearning.net/tutorial/lstm.html
"""

import numpy as np
from numpy import tanh
from numpy.random import random
from string import printable


def sigmoid(z):
    return 1./(1.+np.exp(-z))


def rand_mat(nrow, ncol, sigma, mu=0.0):
    return sigma*(2*np.random.random((nrow, ncol))-1.) + np.tile(mu, (nrow, ncol))


def gen_bag_hashtable():
    N = len(printable)
    table = {}
    for k in range(N):
        table[printable[k]] = k
    return table


def make_wordvector(s, table):
    N = len(printable)
    L = len(s)
    a = np.zeros((N, L))
    for k in range(L):
        a[table[s[k]], k] = 1
    return a


def make_string(x):
    s = []
    for k in range(x.shape[1]):
        s.append(printable[np.argmax(x[:, k])])
    return ''.join(s)


class LSTMLayer:
    """
    There are four afferent weight matrices:

    W_i - used to update the input gate
    W_c - used to update the preliminary candidate hidden state
    W_f - used to update the forget gate
    W_o - used to update the output gate

    four recurrent weight matrices (U_i, U_c, U_f, U_o),

    and four bias vectors (b_i, b_c, b_f, b_o),

    along with a weight matrix for the candidate vector (V_o).

    There are also the persistent values used to step the lstm layer forward:
    the hidden state -- h_(t-1), and
    the candidate vector -- C_(t-1).
    """
    def __init__(self, n_in, n_out, params, eps=0.001):

        self.n_input = n_in  # dimension of the input vector x_t
        self.n_output = n_out

        ####---- LAYER PARAMETERS

        # W consists of four afferent weight matrices W_i, W_c, W_f, W_o
        ind_W = 4*n_in*n_out
        self.W = params[:ind_W].reshape((4*n_out, n_in))
        # U consists of four recurrent weight matrices U_i, U_c, U_f, U_o
        ind_U = ind_W + 4*n_out*n_out
        self.U = params[ind_W:ind_U].reshape((4*n_out, n_out))
        # bias consists of four biases b_i, b_c, b_f, b_o
        ind_bias = ind_U + 4*n_out
        self.bias = params[ind_U:ind_bias].reshape((4*n_out, ))
        # One more matrix just for the value of the candidate vector
        self.V_o = params[ind_bias:].reshape((n_out, n_out))

        ####---- LAYER STATES (PERSISTENT)

        # h is the value of the hidden state of the layer; like the
        # candidate vector, it has n_out entries
        self.h = eps*(2*random((n_out,))-1.)

        # C is the candidate value
        self.C = eps*(2*random((n_out,))-1.)

    def step(self, x):
        """
        Input Gate update rule:
        i_t = sigmoid(W_i*x_t + U_i*h_(t-1) + b_i)

        Preliminary Candidate hidden state update rule:
        Cprelim_t = tanh(W_c*x_t + U_c*h_(t-1) + b_c)

        Forget Gate update rule:
        f_t = sigmoid(W_f*x_t + U_f*h_(t-1) + b_f)

        Candidate hidden state update rule:
        C_t = i_t*Cprelim_t + f_t*C_(t-1)

        Output Gate update rule:
        o_t = sigmoid(W_o*x_t + U_o*h_(t-1) + V_o*C_t + b_o)

        Hidden state update rule:
        h_t = o_t * tanh(C_t)
        """

        # We have stacked the afferent and recurrent weight matrices to allow
        # us to easily compute the products of x and h with their respective
        # weight matrices in a single step.
        W_x = np.dot(self.W, x)
        U_h = np.dot(self.U, self.h)

        n = self.n_output  # for ease of reading and writing

        # The slices of W_x, U_h, and self.bias correspond, in order, to the
        # input gate (i), preliminary candidate (c), forget gate (f), and
        # output gate (o).  Written out with named variables:
        #
        #   i_t   = sigmoid(W_x[:n]      + U_h[:n]      + self.bias[:n])
        #   C_pre = tanh(   W_x[n:2*n]   + U_h[n:2*n]   + self.bias[n:2*n])
        #   f_t   = sigmoid(W_x[2*n:3*n] + U_h[2*n:3*n] + self.bias[2*n:3*n])
        #
        # C_t = i_t * Cprelim_t + f_t * C_(t-1)
        self.C = sigmoid(W_x[:n] + U_h[:n] + self.bias[:n]) \
            * tanh(W_x[n:2*n] + U_h[n:2*n] + self.bias[n:2*n]) \
            + sigmoid(W_x[2*n:3*n] + U_h[2*n:3*n] + self.bias[2*n:3*n]) \
            * self.C

        # o_t = sigmoid(W_o*x_t + U_o*h_(t-1) + V_o*C_t + b_o)
        # h_t = o_t * tanh(C_t)
        self.h = sigmoid(W_x[3*n:] + U_h[3*n:] +
                         np.dot(self.V_o, self.C) + self.bias[3*n:]) * tanh(self.C)

        return self.h
def rudimentary_test():
    """
    Very simple test of LSTMLayer functionality. I'm training a DQN for
    Space Invaders right now and I don't really want to get into any training
    until my GPU is free for all the matrix multiplication.

    Right now this is just a fun example of how to multiply random numbers
    to get more random numbers. I might add in some objective costs along with
    some optimization routines, but I would likely make a new repository for
    my optimization function.
    """

    s = """0 a is the quick fox who jumped over the lazy brown dog's new sentence."""
    table = gen_bag_hashtable()

    v = make_wordvector(s, table)

    n_in, T = v.shape
    n_out = n_in
    eps = 0.1

    # Number of parameters in a single LSTMLayer: four W matrices, four U
    # matrices, four bias vectors, and the V_o candidate weight matrix.
    n_params = 4*n_in*n_out + \
        4*n_out**2 + \
        4*n_out + \
        n_out**2

    params = eps*(2*random((n_params,))-1.)

    lstm = LSTMLayer(n_in, n_out, params, eps)

    # Step through the sequence and collect the hidden states.
    h = np.zeros((n_out, T))
    for k in range(T):
        h[:, k] = lstm.step(v[:, k])

    s1 = make_string(h)
    print(s1)


if __name__ == "__main__":
    rudimentary_test()
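A small sanity check of the LSTMLayer parameter layout and of the shapes produced by step(); the dimensions (n_in = 5, n_out = 3) are made up for illustration:

# Sketch: a params vector of the documented size is consumed exactly by
# LSTMLayer, and step() returns a hidden state with n_out entries.
import numpy as np
from numpy.random import random
from my_lstm import LSTMLayer

n_in, n_out = 5, 3
n_params = 4*n_in*n_out + 4*n_out**2 + 4*n_out + n_out**2  # W, U, b, V_o
assert n_params == 60 + 36 + 12 + 9

lstm = LSTMLayer(n_in, n_out, 0.1*(2*random((n_params,)) - 1.), eps=0.01)

x = np.zeros(n_in)
x[0] = 1.0                       # a one-hot "character"
h = lstm.step(x)
assert h.shape == (n_out,)       # hidden state has n_out entries
assert np.all(np.abs(h) < 1.0)   # h_t = o_t * tanh(C_t), so |h_t| < 1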

0 commit comments