#!/usr/bin/env python
"""
File: my_lstm.py

Author: Thomas Wood ([email protected])

Description: a quick-and-dirty LSTM layer based on the description of LSTM
networks at http://deeplearning.net/tutorial/lstm.html

"""

import numpy as np
from numpy import tanh
from numpy.random import random
from string import printable

def sigmoid(z):
    """Elementwise logistic sigmoid."""
    return 1./(1.+np.exp(-z))

def rand_mat(nrow, ncol, sigma, mu=0.0):
    """Random (nrow, ncol) matrix with entries uniform in [mu-sigma, mu+sigma]."""
    return sigma*(2*np.random.random((nrow,ncol))-1.) + np.tile(mu,(nrow,ncol))

def gen_bag_hashtable():
    """Map each character in string.printable to its index."""
    N = len(printable)
    table = {}
    for k in range(N):
        table[printable[k]] = k
    return table

def make_wordvector(s, table):
    """One-hot encode the string s, one column per character."""
    N = len(printable)
    L = len(s)
    a = np.zeros((N, L))
    for k in range(L):
        a[table[s[k]], k] = 1
    return a

def make_string(x):
    """Invert make_wordvector by taking the argmax of each column."""
    s = []
    for k in range(x.shape[1]):
        s.append(printable[np.argmax(x[:, k])])
    return ''.join(s)

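# Illustrative sketch of how the helpers above fit together: make_wordvector
# one-hot encodes each character of a string against string.printable, and
# make_string inverts that encoding by taking the argmax of each column, so
# the round trip recovers the original text. For example:
#
#   table = gen_bag_hashtable()
#   x = make_wordvector("hello", table)   # shape (len(printable), 5)
#   make_string(x)                        # -> "hello"
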
class LSTMLayer:
    """
    There are four afferent weight matrices:

    W_i - used to update the input gate
    W_c - used to update the preliminary candidate hidden state
    W_f - used to update the forget gate
    W_o - used to update the output gate

    four recurrent weight matrices (U_i, U_c, U_f, U_o),

    four bias vectors (b_i, b_c, b_f, b_o),

    and a weight matrix for the candidate vector (V_o).

    There are also two persistent values used to step the LSTM layer forward:
    the hidden state -- h_(t-1), and
    the candidate (cell) vector -- C_(t-1).

    """
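    # From the slicing in __init__ below, a flat parameter vector for this
    # layer has length 4*n_in*n_out (W) + 4*n_out*n_out (U) + 4*n_out (biases)
    # + n_out*n_out (V_o).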
    def __init__(self, n_in, n_out, params, eps=0.001):

        self.n_input = n_in   # dimension of the input vector x_t
        self.n_output = n_out # dimension of the hidden state h_t
        ####---- LAYER PARAMETERS

        # W consists of four afferent weight matrices W_i, W_c, W_f, W_o
        ind_W = 4*n_in*n_out
        self.W = params[:ind_W].reshape((4*n_out, n_in))
        # U consists of four recurrent weight matrices U_i, U_c, U_f, U_o
        ind_U = ind_W + 4*n_out*n_out
        self.U = params[ind_W:ind_U].reshape((4*n_out, n_out))
        # bias consists of four biases b_i, b_c, b_f, b_o
        ind_bias = ind_U + 4*n_out
        self.bias = params[ind_U:ind_bias].reshape((4*n_out, ))
        # One more matrix just for the value of the candidate vector
        self.V_o = params[ind_bias:].reshape((n_out, n_out))

        ####---- LAYER STATES - (PERSISTENT)

        # h is the hidden state of the layer; it has the same dimension as the
        # output, so it is initialized with shape (n_out,).
        self.h = eps*(2*random((n_out,))-1.)

        # C is the candidate (cell) value, also of shape (n_out,).
        self.C = eps*(2*random((n_out,))-1.)
    def step(self, x):
        """
        Input Gate update rule:
        i_t = sigmoid(W_i*x_t + U_i*h_(t-1) + b_i)

        Preliminary Candidate hidden state update rule:
        Cprelim_t = tanh(W_c*x_t + U_c*h_(t-1) + b_c)

        Forget Gate update rule:
        f_t = sigmoid(W_f*x_t + U_f*h_(t-1) + b_f)

        Candidate hidden state update rule:
        C_t = i_t*Cprelim_t + f_t*C_(t-1)

        Output Gate update rule:
        o_t = sigmoid(W_o*x_t + U_o*h_(t-1) + V_o*C_t + b_o)

        Hidden state update rule:
        h_t = o_t * tanh(C_t)

        """

        # The afferent and recurrent weight matrices are stacked so that the
        # products of x and h with their respective weight matrices can each
        # be computed with a single matrix-vector multiplication.
        W_x = np.dot(self.W, x)  #.reshape((self.W.shape[0],1))
        U_h = np.dot(self.U, self.h)

        n = self.n_output # for ease of reading and writing

        # The pre-computed products could be split into named slices for
        # easier access; the named variables are kept below, commented out,
        # as documentation of what each slice means.

        # W_i_x = W_x[:n]
        # W_c_x = W_x[n:2*n]
        # W_f_x = W_x[2*n:3*n]
        # W_o_x = W_x[3*n:]
        #
        # U_i_h = U_h[:n]
        # U_c_h = U_h[n:2*n]
        # U_f_h = U_h[2*n:3*n]
        # U_o_h = U_h[3*n:]

        # i_t = sigmoid(W_i_x + U_i_h + self.bias[:n])
        # C_pre = tanh(W_c_x + U_c_h + self.bias[n:2*n])
        # f_t = sigmoid(W_f_x + U_f_h + self.bias[2*n:3*n])
        # self.C = i_t * C_pre + f_t * self.C

        self.C = sigmoid(W_x[:n] + U_h[:n] + self.bias[:n]) \
            * tanh(W_x[n:2*n] + U_h[n:2*n] + self.bias[n:2*n]) \
            + sigmoid(W_x[2*n:3*n] + U_h[2*n:3*n] + self.bias[2*n:3*n]) \
            * self.C

        # o_t = sigmoid(W_o_x + U_o_h + np.dot(self.V_o, self.C) + self.bias[3*n:])
        # self.h = o_t * tanh(self.C)
        self.h = sigmoid(W_x[3*n:] + U_h[3*n:] +
                         np.dot(self.V_o, self.C) + self.bias[3*n:]) * tanh(self.C)

        return self.h
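
    # rudimentary_test below feeds whole sequences through the layer via a
    # gen_sequence method. A minimal sketch, assuming each column of X is one
    # time step: step the layer across the columns of the (n_in, T) input and
    # stack the resulting hidden states as columns of an (n_out, T) output.
    def gen_sequence(self, X):
        Y = np.zeros((self.n_output, X.shape[1]))
        for t in range(X.shape[1]):
            Y[:, t] = self.step(X[:, t])
        return Y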

def rudimentary_test():
    """
    Very simple test of LSTMLayer functionality. I'm training a DQN for
    Space Invaders right now and I don't really want to get into any training
    until my GPU is free for all the matrix multiplication.

    Right now this is just a fun example of how to multiply random numbers
    to get more random numbers. I might add in some objective costs along with
    some optimization routines, but I would likely make a new repository for
    my optimization function.
    """

    s = """0 a is the quick fox who jumped over the lazy brown dog's new sentence."""
    table = gen_bag_hashtable()

    v = make_wordvector(s, table)

    n_in, T = v.shape
    n_out = n_in # each stacked layer maps back to the same dimensionality
    # n_hidden = 100 # Learn a more complex representation? (LSTMLayer above
    # ties its hidden size to n_out, so no separate hidden dimension is used.)
    eps = 0.1

    # Parameter count implied by the slicing in LSTMLayer.__init__:
    # W, U, the four biases, and V_o.
    n_params = 4*n_in*n_out + \
               4*n_out*n_out + \
               4*n_out + \
               n_out*n_out

    params1 = eps*(2*random(n_params,)-1.)
    params2 = eps*(2*random(n_params,)-1.)
    params3 = eps*(2*random(n_params,)-1.)

    lstm1 = LSTMLayer(n_in, n_out, params1, eps)
    lstm2 = LSTMLayer(n_in, n_out, params2, eps)
    lstm3 = LSTMLayer(n_in, n_out, params3, eps)

    y1 = lstm1.gen_sequence(v)
    y2 = lstm2.gen_sequence(y1)
    y3 = lstm3.gen_sequence(y2)

    s1 = make_string(y3)
    print(s1)

if __name__ == "__main__":
    rudimentary_test()