model.py
# -*- coding: utf-8 -*-
import tensorflow as tf


class Model:
    def __init__(self, parameter, model_number):
        self.parameter = parameter
        self.model_number = str(model_number)

    def build_model(self):
        with tf.variable_scope(self.model_number):
            self._build_placeholder()

            # One embedding matrix per entry in parameter["embedding"]; each
            # entry is [name, n_tokens, dimension],
            # e.g. { "morph": 0, "morph_tag": 1, "tag": 2, "character": 3, ... }
            self._embedding_matrix = []
            for item in self.parameter["embedding"]:
                self._embedding_matrix.append(
                    self._build_embedding(item[1], item[2],
                                          name="embedding_" + self.model_number + item[0]))

            # Look up the embeddings for the morpheme and character inputs.
            self._embeddings = []
            self._embeddings.append(tf.nn.embedding_lookup(self._embedding_matrix[0], self.morph))
            self._embeddings.append(tf.nn.embedding_lookup(self._embedding_matrix[1], self.character))

            # Encode each word from its characters with a Bi-LSTM; only the
            # final states of each direction are kept (last=True).
            character_embedding = tf.reshape(self._embeddings[1],
                                             [-1, self.parameter["word_length"],
                                              self.parameter["embedding"][1][2]])
            char_len = tf.reshape(self.character_len, [-1])
            character_emb_rnn, _, _ = self._build_birnn_model(
                character_embedding, char_len, self.parameter["char_lstm_units"],
                self.dropout_rate, last=True, scope="char_layer" + self.model_number)

            # Concatenate the NE-dictionary features, the morpheme embedding,
            # and the character-level encoding along the feature axis. The raw
            # character embedding itself is skipped here, since it already went
            # through the character Bi-LSTM above.
            all_data_emb = self.ne_dict
            for i in range(0, len(self._embeddings) - 1):
                all_data_emb = tf.concat([all_data_emb, self._embeddings[i]], axis=2)
            all_data_emb = tf.concat([all_data_emb, character_emb_rnn], axis=2)

            # Run the sentence-level Bi-LSTM over the combined features and
            # project its per-token outputs to per-class scores.
            sentence_output, W, B = self._build_birnn_model(
                all_data_emb, self.sequence, self.parameter["lstm_units"],
                self.dropout_rate, scope="all_data_layer" + self.model_number)
            sentence_output = tf.matmul(sentence_output, W) + B

            # CRF layer on top of the per-token scores.
            crf_cost, crf_weight, crf_bias = self._build_crf_layer(sentence_output)

            self.train_op = self._build_output_layer(crf_cost)
            self.cost = crf_cost
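
    # Shape walkthrough (a sketch, assuming the sample `parameter` dict at the
    # bottom of this file: sentence_length=180, word_length=8, n_class=100,
    # embedding dims 10, char_lstm_units=128, lstm_units=32; B = batch size):
    #   morph        [B, 180]      -> morpheme embedding  [B, 180, 10]
    #   character    [B, 180, 8]   -> char embedding      [B*180, 8, 10]
    #   char Bi-LSTM final states  -> char encoding       [B, 180, 256]
    #   ne_dict      [B, 180, 50]
    #   feature concat             -> all_data_emb        [B, 180, 316]
    #   sentence Bi-LSTM + W, B    -> unary scores        [B*180, 100]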
    def _build_placeholder(self):
        # [batch, sentence_length] morpheme ids
        self.morph = tf.placeholder(tf.int32, [None, None])
        # [batch, sentence_length, n_class/2] NE-dictionary features
        self.ne_dict = tf.placeholder(tf.float32, [None, None, int(self.parameter["n_class"] / 2)])
        # [batch, sentence_length, word_length] character ids
        self.character = tf.placeholder(tf.int32, [None, None, None])
        # LSTM keep probability (despite the name); 1.0 disables dropout
        self.dropout_rate = tf.placeholder(tf.float32)
        # [batch] sentence lengths
        self.sequence = tf.placeholder(tf.int32, [None])
        # [batch, sentence_length] per-word character counts
        self.character_len = tf.placeholder(tf.int32, [None, None])
        # [batch, sentence_length] gold label ids
        self.label = tf.placeholder(tf.int32, [None, None])
        self.global_step = tf.Variable(0, trainable=False, name='global_step')

    def _build_embedding(self, n_tokens, dimension, name="embedding"):
        embedding_weights = tf.get_variable(
            name, [n_tokens, dimension],
            dtype=tf.float32,
        )
        return embedding_weights
    def _build_single_cell(self, lstm_units, keep_prob):
        cell = tf.contrib.rnn.LayerNormBasicLSTMCell(lstm_units)
        cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob,
                                             output_keep_prob=keep_prob)
        return cell

    def _build_weight(self, shape, scope="weight"):
        with tf.variable_scope(scope):
            W = tf.get_variable(name="W", shape=[shape[0], shape[1]], dtype=tf.float32,
                                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.get_variable(name="b", shape=[shape[1]], dtype=tf.float32,
                                initializer=tf.zeros_initializer())
        return W, b

    def _build_birnn_model(self, target, seq_len, lstm_units, keep_prob, last=False, scope="layer"):
        with tf.variable_scope("forward_" + scope):
            lstm_fw_cell = self._build_single_cell(lstm_units, keep_prob)
        with tf.variable_scope("backward_" + scope):
            lstm_bw_cell = self._build_single_cell(lstm_units, keep_prob)
        with tf.variable_scope("birnn-lstm_" + scope):
            _output = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, dtype=tf.float32,
                                                      inputs=target, sequence_length=seq_len,
                                                      scope="rnn_" + scope)
            if last:
                # Keep only the final hidden state of each direction:
                # one fixed-size vector per (character) sequence.
                _, ((_, output_fw), (_, output_bw)) = _output
                outputs = tf.concat([output_fw, output_bw], axis=1)
                outputs = tf.reshape(outputs,
                                     shape=[-1, self.parameter["sentence_length"], 2 * lstm_units])
            else:
                # Keep the per-step outputs of both directions,
                # flattened to one row per token.
                (output_fw, output_bw), _ = _output
                outputs = tf.concat([output_fw, output_bw], axis=2)
                outputs = tf.reshape(outputs, shape=[-1, 2 * lstm_units])
            # The projection weight is sized with the local `lstm_units` (the
            # original used self.parameter["lstm_units"], which only differs
            # for the character layer, whose W and b are discarded anyway).
            W, b = self._build_weight([2 * lstm_units, self.parameter["n_class"]],
                                      scope="output" + scope)
        return outputs, W, b
    def _build_crf_layer(self, target):
        with tf.variable_scope("crf_layer" + self.model_number):
            # Project the token scores once more and reshape them into
            # [batch, sentence_length, n_class] unary potentials.
            W, B = self._build_weight([self.parameter["n_class"], self.parameter["n_class"]],
                                      scope="weight_bias")
            matricized_unary_scores = tf.matmul(target, W) + B
            matricized_unary_scores = tf.reshape(
                matricized_unary_scores,
                [-1, self.parameter["sentence_length"], self.parameter["n_class"]])
            self.matricized_unary_scores = matricized_unary_scores

            # The loss is the negative mean log-likelihood of the gold tag
            # sequences; Viterbi decoding yields the predicted tag sequence.
            self.log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(
                self.matricized_unary_scores, self.label, self.sequence)
            cost = tf.reduce_mean(-self.log_likelihood)
            self.viterbi_sequence, viterbi_score = tf.contrib.crf.crf_decode(
                matricized_unary_scores, self.transition_params, self.sequence)
        return cost, W, B

    def _build_output_layer(self, cost):
        with tf.variable_scope("output_layer"):
            train_op = tf.train.RMSPropOptimizer(self.parameter["learning_rate"]).minimize(
                cost, global_step=self.global_step)
        return train_op

if __name__ == "__main__":
    parameter = {"embedding": [['word', 10, 10], ['character', 10, 10]],
                 "lstm_units": 32, "keep_prob": 0.65,
                 "sequence_length": 300, "n_class": 100, "batch_size": 128,
                 "learning_rate": 0.002, "word_length": 8, "char_lstm_units": 128,
                 "sentence_length": 180}
    model = Model(parameter, 0)
    model.build_model()
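
    # A minimal smoke-test sketch (not part of the original file): it feeds
    # zero-filled dummy tensors shaped to match the placeholders above, purely
    # to show how the graph would be driven. The batch size and all values
    # here are arbitrary assumptions.
    import numpy as np

    batch = 4
    sent_len = parameter["sentence_length"]
    word_len = parameter["word_length"]
    feed = {
        model.morph: np.zeros([batch, sent_len], dtype=np.int32),
        model.ne_dict: np.zeros([batch, sent_len, parameter["n_class"] // 2], dtype=np.float32),
        model.character: np.zeros([batch, sent_len, word_len], dtype=np.int32),
        model.character_len: np.full([batch, sent_len], word_len, dtype=np.int32),
        model.sequence: np.full([batch], sent_len, dtype=np.int32),
        model.label: np.zeros([batch, sent_len], dtype=np.int32),
        model.dropout_rate: 1.0,  # fed as keep_prob; 1.0 disables dropout
    }
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _, cost, tags = sess.run([model.train_op, model.cost, model.viterbi_sequence],
                                 feed_dict=feed)
        print("cost:", cost, "predicted tag shape:", tags.shape)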