forked from breadbread1984/c3d
-
Notifications
You must be signed in to change notification settings - Fork 0
/
train_c3d.py
112 lines (104 loc) · 5.45 KB
/
train_c3d.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import absolute_import;
from __future__ import division;
from __future__ import print_function;
import os;
import numpy as np;
import tensorflow as tf;
# Number of clips grouped into each mini-batch by the input pipelines.
batch_size = 8
# Number of action classes; sets the one-hot label depth and the logits width.
class_num = 101
def main(unused_argv):
    """Train the action classifier, then evaluate it and print the metrics.

    Checkpoints are written to (and resumed from) the
    "action_classifier_model" directory.

    Args:
        unused_argv: Command-line arguments handed over by the app runner;
            ignored.
    """
    estimator = tf.estimator.Estimator(
        model_fn=action_model_fn,
        model_dir="action_classifier_model",
    )
    tf.logging.set_verbosity(tf.logging.DEBUG)
    # Report the tensor named "loss" after every single training step.
    loss_logger = tf.train.LoggingTensorHook(
        tensors={"loss": "loss"},
        every_n_iter=1,
    )
    estimator.train(
        input_fn=train_input_fn,
        steps=200000,
        hooks=[loss_logger],
    )
    metrics = estimator.evaluate(input_fn=eval_input_fn)
    print(metrics)
def parse_function(serialized_example):
    """Decode one serialized example into a (clips, label) pair.

    Args:
        serialized_example: Scalar string tensor holding a serialized
            tf.Example with a 'clips' bytes feature and a 'label' int64
            feature.

    Returns:
        A tuple (clips, label): `clips` is a float32 tensor of shape
        [16, 112, 112, 3] and `label` is a scalar int32 tensor.
    """
    schema = {
        'clips': tf.FixedLenFeature((), dtype=tf.string, default_value=''),
        'label': tf.FixedLenFeature((), dtype=tf.int64, default_value=0),
    }
    parsed = tf.parse_single_example(serialized_example, features=schema)
    # The clip is stored as raw uint8 bytes; rebuild its fixed shape.
    # NOTE(review): presumably (frames, height, width, channels) - confirm
    # against the script that wrote the TFRecord files.
    raw_bytes = tf.decode_raw(parsed['clips'], out_type=tf.uint8)
    shaped = tf.reshape(raw_bytes, [16, 112, 112, 3])
    clips = tf.cast(shaped, dtype=tf.float32)
    label = tf.cast(parsed['label'], dtype=tf.int32)
    return clips, label
def train_input_fn():
    """Build the training input pipeline over 'trainset.tfrecord'.

    Records are shuffled through a 512-element buffer, decoded with
    `parse_function`, grouped into batches of `batch_size`, and the whole
    pass is repeated 200 times.

    Returns:
        A tuple (features, labels) of tensors yielding the next batch.
    """
    dataset = tf.data.TFRecordDataset(['trainset.tfrecord'])
    # Shuffle the serialized records *before* decoding them: the resulting
    # random order is equivalent, but the buffer then holds compact byte
    # strings instead of 512 decoded float32 clips of 16*112*112*3 values.
    dataset = dataset.shuffle(buffer_size=512)
    dataset = dataset.map(parse_function)
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat(200)
    iterator = dataset.make_one_shot_iterator()
    features, labels = iterator.get_next()
    return features, labels
def eval_input_fn():
    """Build the evaluation input pipeline over 'testset.tfrecord'.

    Records are decoded with `parse_function` and grouped into batches of
    `batch_size`; the data is traversed exactly once, in file order.

    Returns:
        A tuple (features, labels) of tensors yielding the next batch.
    """
    dataset = tf.data.TFRecordDataset(['testset.tfrecord'])
    dataset = dataset.map(parse_function)
    # No shuffle here: evaluation gains nothing from a random order, while a
    # shuffle buffer would hold up to 512 decoded clips in memory and change
    # which examples fall into the final partial batch from run to run.
    # A single pass is also the default, so no explicit repeat(1) is needed.
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_one_shot_iterator()
    features, labels = iterator.get_next()
    return features, labels
def _conv_norm_relu(inputs, filters):
    """Apply a 3x3x3 "same"-padded 3D convolution, then layer norm with ReLU."""
    conv = tf.layers.conv3d(
        inputs, filters=filters, kernel_size=[3, 3, 3], padding="same")
    # layer_norm's `trainable` flag only decides whether its variables join
    # the TRAINABLE_VARIABLES collection; it is not a train/eval switch, so
    # the default is used in every mode.
    return tf.contrib.layers.layer_norm(conv, activation_fn=tf.nn.relu)


def _max_pool(inputs, window):
    """Max-pool with identical pool size and strides and "same" padding."""
    return tf.layers.max_pooling3d(
        inputs, pool_size=window, strides=window, padding="same")


def action_model_fn(features, labels, mode):
    """Estimator model function for the 3D-convolutional action classifier.

    The network is five convolutional stages (each one or two
    conv + layer-norm + ReLU units followed by max pooling), then two
    4096-unit dense layers with dropout and a `class_num`-way logits layer.

    Args:
        features: Batch of clips fed to the first 3D convolution. With the
            input functions in this file this is a float32 tensor of shape
            [batch, 16, 112, 112, 3].
        labels: Integer class ids, one per clip; unused in PREDICT mode.
        mode: One of the `tf.estimator.ModeKeys` values.

    Returns:
        A `tf.estimator.EstimatorSpec`:
          * PREDICT: predictions are the argmax class id per clip.
          * TRAIN: softmax cross-entropy loss plus an Adam (1e-4) train op.
          * EVAL: the same loss plus an "accuracy" metric.

    Raises:
        ValueError: If `mode` is not a known estimator mode.
    """
    mode_keys = tf.estimator.ModeKeys
    # Fail fast, before any graph construction, on an unsupported mode.
    if mode not in (mode_keys.TRAIN, mode_keys.EVAL, mode_keys.PREDICT):
        raise ValueError('Unknown mode of estimator!')
    is_training = mode == mode_keys.TRAIN

    # (filters, number of conv units, pooling window) for layers 1 to 5.
    # The first stage pools with a window of 1 along the first clip axis, so
    # that axis keeps its length there; later stages halve all three axes.
    stages = (
        (64, 1, [1, 2, 2]),
        (128, 1, [2, 2, 2]),
        (256, 2, [2, 2, 2]),
        (512, 2, [2, 2, 2]),
        (512, 2, [2, 2, 2]),
    )
    net = features
    for filters, conv_count, pool_window in stages:
        for _ in range(conv_count):
            net = _conv_norm_relu(net, filters)
        net = _max_pool(net, pool_window)

    # Classifier head: flatten, two dense + dropout layers, then logits.
    net = tf.layers.flatten(net)
    for _ in range(2):
        net = tf.layers.dense(net, units=4096, activation=tf.nn.relu)
        # Dropout is only active while training.
        net = tf.layers.dropout(net, training=is_training)
    logits = tf.layers.dense(net, units=class_num)

    if mode == mode_keys.PREDICT:
        action = tf.argmax(logits, axis=1)
        return tf.estimator.EstimatorSpec(mode=mode, predictions=action)

    # TRAIN and EVAL share the same loss.
    onehot_labels = tf.one_hot(labels, class_num)
    loss = tf.losses.softmax_cross_entropy(onehot_labels, logits)
    # Give the loss a fixed name so it can be fetched by name (e.g. by a
    # LoggingTensorHook configured with the tensor name "loss").
    loss = tf.identity(loss, name="loss")

    if is_training:
        optimizer = tf.train.AdamOptimizer(1e-4)
        train_op = optimizer.minimize(
            loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op)

    # Only EVAL remains at this point.
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=tf.argmax(logits, axis=1)),
    }
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
# Script entry point: let TensorFlow's app runner invoke main().
if __name__ == "__main__":
    tf.app.run(main=main)