
Commit 12ed38e

Update Examples (aymericdamien#261)
* update examples
* remove unnecessary files
* update eager api notebooks
* add more examples
1 parent f6f8083 commit 12ed38e

14 files changed (+1296 / -24 lines)

README.md (+4 -2)
@@ -4,9 +4,9 @@ This tutorial was designed for easily diving into TensorFlow, through examples.
 
 It is suitable for beginners who want to find clear and concise examples about TensorFlow. Besides the traditional 'raw' TensorFlow implementations, you can also find the latest TensorFlow API practices (such as `layers`, `estimator`, `dataset`, ...).
 
-**Update (03/18/2018):** TensorFlow's Eager API examples available! (TF v1.5+ recommended).
+**Update (07/25/2018):** Add new examples (GBDT, Word2Vec) + TF1.9 compatibility! (TF v1.9+ recommended).
 
-*If you are using older TensorFlow version (0.11 and under), please have a [look here](https://github.com/aymericdamien/TensorFlow-Examples/tree/0.11).*
+*If you are using older TensorFlow version (0.11 and under), please take a [look here](https://github.com/aymericdamien/TensorFlow-Examples/tree/0.11).*
 
 ## Tutorial index
 
@@ -27,6 +27,8 @@ It is suitable for beginners who want to find clear and concise examples about T
 - **Nearest Neighbor** ([notebook](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/2_BasicModels/nearest_neighbor.ipynb)) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/2_BasicModels/nearest_neighbor.py)). Implement Nearest Neighbor algorithm with TensorFlow.
 - **K-Means** ([notebook](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/2_BasicModels/kmeans.ipynb)) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/2_BasicModels/kmeans.py)). Build a K-Means classifier with TensorFlow.
 - **Random Forest** ([notebook](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/2_BasicModels/random_forest.ipynb)) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/2_BasicModels/random_forest.py)). Build a Random Forest classifier with TensorFlow.
+- **Gradient Boosted Decision Tree (GBDT)** ([notebook](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/2_BasicModels/gradient_boosted_decision_tree.ipynb)) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/2_BasicModels/gradient_boosted_decision_tree.py)). Build a Gradient Boosted Decision Tree (GBDT) with TensorFlow.
+- **Word2Vec (Word Embedding)** ([notebook](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/2_BasicModels/word2vec.ipynb)) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/2_BasicModels/word2vec.py)). Build a Word Embedding Model (Word2Vec) from Wikipedia data, with TensorFlow.
 
 #### 3 - Neural Networks
 ##### Supervised

examples/2_BasicModels/gradient_boosted_decision_tree.py (+85)
@@ -0,0 +1,85 @@
""" Gradient Boosted Decision Tree (GBDT).

Implement a Gradient Boosted Decision Tree with TensorFlow to classify
handwritten digit images. This example is using the MNIST database of
handwritten digits as training samples (http://yann.lecun.com/exdb/mnist/).

Links:
    [MNIST Dataset](http://yann.lecun.com/exdb/mnist/).

Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
"""

from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeClassifier
from tensorflow.contrib.boosted_trees.proto import learner_pb2 as gbdt_learner

# Ignore all GPUs (current TF GBDT does not support GPU).
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Import MNIST data
# Set verbosity to display errors only (remove this line to show warnings)
tf.logging.set_verbosity(tf.logging.ERROR)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=False,
                                  source_url='http://yann.lecun.com/exdb/mnist/')

# Parameters
batch_size = 4096  # The number of samples per batch
num_classes = 10  # The 10 digits
num_features = 784  # Each image is 28x28 pixels
max_steps = 10000

# GBDT Parameters
learning_rate = 0.1
l1_regul = 0.
l2_regul = 1.
examples_per_layer = 1000
num_trees = 10
max_depth = 16

# Fill GBDT parameters into the config proto
learner_config = gbdt_learner.LearnerConfig()
learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate
learner_config.regularization.l1 = l1_regul
learner_config.regularization.l2 = l2_regul / examples_per_layer
learner_config.constraints.max_tree_depth = max_depth
growing_mode = gbdt_learner.LearnerConfig.LAYER_BY_LAYER
learner_config.growing_mode = growing_mode
run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
learner_config.multi_class_strategy = (
    gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN)

# Create a TensorFlow GBDT Estimator
gbdt_model = GradientBoostedDecisionTreeClassifier(
    model_dir=None,  # No save directory specified
    learner_config=learner_config,
    n_classes=num_classes,
    examples_per_layer=examples_per_layer,
    num_trees=num_trees,
    center_bias=False,
    config=run_config)

# Display TF info logs
tf.logging.set_verbosity(tf.logging.INFO)

# Define the input function for training
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.train.images}, y=mnist.train.labels,
    batch_size=batch_size, num_epochs=None, shuffle=True)
# Train the Model
gbdt_model.fit(input_fn=input_fn, max_steps=max_steps)

# Evaluate the Model
# Define the input function for evaluating
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.test.images}, y=mnist.test.labels,
    batch_size=batch_size, shuffle=False)
# Use the Estimator 'evaluate' method
e = gbdt_model.evaluate(input_fn=input_fn)

print("Testing Accuracy:", e['accuracy'])

examples/2_BasicModels/linear_regression_eager_api.py (+2 -2)
@@ -10,10 +10,10 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import tensorflow as tf
-import tensorflow.contrib.eager as tfe
 
 # Set Eager API
-tfe.enable_eager_execution()
+tf.enable_eager_execution()
+tfe = tf.contrib.eager
 
 # Training Data
 train_X = [3.3, 4.4, 5.5, 6.71, 6.93, 4.168, 9.779, 6.182, 7.59, 2.167,
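
The same migration is applied to every Eager API example in this commit: the `import tensorflow.contrib.eager as tfe` import is dropped, eager execution is enabled through the core API, and `tfe` is kept as an alias for the remaining contrib utilities. A minimal standalone sketch of the new idiom (not part of the commit, assuming TF 1.7+ where `tf.enable_eager_execution` is available in core):

# Eager execution must be enabled once, before any graph ops are created.
from __future__ import print_function

import tensorflow as tf

tf.enable_eager_execution()   # replaces tfe.enable_eager_execution()
tfe = tf.contrib.eager        # keep the tfe alias for contrib helpers (e.g. tfe.Variable)

x = tf.constant([[1., 2.], [3., 4.]])
print(tf.matmul(x, x))        # executes immediately, no tf.Session needed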

examples/2_BasicModels/logistic_regression_eager_api.py (+2 -2)
@@ -10,10 +10,10 @@
 from __future__ import absolute_import, division, print_function
 
 import tensorflow as tf
-import tensorflow.contrib.eager as tfe
 
 # Set Eager API
-tfe.enable_eager_execution()
+tf.enable_eager_execution()
+tfe = tf.contrib.eager
 
 # Import MNIST data
 from tensorflow.examples.tutorials.mnist import input_data

examples/2_BasicModels/word2vec.py (+195)
@@ -0,0 +1,195 @@
""" Word2Vec.

Implement Word2Vec algorithm to compute vector representations of words.
This example is using a small chunk of Wikipedia articles to train from.

References:
    - Mikolov, Tomas et al. "Efficient Estimation of Word Representations
    in Vector Space.", 2013.

Links:
    - [Word2Vec] https://arxiv.org/pdf/1301.3781.pdf

Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
"""
from __future__ import division, print_function, absolute_import

import collections
import os
import random
import urllib
import zipfile

import numpy as np
import tensorflow as tf

# Training Parameters
learning_rate = 0.1
batch_size = 128
num_steps = 3000000
display_step = 10000
eval_step = 200000

# Evaluation Parameters
eval_words = ['five', 'of', 'going', 'hardware', 'american', 'britain']

# Word2Vec Parameters
embedding_size = 200  # Dimension of the embedding vector
max_vocabulary_size = 50000  # Total number of different words in the vocabulary
min_occurrence = 10  # Remove all words that do not appear at least n times
skip_window = 3  # How many words to consider left and right
num_skips = 2  # How many times to reuse an input to generate a label
num_sampled = 64  # Number of negative examples to sample


# Download a small chunk of Wikipedia articles collection
url = 'http://mattmahoney.net/dc/text8.zip'
data_path = 'text8.zip'
if not os.path.exists(data_path):
    print("Downloading the dataset... (It may take some time)")
    filename, _ = urllib.urlretrieve(url, data_path)
    print("Done!")
# Unzip the dataset file. Text has already been processed
with zipfile.ZipFile(data_path) as f:
    text_words = f.read(f.namelist()[0]).lower().split()

# Build the dictionary and replace rare words with UNK token
count = [('UNK', -1)]
# Retrieve the most common words
count.extend(collections.Counter(text_words).most_common(max_vocabulary_size - 1))
# Remove samples with less than 'min_occurrence' occurrences
# (iterate backwards so the least frequent words are checked first)
for i in range(len(count) - 1, -1, -1):
    if count[i][1] < min_occurrence:
        count.pop(i)
    else:
        # The collection is ordered, so stop when 'min_occurrence' is reached
        break
# Compute the vocabulary size
vocabulary_size = len(count)
# Assign an id to each word
word2id = dict()
for i, (word, _) in enumerate(count):
    word2id[word] = i

data = list()
unk_count = 0
for word in text_words:
    # Retrieve a word id, or assign it index 0 ('UNK') if not in dictionary
    index = word2id.get(word, 0)
    if index == 0:
        unk_count += 1
    data.append(index)
count[0] = ('UNK', unk_count)
id2word = dict(zip(word2id.values(), word2id.keys()))

print("Words count:", len(text_words))
print("Unique words:", len(set(text_words)))
print("Vocabulary size:", vocabulary_size)
print("Most common words:", count[:10])

data_index = 0
# Generate training batch for the skip-gram model
def next_batch(batch_size, num_skips, skip_window):
    global data_index
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_window
    batch = np.ndarray(shape=(batch_size), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    # get window size (words left and right + current one)
    span = 2 * skip_window + 1
    buffer = collections.deque(maxlen=span)
    if data_index + span > len(data):
        data_index = 0
    buffer.extend(data[data_index:data_index + span])
    data_index += span
    for i in range(batch_size // num_skips):
        context_words = [w for w in range(span) if w != skip_window]
        words_to_use = random.sample(context_words, num_skips)
        for j, context_word in enumerate(words_to_use):
            batch[i * num_skips + j] = buffer[skip_window]
            labels[i * num_skips + j, 0] = buffer[context_word]
        if data_index == len(data):
            buffer.extend(data[0:span])
            data_index = span
        else:
            buffer.append(data[data_index])
            data_index += 1
    # Backtrack a little bit to avoid skipping words in the end of a batch
    data_index = (data_index + len(data) - span) % len(data)
    return batch, labels


# Input data
X = tf.placeholder(tf.int32, shape=[None])
# Input label
Y = tf.placeholder(tf.int32, shape=[None, 1])

# Ensure the following ops & var are assigned on CPU
# (some ops are not compatible on GPU)
with tf.device('/cpu:0'):
    # Create the embedding variable (each row represents a word embedding vector)
    embedding = tf.Variable(tf.random_normal([vocabulary_size, embedding_size]))
    # Lookup the corresponding embedding vectors for each sample in X
    X_embed = tf.nn.embedding_lookup(embedding, X)

    # Construct the variables for the NCE loss
    nce_weights = tf.Variable(tf.random_normal([vocabulary_size, embedding_size]))
    nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

    # Compute the average NCE loss for the batch
    loss_op = tf.reduce_mean(
        tf.nn.nce_loss(weights=nce_weights,
                       biases=nce_biases,
                       labels=Y,
                       inputs=X_embed,
                       num_sampled=num_sampled,
                       num_classes=vocabulary_size))

# Define the optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluation
# Compute the cosine similarity between input data embedding and every embedding vector
X_embed_norm = X_embed / tf.sqrt(tf.reduce_sum(tf.square(X_embed)))
embedding_norm = embedding / tf.sqrt(tf.reduce_sum(tf.square(embedding), 1, keepdims=True))
cosine_sim_op = tf.matmul(X_embed_norm, embedding_norm, transpose_b=True)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    # Testing data
    x_test = np.array([word2id[w] for w in eval_words])

    average_loss = 0
    for step in xrange(1, num_steps + 1):
        # Get a new batch of data
        batch_x, batch_y = next_batch(batch_size, num_skips, skip_window)
        # Run training op
        _, loss = sess.run([train_op, loss_op], feed_dict={X: batch_x, Y: batch_y})
        average_loss += loss

        if step % display_step == 0 or step == 1:
            if step > 1:
                average_loss /= display_step
            print("Step " + str(step) + ", Average Loss= " +
                  "{:.4f}".format(average_loss))
            average_loss = 0

        # Evaluation
        if step % eval_step == 0 or step == 1:
            print("Evaluation...")
            sim = sess.run(cosine_sim_op, feed_dict={X: x_test})
            for i in xrange(len(eval_words)):
                top_k = 8  # number of nearest neighbors
                nearest = (-sim[i, :]).argsort()[1:top_k + 1]
                log_str = '"%s" nearest neighbors:' % eval_words[i]
                for k in xrange(top_k):
                    log_str = '%s %s,' % (log_str, id2word[nearest[k]])
                print(log_str)
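
To make the skip-gram batch generation above concrete, a small hypothetical check (not part of the commit) can print a handful of (center word, context word) pairs; it assumes `data`, `id2word` and `next_batch()` from the script are already defined:

# Inspect one tiny batch: each pair trains the model to predict a context word
# that appears near the center word.
batch, labels = next_batch(batch_size=8, num_skips=2, skip_window=1)
for center, context in zip(batch, labels[:, 0]):
    print(id2word[center], '->', id2word[context])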

examples/3_NeuralNetworks/neural_network.py (+1 -1)
@@ -61,7 +61,7 @@ def model_fn(features, labels, mode):
     if mode == tf.estimator.ModeKeys.PREDICT:
         return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
 
-    # Define loss and optimizer
+    # Define loss and optimizer
     loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
         logits=logits, labels=tf.cast(labels, dtype=tf.int32)))
     optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

examples/3_NeuralNetworks/neural_network_eager_api.py (+2 -2)
@@ -16,10 +16,10 @@
 from __future__ import print_function
 
 import tensorflow as tf
-import tensorflow.contrib.eager as tfe
 
 # Set Eager API
-tfe.enable_eager_execution()
+tf.enable_eager_execution()
+tfe = tf.contrib.eager
 
 # Import MNIST data
 from tensorflow.examples.tutorials.mnist import input_data

notebooks/1_Introduction/basic_eager_api.ipynb (+3 -3)
@@ -42,8 +42,7 @@
 "from __future__ import absolute_import, division, print_function\n",
 "\n",
 "import numpy as np\n",
-"import tensorflow as tf\n",
-"import tensorflow.contrib.eager as tfe"
+"import tensorflow as tf"
 ]
 },
 {
@@ -64,7 +63,8 @@
 "source": [
 "# Set Eager API\n",
 "print(\"Setting Eager mode...\")\n",
-"tfe.enable_eager_execution()"
+"tf.enable_eager_execution()\n",
+"tfe = tf.contrib.eager"
 ]
 },
 {
