Skip to content

Commit 76fef4b

Browse files
authored
Merge pull request #75 from ponder-lab/test_autoencoder
Add autoencoder test.
2 parents e50f711 + 39e6cb0 commit 76fef4b

File tree

2 files changed

+192
-0
lines changed

2 files changed

+192
-0
lines changed

com.ibm.wala.cast.python.ml.test/source/com/ibm/wala/cast/python/ml/test/TestTensorflowModel.java

+4
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,10 @@ public void testTf2()
245245
3); // NOTE: Change to 2 tensor parameters and 5 tensor variables once
246246
// https://github.com/wala/ML/issues/127 is fixed. Values 2 and 3 will correspond to the
247247
// tensor parameters.
248+
testTf2("autoencoder.py", "encoder", 1, 10, 2);
249+
testTf2("autoencoder.py", "mean_square", 1, 1, 3);
250+
testTf2("autoencoder.py", "run_optimization", 1, 1, 2);
251+
testTf2("autoencoder.py", "decoder", 1, 8, 2);
248252
}
249253

250254
private void testTf2(
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
# From https://github.com/aymericdamien/TensorFlow-Examples/blob/6dcbe14649163814e72a22a999f20c5e247ce988/tensorflow_v2/notebooks/3_NeuralNetworks/autoencoder.ipynb.
2+
3+
# %%
4+
# """
5+
# # Auto-Encoder Example
6+
7+
# Build a 2-layer auto-encoder with TensorFlow v2 to compress images to a lower-dimensional latent space and then reconstruct them.
8+
9+
# - Author: Aymeric Damien
10+
# - Project: https://github.com/aymericdamien/TensorFlow-Examples/
11+
# """
12+
13+
# %%
14+
# """
15+
# ## Auto-Encoder Overview
16+
17+
# <img src="http://kvfrans.com/content/images/2016/08/autoenc.jpg" alt="ae" style="width: 800px;"/>
18+
19+
# References:
20+
# - [Gradient-based learning applied to document recognition](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf). Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. Proceedings of the IEEE, 86(11):2278-2324, November 1998.
21+
22+
# ## MNIST Dataset Overview
23+
24+
# This example is using MNIST handwritten digits. The dataset contains 60,000 examples for training and 10,000 examples for testing. The digits have been size-normalized and centered in a fixed-size image (28x28 pixels) with values from 0 to 255.
25+
26+
# In this example, each image will be converted to float32, normalized to [0, 1] and flattened to a 1-D array of 784 features (28*28).
27+
28+
# ![MNIST Dataset](http://neuralnetworksanddeeplearning.com/images/mnist_100_digits.png)
29+
30+
# More info: http://yann.lecun.com/exdb/mnist/
31+
# """
32+
33+
# %%
34+
from __future__ import absolute_import, division, print_function
35+
36+
import tensorflow as tf
37+
# Report which TensorFlow version is installed.
print("TensorFlow version:", tf.__version__)
38+
# Stop immediately unless the installed TensorFlow is exactly version 2.9.3.
# NOTE(review): assert statements are skipped when Python runs with -O, so this
# check only guards normal (non-optimized) runs.
assert(tf.__version__ == "2.9.3")
39+
import numpy as np
40+
41+
# %%
42+
# MNIST dataset parameters.
43+
num_features = 784 # data features (img shape: 28*28).
44+
45+
# Training parameters.
46+
# Learning rate passed to the Adam optimizer.
learning_rate = 0.01
47+
# The training loop takes training_steps + 1 batches from the dataset.
training_steps = 1
48+
# Batch size used for both the training and the test pipelines.
batch_size = 256
49+
# The training loop prints the loss whenever step % display_step == 0.
display_step = 1000
50+
51+
# Network parameters.
52+
num_hidden_1 = 128 # 1st layer num features.
53+
num_hidden_2 = 64 # 2nd layer num features (the latent dim).
54+
55+
# %%
56+
# Prepare MNIST data.
57+
from tensorflow.keras.datasets import mnist
58+
# Labels (y_train, y_test) are loaded alongside the images; the loops that
# consume the datasets below discard them.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
59+
# Convert to float32.
60+
x_train, x_test = x_train.astype(np.float32), x_test.astype(np.float32)
61+
# Flatten images to 1-D vector of 784 features (28*28).
62+
x_train, x_test = x_train.reshape([-1, num_features]), x_test.reshape([-1, num_features])
63+
# Normalize image values from [0, 255] to [0, 1].
64+
x_train, x_test = x_train / 255., x_test / 255.
65+
66+
# %%
67+
# Use tf.data API to shuffle and batch data.
68+
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
69+
# Training pipeline: repeat() is called without a count, then shuffle(10000),
# batches of batch_size, and prefetch(1). The training loop bounds iteration
# with take().
train_data = train_data.repeat().shuffle(10000).batch(batch_size).prefetch(1)
70+
71+
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
72+
# Test pipeline: same as above but without shuffling.
test_data = test_data.repeat().batch(batch_size).prefetch(1)
73+
74+
# %%
75+
# Store the layers' weights and biases.
76+
77+
# A random value generator to initialize weights.
78+
random_normal = tf.initializers.RandomNormal()
79+
80+
# Weight matrices, keyed by layer. The shapes requested from the initializer
# chain as num_features -> num_hidden_1 -> num_hidden_2 for the encoder and
# num_hidden_2 -> num_hidden_1 -> num_features for the decoder.
weights = {
81+
'encoder_h1': tf.Variable(random_normal([num_features, num_hidden_1])),
82+
'encoder_h2': tf.Variable(random_normal([num_hidden_1, num_hidden_2])),
83+
'decoder_h1': tf.Variable(random_normal([num_hidden_2, num_hidden_1])),
84+
'decoder_h2': tf.Variable(random_normal([num_hidden_1, num_features])),
85+
}
86+
# Bias vectors, one per layer, sized to that layer's output width.
biases = {
87+
'encoder_b1': tf.Variable(random_normal([num_hidden_1])),
88+
'encoder_b2': tf.Variable(random_normal([num_hidden_2])),
89+
'decoder_b1': tf.Variable(random_normal([num_hidden_1])),
90+
'decoder_b2': tf.Variable(random_normal([num_features])),
91+
}
92+
93+
94+
# %%
95+
# Building the encoder.
96+
# encoder(x): pass x through the two encoder layers and return the result.
#   layer_1 = sigmoid(x @ weights['encoder_h1'] + biases['encoder_b1'])
#   layer_2 = sigmoid(layer_1 @ weights['encoder_h2'] + biases['encoder_b2'])
# Reads the module-level `weights` and `biases` dictionaries.
# NOTE(review): x presumably has num_features columns so that the first matmul
# lines up with 'encoder_h1' -- confirm against callers.
# NOTE(review): TestTensorflowModel calls testTf2("autoencoder.py", "encoder", 1, 10, 2);
# those numbers presumably depend on the exact expressions below, so avoid
# restructuring this function without updating the test.
def encoder(x):
97+
# First encoder layer with sigmoid activation.
98+
layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
99+
biases['encoder_b1']))
100+
# Second encoder layer with sigmoid activation; its output is returned.
101+
layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
102+
biases['encoder_b2']))
103+
return layer_2
104+
105+
106+
# Building the decoder.
107+
# decoder(x): pass x through the two decoder layers and return the result.
#   layer_1 = sigmoid(x @ weights['decoder_h1'] + biases['decoder_b1'])
#   layer_2 = sigmoid(layer_1 @ weights['decoder_h2'] + biases['decoder_b2'])
# Reads the module-level `weights` and `biases` dictionaries.
# NOTE(review): x presumably has num_hidden_2 columns (the encoder's output) so
# that the first matmul lines up with 'decoder_h1' -- confirm against callers.
# NOTE(review): TestTensorflowModel calls testTf2("autoencoder.py", "decoder", 1, 8, 2);
# those numbers presumably depend on the exact expressions below, so avoid
# restructuring this function without updating the test.
def decoder(x):
108+
# First decoder layer with sigmoid activation.
109+
layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
110+
biases['decoder_b1']))
111+
# Second decoder layer with sigmoid activation; its output is returned.
112+
layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
113+
biases['decoder_b2']))
114+
return layer_2
115+
116+
117+
# %%
118+
# Mean squared error between the original images and their reconstructions.
119+
# mean_square(reconstructed, original): return the mean, over all elements, of
# the squared difference (original - reconstructed).
# Note the parameter order: the reconstruction comes first, the original second.
# NOTE(review): TestTensorflowModel calls testTf2("autoencoder.py", "mean_square", 1, 1, 3);
# avoid restructuring this expression without updating the test.
def mean_square(reconstructed, original):
120+
return tf.reduce_mean(tf.pow(original - reconstructed, 2))
121+
122+
123+
# Adam optimizer, configured with the module-level learning_rate and used by
# run_optimization to apply gradient updates.
124+
optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
125+
126+
127+
# %%
128+
# Optimization process.
129+
# run_optimization(x): perform one training step on the batch x and return the loss.
# Steps: encode and decode x, measure the reconstruction loss against x itself,
# compute gradients of that loss with respect to every weight and bias
# variable, and apply them through the module-level `optimizer`.
# Side effect: the variables in `weights` and `biases` are updated.
# NOTE(review): TestTensorflowModel calls testTf2("autoencoder.py", "run_optimization", 1, 1, 2);
# avoid restructuring this function without updating the test.
def run_optimization(x):
130+
# Wrap computation inside a GradientTape for automatic differentiation.
131+
with tf.GradientTape() as g:
132+
reconstructed_image = decoder(encoder(x))
133+
# The input batch doubles as the reconstruction target.
loss = mean_square(reconstructed_image, x)
134+
135+
# Variables to update, i.e. trainable variables.
136+
trainable_variables = list(weights.values()) + list(biases.values())
137+
138+
# Compute gradients.
139+
gradients = g.gradient(loss, trainable_variables)
140+
141+
# Update the weights and biases following the gradients.
142+
optimizer.apply_gradients(zip(gradients, trainable_variables))
143+
144+
return loss
145+
146+
147+
# %%
148+
# Run training for the given number of steps.
# take(training_steps + 1) together with enumerate() numbers the batches
# 0..training_steps. With training_steps = 1 and display_step = 1000 as set
# in this file, that is two batches, and only step 0 is printed.
149+
for step, (batch_x, _) in enumerate(train_data.take(training_steps + 1)):
150+
151+
# Run the optimization.
152+
loss = run_optimization(batch_x)
153+
154+
# Log the loss every display_step steps.
if step % display_step == 0:
155+
print("step: %i, loss: %f" % (step, loss))
156+
157+
# %%
158+
# Testing and Visualization.
159+
import matplotlib.pyplot as plt
160+
161+
# %%
162+
# Encode and decode images from the test set and assemble n x n grids of the
# originals and of their reconstructions. The plotting calls that would show
# these grids are commented out at the end of the file, so the canvases are
# only filled here, not displayed.
163+
n = 4
164+
# Each canvas holds an n x n grid of 28x28 images.
canvas_orig = np.empty((28 * n, 28 * n))
165+
canvas_recon = np.empty((28 * n, 28 * n))
166+
# One test batch per grid row i; the labels are ignored.
for i, (batch_x, _) in enumerate(test_data.take(n)):
167+
# Encode and decode the digit image.
168+
reconstructed_images = decoder(encoder(batch_x))
169+
# Copy the first n original images of this batch into row i of canvas_orig.
170+
for j in range(n):
171+
# Reshape the flat original image back to 28x28.
172+
img = batch_x[j].numpy().reshape([28, 28])
173+
canvas_orig[i * 28:(i + 1) * 28, j * 28:(j + 1) * 28] = img
174+
# Copy the first n reconstructed images of this batch into row i of canvas_recon.
175+
for j in range(n):
176+
# Reshape the flat reconstructed image back to 28x28.
177+
reconstr_img = reconstructed_images[j].numpy().reshape([28, 28])
178+
canvas_recon[i * 28:(i + 1) * 28, j * 28:(j + 1) * 28] = reconstr_img
179+
180+
# print("Original Images")
181+
# plt.figure(figsize=(n, n))
182+
# plt.imshow(canvas_orig, origin="upper", cmap="gray")
183+
# plt.show()
184+
#
185+
# print("Reconstructed Images")
186+
# plt.figure(figsize=(n, n))
187+
# plt.imshow(canvas_recon, origin="upper", cmap="gray")
188+
# plt.show()

0 commit comments

Comments
 (0)