Skip to content

Commit

Permalink
adding FACES_project and similarity_project to the repo. Faces_projec…
Browse files Browse the repository at this point in the history
…t supports cosine similarity for the faces data set and similarity project is the final similarity feature support for the notre dame stimuli dataset
  • Loading branch information
yajaira5 committed Dec 20, 2016
1 parent 21a38e5 commit dbe567e
Show file tree
Hide file tree
Showing 21 changed files with 2,127 additions and 0 deletions.
66 changes: 66 additions & 0 deletions FACES_project/data_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import hickle as hkl
import numpy as np
from keras import backend as K
from keras.preprocessing.image import Iterator

# Data generator that creates sequences for input into PredNet.
class SequenceGenerator(Iterator):
    """Batch iterator that serves fixed-length frame sequences for PredNet.

    Frames are read from ``data_file`` and one source label per frame from
    ``source_file`` (both with ``hkl.load``). A window of ``nt`` consecutive
    frames is offered as a sequence only when its first and last frame carry
    the same source label, so a sequence never straddles two sources.

    Args:
        data_file: hickle file with the frames, expected to be laid out as
            (n_images, nb_cols, nb_rows, nb_channels).
        source_file: hickle file with the source label of each frame.
        nt: number of frames per sequence.
        batch_size: number of sequences per batch.
        shuffle: if true, the list of start indices is permuted once here
            (using numpy's global RNG) and the flag is also forwarded to the
            Keras ``Iterator`` base class.
        seed: forwarded to the Keras ``Iterator`` base class only.
        output_mode: 'error' (targets are zeros) or 'prediction' (targets are
            the input frames themselves).
        sequence_start_mode: 'all' lets a sequence start at any frame;
            'unique' tiles the data so each frame is in at most one sequence.
        N_seq: optional cap on the number of sequences kept.
        dim_ordering: 'th' moves the channel axis in front of the two spatial
            axes; any other value leaves the frames as loaded.
    """

    def __init__(self, data_file, source_file, nt,
                 batch_size=8, shuffle=False, seed=None,
                 output_mode='error', sequence_start_mode='all', N_seq=None,
                 dim_ordering=K.image_dim_ordering()):
        self.X = hkl.load(data_file)  # X will be like (n_images, nb_cols, nb_rows, nb_channels)
        self.sources = hkl.load(source_file)  # source of each frame, used to keep sequences within one video
        self.nt = nt
        self.batch_size = batch_size
        self.dim_ordering = dim_ordering
        assert sequence_start_mode in {'all', 'unique'}, 'sequence_start_mode must be in {all, unique}'
        self.sequence_start_mode = sequence_start_mode
        assert output_mode in {'error', 'prediction'}, 'output_mode must be in {error, prediction}'
        self.output_mode = output_mode

        if self.dim_ordering == 'th':
            self.X = np.transpose(self.X, (0, 3, 1, 2))
        self.im_shape = self.X[0].shape

        # A sequence starting at i covers frames i .. i + nt - 1, so the last
        # admissible start index is n_frames - nt (inclusive) in both modes.
        last_start = self.X.shape[0] - self.nt
        if self.sequence_start_mode == 'all':  # allow any possible sequence, starting from any frame
            starts = [i for i in range(last_start + 1)
                      if self.sources[i] == self.sources[i + self.nt - 1]]
        else:  # 'unique': each frame is in at most one sequence
            starts = []
            curr_location = 0
            while curr_location <= last_start:
                if self.sources[curr_location] == self.sources[curr_location + self.nt - 1]:
                    starts.append(curr_location)
                    curr_location += self.nt
                else:
                    curr_location += 1
        # Integer array in both modes so the result is always usable as indices.
        self.possible_starts = np.array(starts, dtype=int)

        if shuffle:
            self.possible_starts = np.random.permutation(self.possible_starts)
        if N_seq is not None and len(self.possible_starts) > N_seq:  # keep only a subset of sequences
            self.possible_starts = self.possible_starts[:N_seq]
        self.N_sequences = len(self.possible_starts)
        super(SequenceGenerator, self).__init__(len(self.possible_starts), batch_size, shuffle, seed)

    def next(self):
        """Return the next ``(batch_x, batch_y)`` pair.

        ``batch_x`` has shape (batch, nt) + im_shape. ``batch_y`` is a vector
        of zeros in 'error' mode and ``batch_x`` itself in 'prediction' mode.
        """
        # Only the shared index generator is advanced under the lock.
        with self.lock:
            index_array, current_index, current_batch_size = next(self.index_generator)
        batch_x = np.zeros((current_batch_size, self.nt) + self.im_shape, np.float32)
        for i, idx in enumerate(index_array):
            start = self.possible_starts[idx]
            batch_x[i] = self.preprocess(self.X[start:start + self.nt])
        if self.output_mode == 'error':  # model outputs errors, so y should be zeros
            batch_y = np.zeros(current_batch_size, np.float32)
        elif self.output_mode == 'prediction':  # output actual pixels
            batch_y = batch_x
        return batch_x, batch_y

    def preprocess(self, X):
        """Convert frames to float32 and divide by 255."""
        return X.astype(np.float32) / 255

    def create_all(self):
        """Return every available sequence, preprocessed, as one float32 array."""
        X_all = np.zeros((self.N_sequences, self.nt) + self.im_shape, np.float32)
        for i, idx in enumerate(self.possible_starts):
            X_all[i] = self.preprocess(self.X[idx:idx + self.nt])
        return X_all
226 changes: 226 additions & 0 deletions FACES_project/faces_eval_cos_similarity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
'''
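Evaluate a trained PredNet on the FACES test sequences.
Samples cosine similarity of the representation-layer outputs within and
between objects, runs a permutation test on the mean difference, and plots
the resulting distributions.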
'''

import os
import numpy as np
import random
from operator import itemgetter
from scipy.spatial import distance as dist #for computing distance of vectors
import scipy.io as sio
from six.moves import cPickle
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.text
#from sklearn.metrics.pairwise import cosine_similarity

import hickle as hkl


from keras import backend as K
from keras.models import Model, model_from_json
from keras.layers import Input, Dense, Flatten

from prednet import PredNet
from data_utils import SequenceGenerator
from faces_settings import *

# Evaluation settings: batch size used for prediction and number of time steps per sequence.
batch_size = 10
nt = 10

weights_file = os.path.join(WEIGHTS_DIR, 'prednet_faces_weights.hdf5')
json_file = os.path.join(WEIGHTS_DIR, 'prednet_faces_model.json')
test_file = os.path.join(DATA_DIR, 'X_test.hkl')

# Load trained model (the context manager closes the file even if reading fails)
with open(json_file, 'r') as f:
    json_string = f.read()
train_model = model_from_json(json_string, custom_objects={'PredNet': PredNet})
train_model.load_weights(weights_file)

# Create testing model (to output representation layers):
# rebuild the PredNet layer from the trained layer's config and weights,
# switching only its output mode.
layer_config = train_model.layers[1].get_config()
layer_config['output_mode'] = 'all_R'
test_prednet = PredNet(weights=train_model.layers[1].get_weights(), **layer_config)
input_shape = list(train_model.layers[0].batch_input_shape[1:])
input_shape[0] = nt  # first non-batch axis is the number of time steps
inputs = Input(shape=tuple(input_shape))
repLayers = test_prednet(inputs)
test_model = Model(input=inputs, output=repLayers)

# Get the representation-layer vectors for the given testing set
X_test = hkl.load(test_file)
X_hat = test_model.predict(X_test, batch_size)


# Create distribution within object transformations.
# NOTE: no RNG seed is set in this script, so the samples drawn below differ
# from run to run; seed numpy's RNG beforehand if reproducible numbers are needed.
numObjs = X_hat.shape[0]  # number of different objects
numTransf = X_hat.shape[1]  # number of transformations/time-steps per object
numSamplesToDraw = 500

# Baseline for the cosine-similarity measure: pairs of independent
# normally-distributed vectors of the same length as the representations.
cosSimTest = np.zeros((numSamplesToDraw, 1))
for i in range(numSamplesToDraw):
    cosSimTest[i] = 1 - dist.cosine(np.random.normal(size=X_hat.shape[2]),
                                    np.random.normal(size=X_hat.shape[2]))

# Cos sim within same objects: one object and two time steps per sample.
# Index ranges come from the data (numObjs, numTransf) instead of fixed sizes,
# and the index arrays are 1-D so each lookup yields a plain 1-D vector.
distWithinObjs = np.zeros((numSamplesToDraw, 1))
sampleObjIDs_1 = np.random.randint(0, numObjs, size=numSamplesToDraw)
# Ignore the rep layer for time step 0: it is the same for all objects because
# the network lags one step behind and has not seen anything yet.
sampleTransIDs_1 = np.random.randint(1, numTransf, size=numSamplesToDraw)
sampleTransIDs_2 = np.random.randint(1, numTransf, size=numSamplesToDraw)
for i in range(numSamplesToDraw):
    distWithinObjs[i] = 1 - dist.cosine(X_hat[sampleObjIDs_1[i], sampleTransIDs_1[i], :],
                                        X_hat[sampleObjIDs_1[i], sampleTransIDs_2[i], :])

'''
for i in range((numSamplesToDraw,1)):
objIdx=random.randint(0,numObjs-1)
transIdx_1= random.randint(1,numTransf-1)
transIdx_2 = random.randint(1,numTransf-1)
distWithinObjs[i] = 1-dist.cosine(X_hat[objIdx, transIdx_1,:],X_hat[objIdx, transIdx_2, :] )
'''

# Cosine similarity across objects: two different objects, one time step each.
if numObjs < 2:
    raise ValueError('at least two objects are needed to sample between-object pairs')
distBetweenObjs = np.zeros((numSamplesToDraw, 1))
sampleObjIDs_1 = np.random.randint(0, numObjs, size=numSamplesToDraw)
# Add a non-zero offset modulo numObjs so the second object is drawn uniformly
# from the *other* objects; independent draws would occasionally pair an object
# with itself and leak within-object pairs into this distribution.
sampleObjIDs_2 = (sampleObjIDs_1 + np.random.randint(1, numObjs, size=numSamplesToDraw)) % numObjs
# Ignore the rep layer for time step 0: it is the same for all objects because
# the network lags one step behind and has not seen anything yet.
sampleTransIDs_1 = np.random.randint(1, numTransf, size=numSamplesToDraw)
sampleTransIDs_2 = np.random.randint(1, numTransf, size=numSamplesToDraw)
for i in range(numSamplesToDraw):
    distBetweenObjs[i] = 1 - dist.cosine(X_hat[sampleObjIDs_1[i], sampleTransIDs_1[i], :],
                                         X_hat[sampleObjIDs_2[i], sampleTransIDs_2[i], :])



#Create histogram
'''
numSamplesHist = 1000
rankHistogramArray = np.zeros(numObjs)
#tmpRandSamples = np.zeros((numObjs, X_hat.shape[2]))
samplesObjDistArray = np.zeros((numObjs,2))
for k in range (numSamplesHist):
#object to compare to
trueObjIdx = random.randint(0,numObjs-1)
trueTransIdx = random.randint(0,numTransf-1)
#select a random transformation/sample for each object and compute cosine distance
samplesObjDistArray[:,0]=range(numObjs)#initialize first column to have the objID in order
samplesObjDistArray[:,1]= 0.0 #reset the distance value
for m in range(numObjs):
samplesObjDistArray[m,1] = dist.cosine(X_hat[trueObjIdx, trueTransIdx,:],X_hat[m,random.randint(0,numTransf-1),:])
#sort array by distance
samplesObjDistArray=samplesObjDistArray.tolist()#make a list
samplesObjDistArray.sort(key=itemgetter(1))#sort by the first column =distance
#find trueObject position within the array
samplesObjDistArray = np.array(samplesObjDistArray) #change back to np array
trueRankIdx = np.where(samplesObjDistArray[:,0]==trueObjIdx)[0][0] #find distance position of true object
#increment proper rankIndx for histogram
rankHistogramArray[trueRankIdx] = rankHistogramArray[trueRankIdx]+1 #increment true rank distance position by 1
'''

# Permutation test: pool both samples, shuffle the pooled values (which
# permutes the within/between labels) and record the difference of the means
# of the two halves for each permutation.
numPermTests = 200
shuffledDistancesArray = np.concatenate((distWithinObjs, distBetweenObjs), axis=0)
# Floor division keeps the split point an int; true division yields a float
# under Python 3, which cannot be used as a slice index.
middlePosition = shuffledDistancesArray.shape[0] // 2
permMeanDiffArray = np.zeros((numPermTests, 1))
for i in range(numPermTests):
    np.random.shuffle(shuffledDistancesArray)  # in place, along the first axis
    rndMeanWithin = np.mean(shuffledDistancesArray[:middlePosition])
    rndMeanBetween = np.mean(shuffledDistancesArray[middlePosition:])
    permMeanDiffArray[i] = rndMeanBetween - rndMeanWithin

# Plot the sampled distributions and the permutation-test result as five
# stacked histograms and save them as a single PNG.
plot_save_dir = os.path.join(RESULTS_SAVE_DIR, 'distribution_plots/')
# savefig does not create missing directories, so make sure the target exists.
if not os.path.exists(plot_save_dir):
    os.makedirs(plot_save_dir)
fig = plt.figure()

# 1) cosine similarity of random normal vectors (baseline)
ax = fig.add_subplot(5, 1, 1)
ax.set_title("Cos Sim for Norm Distribution")
ax.hist(cosSimTest, facecolor='purple')

# 2) cosine similarity within the same object
ax = fig.add_subplot(5, 1, 2)
sampledMean_within = np.mean(distWithinObjs)
ax.set_title('Sampled Cosine Similarity Within Objects, (mean = %s)' % (sampledMean_within))
ax.hist(distWithinObjs, facecolor='green')

# 3) cosine similarity between different objects
ax = fig.add_subplot(5, 1, 3)
sampledMean_between = np.mean(distBetweenObjs)
ax.set_title('Sampled Cosine Similarity Between Objects, (mean = %s)' % (sampledMean_between))
ax.hist(distBetweenObjs, facecolor='blue')

# 4) both distributions overlaid; the title reports the difference of their means
ax = fig.add_subplot(5, 1, 4)
sampledMean_difference = sampledMean_between - sampledMean_within
ax.set_title('Sampled Cosine Similarity Within and Between Objects, (mean difference = %s)' % (sampledMean_difference))
ax.hist(distWithinObjs, facecolor='green')
ax.hist(distBetweenObjs, facecolor='blue', alpha=0.5)

# 5) distribution of the permuted mean differences
ax = fig.add_subplot(5, 1, 5)
permMeanDiff = np.mean(permMeanDiffArray)
ax.set_title('Permuted Mean Difference Distribution, (permMeanDiff = %s)' % (permMeanDiff))
ax.set_ylabel("Mean Frequency")
ax.hist(permMeanDiffArray, facecolor='purple')
# TODO: mark sampledMean_difference on this histogram (e.g. with a vertical line).

fig.subplots_adjust(hspace=1)
# tight_layout is called once, after all five axes exist; it recomputes the subplot spacing.
plt.tight_layout()
plt.savefig(os.path.join(plot_save_dir, 'plot_distributions_3layers.png'))
#sio.savemat(plot_save_dir+'permMeanDiffArray_4layers.mat',{'permMeanDiffArray':permMeanDiffArray})
#sio.savemat(plot_save_dir+'cosDist_withinObjs_4layers.mat', {'distWithinObjs':distWithinObjs})
#sio.savemat(plot_save_dir+'cosDist_acrossObjs_4layers.mat',{'distBetweenObjs':distBetweenObjs})
#sio.savemat(plot_save_dir+'rankHistogram.mat_4layers',{'rankHistogramArray2':rankHistogramArray})

#gs = gridspec.GridSpec(2)
#gs.update(wspace=0., hspace=0.)
#plot_save_dir = os.path.join(RESULTS_SAVE_DIR, 'distribution_plots/')
#if not os.path.exists(plot_save_dir): os.mkdir(plot_save_dir)
#plot_idx = np.random.permutation(X_test.shape[0])[:n_plot]
#for i in plot_idx:
# for t in range(nt):
# plt.subplot(gs[t])
# plt.imshow(X_test[i,t,:,:,0], interpolation='none') #imshow input(2D) or (3D with the last dimension being 3 or greater) if 1 channel od imshow(2D)
# plt.tick_params(axis='both', which='both', bottom='off', top='off', left='off', right='off', labelbottom='off', labelleft='off')
# if t==0: plt.ylabel('Actual', fontsize=10)

# plt.subplot(gs[t + nt])
# plt.imshow(X_hat[i,t,:,:,0], interpolation='none')
# plt.tick_params(axis='both', which='both', bottom='off', top='off', left='off', right='off', labelbottom='off', labelleft='off')
# if t==0: plt.ylabel('Predicted', fontsize=10)

# plt.savefig(plot_save_dir + 'plot_' + str(i) + '.png')
# plt.clf()



94 changes: 94 additions & 0 deletions FACES_project/faces_eval_predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
'''
Evaluate trained PredNet on the FACES test sequences.
Calculates mean-squared error and plots predictions.
'''

import os
import numpy as np
from six.moves import cPickle
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

import hickle as hkl

from keras import backend as K
from keras.models import Model, model_from_json
from keras.layers import Input, Dense, Flatten

from prednet import PredNet
from data_utils import SequenceGenerator
from faces_settings import *


# Evaluation settings: number of sequences to plot, batch size used for
# prediction and number of time steps per sequence.
n_plot = 20
batch_size = 10
nt = 10

weights_file = os.path.join(WEIGHTS_DIR, 'prednet_faces_weights.hdf5')
json_file = os.path.join(WEIGHTS_DIR, 'prednet_faces_model.json')
test_file = os.path.join(DATA_DIR, 'X_test.hkl')
test_sources = os.path.join(DATA_DIR, 'sources_test.hkl')

# Load trained model (the context manager closes the file even if reading fails)
with open(json_file, 'r') as f:
    json_string = f.read()
train_model = model_from_json(json_string, custom_objects={'PredNet': PredNet})
train_model.load_weights(weights_file)

# Create testing model (to output predictions):
# rebuild the PredNet layer from the trained layer's config and weights,
# switching only its output mode.
layer_config = train_model.layers[1].get_config()
layer_config['output_mode'] = 'prediction'

dim_ordering = layer_config['dim_ordering']
test_prednet = PredNet(weights=train_model.layers[1].get_weights(), **layer_config)
input_shape = list(train_model.layers[0].batch_input_shape[1:])
input_shape[0] = nt  # first non-batch axis is the number of time steps
inputs = Input(shape=tuple(input_shape))
predictions = test_prednet(inputs)
test_model = Model(input=inputs, output=predictions)

# The test set is loaded directly from the hickle file; the SequenceGenerator
# route below is kept for reference but is not used.
#test_generator = SequenceGenerator(test_file, test_sources, nt, sequence_start_mode='unique', dim_ordering=dim_ordering)
#X_test = test_generator.create_all()

X_test = hkl.load(test_file)
X_hat = test_model.predict(X_test, batch_size)

# For 'th' ordering move the channel axis to the end so both arrays can be
# indexed as [sequence, time, row, col, channel] below.
if dim_ordering == 'th':
    X_test = np.transpose(X_test, (0, 1, 3, 4, 2))
    X_hat = np.transpose(X_hat, (0, 1, 3, 4, 2))

# Compare MSE of PredNet predictions vs. using last frame. Write results to prediction_scores.txt
mse_model = np.mean((X_test[:, 1:] - X_hat[:, 1:]) ** 2)  # look at all timesteps except the first
mse_prev = np.mean((X_test[:, :-1] - X_test[:, 1:]) ** 2)  # baseline: previous frame as the prediction
if not os.path.exists(RESULTS_SAVE_DIR):
    os.makedirs(RESULTS_SAVE_DIR)
# os.path.join works whether or not RESULTS_SAVE_DIR ends with a separator;
# the context manager closes the file even if a write fails.
with open(os.path.join(RESULTS_SAVE_DIR, 'prediction_scores.txt'), 'w') as f:
    f.write("Model MSE: %f\n" % mse_model)
    f.write("Previous Frame MSE: %f" % mse_prev)

# Plot some predictions: for each of n_plot randomly chosen sequences, draw the
# actual frames in the top row and the predicted frames in the bottom row.
aspect_ratio = float(X_hat.shape[2]) / X_hat.shape[3]
plt.figure(figsize=(nt, 2 * aspect_ratio))
gs = gridspec.GridSpec(2, nt)
gs.update(wspace=0., hspace=0.)
plot_save_dir = os.path.join(RESULTS_SAVE_DIR, 'prediction_plots/')
# makedirs also creates RESULTS_SAVE_DIR when it does not exist yet.
if not os.path.exists(plot_save_dir):
    os.makedirs(plot_save_dir)
plot_idx = np.random.permutation(X_test.shape[0])[:n_plot]
# Booleans instead of 'off' strings: newer matplotlib treats the strings as
# truthy, which would leave the ticks and labels visible.
hide_ticks = dict(axis='both', which='both', bottom=False, top=False, left=False,
                  right=False, labelbottom=False, labelleft=False)
for i in plot_idx:
    for t in range(nt):
        plt.subplot(gs[t])
        # only channel 0 is shown, as a 2-D image
        plt.imshow(X_test[i, t, :, :, 0], interpolation='none')
        plt.tick_params(**hide_ticks)
        if t == 0:
            plt.ylabel('Actual', fontsize=10)

        plt.subplot(gs[t + nt])
        plt.imshow(X_hat[i, t, :, :, 0], interpolation='none')
        plt.tick_params(**hide_ticks)
        if t == 0:
            plt.ylabel('Predicted', fontsize=10)

    # one image per sequence, then clear the figure for the next one
    plt.savefig(plot_save_dir + 'plot_' + str(i) + '.png')
    plt.clf()
1 change: 1 addition & 0 deletions FACES_project/faces_model_data/prednet_faces_model.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"class_name": "Model", "keras_version": "1.0.7", "config": {"layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": [null, 10, 1, 64, 64], "name": "input_1", "input_dtype": "float32"}, "inbound_nodes": [], "name": "input_1"}, {"class_name": "PredNet", "config": {"trainable": true, "dim_ordering": "th", "pixel_max": 1.0, "Ahat_filt_sizes": [3, 3, 3, 3], "R_filt_sizes": [3, 3, 3, 3], "unroll": false, "consume_less": "cpu", "input_length": null, "LSTM_inner_activation": "hard_sigmoid", "output_mode": "error", "stateful": false, "error_activation": "relu", "A_activation": "relu", "A_filt_sizes": [3, 3, 3], "stack_sizes": [1, 48, 96, 192], "name": "prednet_1", "go_backwards": false, "extrap_start_time": null, "input_dim": null, "R_stack_sizes": [1, 48, 96, 192], "return_sequences": true, "LSTM_activation": "tanh"}, "inbound_nodes": [[["input_1", 0, 0]]], "name": "prednet_1"}, {"class_name": "TimeDistributed", "config": {"layer": {"class_name": "Dense", "config": {"W_constraint": null, "b_constraint": null, "name": "dense_1", "activity_regularizer": null, "trainable": false, "init": "glorot_uniform", "bias": true, "input_dim": null, "b_regularizer": null, "W_regularizer": null, "activation": "linear", "output_dim": 1}}, "trainable": false, "name": "timedistributed_1"}, "inbound_nodes": [[["prednet_1", 0, 0]]], "name": "timedistributed_1"}, {"class_name": "Flatten", "config": {"trainable": true, "name": "flatten_1"}, "inbound_nodes": [[["timedistributed_1", 0, 0]]], "name": "flatten_1"}, {"class_name": "Dense", "config": {"W_constraint": null, "b_constraint": null, "name": "dense_2", "activity_regularizer": null, "trainable": false, "init": "glorot_uniform", "bias": true, "input_dim": null, "b_regularizer": null, "W_regularizer": null, "activation": "linear", "output_dim": 1}, "inbound_nodes": [[["flatten_1", 0, 0]]], "name": "dense_2"}], "input_layers": [["input_1", 0, 0]], "output_layers": [["dense_2", 0, 0]], "name": "model_1"}}
Binary file not shown.
Loading

0 comments on commit dbe567e

Please sign in to comment.