Skip to content

Commit

Permalink
added jia's code, and some other things I hadn't been tracking
Browse files Browse the repository at this point in the history
  • Loading branch information
Ian Goodfellow committed May 16, 2012
1 parent 2a5d6eb commit 6087a83
Show file tree
Hide file tree
Showing 94 changed files with 10,021 additions and 0 deletions.
29 changes: 29 additions & 0 deletions dbm/code_DBM/make_test.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
% make_test: build a tiny DBM test fixture from a slice of a trained model.
%
% Takes one data row and a small sub-block of the model's weights/biases,
% runs mf_class on them, and writes the reduced model to model.mat and the
% input row plus the two mf_class outputs to data.mat.
%
% NOTE(review): makebatches is invoked with no arguments and batchdata is
% used right after it, so it is presumably a script that defines batchdata
% in this workspace -- confirm.
makebatches
[numcases numdims numbatches]=size(batchdata);
% Start from the whole first batch; it is cut down to a single row below.
data = [batchdata(:,:,1)];
N=1;

% NOTE(review): vishid, hidpen, visbiases, hidbiases and penbiases are not
% assigned anywhere in this script, so they presumably come from this load
% -- confirm.
load fullmnist_dbm
% Size of the slice: D visible columns starting at column ofs, the first N1
% columns of vishid and the first N2 columns of hidpen.
D = 5;
N1 = 6;
N2 = 7;
ofs = 622;
% Keep only the first case, restricted to the selected visible columns.
data = data(1,ofs:(ofs+D-1));

% Cut the weights and biases down to the same slice. The weights are also
% multiplied by 10; the reason is not stated in this file.
vishid = 10 * vishid(ofs:(ofs+D-1),1:N1);
hidpen = 10 * hidpen(1:N1,1:N2);
visbiases = visbiases(1,ofs:(ofs+D-1));
hidbiases = hidbiases(1,1:N1);
penbiases = penbiases(1,1:N2);

[numdims numhids] = size(vishid);
[numhids numpens] = size(hidpen);

% NOTE(review): this overwrites numcases/numdims with the sizes of the full
% batchdata rather than the slice; neither is passed to mf_class below.
[numcases numdims numbatches]=size(batchdata);
N=numcases;
[h1, h2] = ...
mf_class(data,vishid,hidbiases,visbiases,hidpen,penbiases);


% Persist the reduced model and the input row with the mf_class outputs.
save('model.mat','vishid','hidpen','visbiases','hidbiases','penbiases')
save('data.mat', 'data', 'h1', 'h2' )
53 changes: 53 additions & 0 deletions dbm/code_DBM_true_unsup/CG_MNIST.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
function [f, df] = ECG1(VV,Dim,XX,target,temp_h2);
% ECG1  Cross-entropy loss and full gradient of the two-hidden-layer classifier.
%
%   [f, df] = ECG1(VV, Dim, XX, target, temp_h2)
%
%   Inputs
%     VV       all parameters packed into one vector, in the order
%              w1_vishid (numdims x numhids), w1_penhid (numpens x numhids),
%              hidpen (numhids x numpens), w_class (numpens x numclasses),
%              hidbiases (1 x numhids), penbiases (1 x numpens),
%              topbiases (1 x numclasses)
%     Dim      [numdims; numhids; numpens]
%     XX       N x numdims input cases
%     target   N x numclasses target matrix; the number of classes is read
%              from its column count (the caller in this code base uses 10)
%     temp_h2  N x numpens extra input to the first hidden layer, weighted
%              by w1_penhid
%
%   Outputs
%     f        scalar, -sum(sum(target .* log(softmax output)))
%     df       column vector of partial derivatives, packed like VV
%
%   The softmax is evaluated in the log domain (row maximum subtracted
%   first) so large activations cannot overflow exp() and turn the loss
%   into NaN. All column sums name the dimension explicitly so a single
%   case (N == 1) still yields bias gradients of the right length.

numdims = Dim(1);
numhids = Dim(2);
numpens = Dim(3);
N = size(XX,1);
numclasses = size(target,2);

% Unpack the parameter vector; pos is the index of the last element consumed.
pos = 0;
w1_vishid = reshape(VV(pos+1:pos+numdims*numhids),numdims,numhids);
pos = pos + numdims*numhids;
w1_penhid = reshape(VV(pos+1:pos+numpens*numhids),numpens,numhids);
pos = pos + numpens*numhids;
hidpen = reshape(VV(pos+1:pos+numhids*numpens),numhids,numpens);
pos = pos + numhids*numpens;
w_class = reshape(VV(pos+1:pos+numpens*numclasses),numpens,numclasses);
pos = pos + numpens*numclasses;
hidbiases = reshape(VV(pos+1:pos+numhids),1,numhids);
pos = pos + numhids;
penbiases = reshape(VV(pos+1:pos+numpens),1,numpens);
pos = pos + numpens;
topbiases = reshape(VV(pos+1:pos+numclasses),1,numclasses);

% Forward pass through the two logistic layers.
w1probs = 1./(1 + exp(-XX*w1_vishid - temp_h2*w1_penhid - repmat(hidbiases,N,1)));
w2probs = 1./(1 + exp(-w1probs*hidpen - repmat(penbiases,N,1)));

% Log-softmax: shifting each row by its maximum leaves the softmax unchanged
% but keeps every exp() argument <= 0.
act = w2probs*w_class + repmat(topbiases,N,1);
act = act - repmat(max(act,[],2),1,numclasses);
logprob = act - repmat(log(sum(exp(act),2)),1,numclasses);
targetout = exp(logprob);

f = -sum(sum(target.*logprob));

% Backward pass.
Ix_class = targetout - target;
dw_class = w2probs'*Ix_class;
dtopbiases = sum(Ix_class,1);

Ix2 = (Ix_class*w_class').*w2probs.*(1-w2probs);
dw2_hidpen = w1probs'*Ix2;
dw2_biases = sum(Ix2,1);

Ix1 = (Ix2*hidpen').*w1probs.*(1-w1probs);
dw1_penhid = temp_h2'*Ix1;
dw1_vishid = XX'*Ix1;
dw1_biases = sum(Ix1,1);

% Pack the gradient in the same order as VV.
df = [dw1_vishid(:); dw1_penhid(:); dw2_hidpen(:); dw_class(:); ...
      dw1_biases(:); dw2_biases(:); dtopbiases(:)];


172 changes: 172 additions & 0 deletions dbm/code_DBM_true_unsup/backprop.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.


% Per-epoch history of misclassification counts (*_err) and summed
% cross-entropy (*_crerr) on the test and training sets.
test_err=[];
test_crerr=[];
train_err=[];
train_crerr=[];

fprintf(1,'\nTraining discriminative model on MNIST by minimizing cross entropy error. \n');
% NOTE(review): the optimisation loop further down merges 100 mini-batches
% into each conjugate-gradient batch; check this message against the
% actual batch layout.
fprintf(1,'60 batches of 1000 cases each. \n');

% batchdata is read here before anything in this script assigns it, so it
% must already exist in the workspace when the script is run.
[numcases numdims numbatches]=size(batchdata);
N=numcases;

% NOTE(review): vishid, hidpen, hidbiases, visbiases and penbiases are not
% assigned in this script, so they presumably come from this load -- confirm.
load fullmnist_dbm
[numdims numhids] = size(vishid);
[numhids numpens] = size(hidpen);

%%%%%% Preprocess the data %%%%%%%%%%%%%%%%%%%%%%

% Run mf_class once per test batch and cache its second output; the first
% output is discarded.
[testnumcases testnumdims testnumbatches]=size(testbatchdata);
N=testnumcases;
temp_h2_test = zeros(testnumcases,numpens,testnumbatches);
for batch = 1:testnumbatches
data = [testbatchdata(:,:,batch)];
[temp_h1, temp_h2] = ...
mf_class(data,vishid,hidbiases,visbiases,hidpen,penbiases);
temp_h2_test(:,:,batch) = temp_h2;
end

% Same caching for every training batch.
[numcases numdims numbatches]=size(batchdata);
N=numcases;
temp_h2_train = zeros(numcases,numpens,numbatches);
for batch = 1:numbatches
data = [batchdata(:,:,batch)];
[temp_h1, temp_h2] = ...
mf_class(data,vishid,hidbiases,visbiases,hidpen,penbiases);
temp_h2_train(:,:,batch) = temp_h2;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Initialise the classifier's weights from the loaded model: the first
% hidden layer receives the data through vishid and the cached temp_h2
% through the transpose of hidpen; the second layer uses hidpen itself.
w1_penhid = hidpen';
w1_vishid = vishid;
w2 = hidpen;
h1_biases = hidbiases; h2_biases = penbiases;

% The 10-way output layer starts from small random values.
w_class = 0.1*randn(numpens,10);
topbiases = 0.1*randn(1,10);

% Main training loop. Each epoch:
%   1. measures misclassification count and cross-entropy on the test set,
%   2. measures the same on the training set,
%   3. saves the current weights and the error history,
%   4. runs conjugate gradient over larger batches built by merging
%      randomly chosen mini-batches.
% Expects maxepoch, batchdata, batchtargets, testbatchdata,
% testbatchtargets, temp_h2_train, temp_h2_test and the current weights
% (w1_vishid, w1_penhid, w2, w_class, h1_biases, h2_biases, topbiases) to be
% present in the workspace.
for epoch = 1:maxepoch

  % The class count follows the output layer instead of a literal 10.
  numclasses = size(w_class,2);

  %%%% TEST STATS
  %%%% Error rates
  [testnumcases testnumdims testnumbatches]=size(testbatchdata);
  N=testnumcases;
  bias_hid = repmat(h1_biases,N,1);
  bias_pen = repmat(h2_biases,N,1);
  bias_top = repmat(topbiases,N,1);

  err_cr=0;
  counter=0;
  for batch = 1:testnumbatches
    data = testbatchdata(:,:,batch);
    temp_h2 = temp_h2_test(:,:,batch);
    target = testbatchtargets(:,:,batch);

    w1probs = 1./(1 + exp(-data*w1_vishid -temp_h2*w1_penhid - bias_hid ));
    w2probs = 1./(1 + exp(-w1probs*w2 - bias_pen));
    % Log-softmax with the row maximum removed, so exp() cannot overflow
    % and produce a NaN cross-entropy.
    act = w2probs*w_class + bias_top;
    act = act - repmat(max(act,[],2),1,numclasses);
    logprob = act - repmat(log(sum(exp(act),2)),1,numclasses);
    [I J]=max(logprob,[],2);
    [I1 J1]=max(target,[],2);
    counter=counter+sum(J~=J1);
    err_cr = err_cr - sum(sum( target.*logprob ));
  end

  test_err(epoch)=counter;
  test_crerr(epoch)=err_cr;
  fprintf(1,'\nepoch %d test misclassification err %d (out of 10000), test cross entropy error %f \n',epoch,test_err(epoch),test_crerr(epoch));
  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

  %%%% TRAINING STATS
  %%%% Error rates
  [numcases numdims numbatches]=size(batchdata);
  N=numcases;
  % Rebuild the replicated biases for the training batch size; reusing the
  % test-sized matrices only works when both batch sizes are equal.
  bias_hid = repmat(h1_biases,N,1);
  bias_pen = repmat(h2_biases,N,1);
  bias_top = repmat(topbiases,N,1);

  err_cr=0;
  counter=0;
  for batch = 1:numbatches
    data = batchdata(:,:,batch);
    temp_h2 = temp_h2_train(:,:,batch);
    target = batchtargets(:,:,batch);

    w1probs = 1./(1 + exp(-data*w1_vishid -temp_h2*w1_penhid - bias_hid ));
    w2probs = 1./(1 + exp(-w1probs*w2 - bias_pen));
    act = w2probs*w_class + bias_top;
    act = act - repmat(max(act,[],2),1,numclasses);
    logprob = act - repmat(log(sum(exp(act),2)),1,numclasses);
    [I J]=max(logprob,[],2);
    [I1 J1]=max(target,[],2);
    counter=counter+sum(J~=J1);

    err_cr = err_cr - sum(sum( target.*logprob ));
  end

  train_err(epoch)=counter;
  train_crerr(epoch)=err_cr;
  fprintf(1,'epoch %d train misclassification err %d train (out of 60000), train cross entropy error %f \n',epoch, train_err(epoch),train_crerr(epoch));
  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

  save backprop_weights w1_vishid w1_penhid w2 w_class h1_biases h2_biases topbiases test_err test_crerr train_err train_crerr

  %%% Do Conjugate Gradient Optimization

  % Merge cg_group randomly chosen mini-batches into each CG batch. The
  % sizes are derived from the data; with 600 mini-batches of 100 cases this
  % gives the same 6 batches of 10000 rows as the former literals.
  cg_group = 100;
  cg_rows = cg_group*numcases;
  rr = randperm(numbatches);
  for batch = 1:floor(numbatches/cg_group)
    fprintf(1,'epoch %d batch %d\r',epoch,batch);
    data = zeros(cg_rows,numdims);
    temp_h2 = zeros(cg_rows,numpens);
    targets = zeros(cg_rows,numclasses);
    tt1 = (batch-1)*cg_group+1:batch*cg_group;
    for tt = 1:cg_group
      rows = (tt-1)*numcases+1:tt*numcases;
      data(rows,:) = batchdata(:,:,rr(tt1(tt)));
      temp_h2(rows,:) = temp_h2_train(:,:,rr(tt1(tt)));
      targets(rows,:) = batchtargets(:,:,rr(tt1(tt)));
    end

    %%%%%%%% DO CG with 3 linesearches

    VV = [w1_vishid(:)' w1_penhid(:)' w2(:)' w_class(:)' h1_biases(:)' h2_biases(:)' topbiases(:)']';
    Dim = [numdims; numhids; numpens; ];

    % checkgrad('CG_MNIST_INIT',VV,10^-5,Dim,data,targets);
    max_iter=3;
    % The first five epochs use the CG_MNIST_INIT objective, later epochs
    % use CG_MNIST.
    if epoch<6
      [X, fX, num_iter,ecg_XX] = minimize(VV,'CG_MNIST_INIT',max_iter,Dim,data,targets,temp_h2);
    else
      [X, fX, num_iter,ecg_XX] = minimize(VV,'CG_MNIST',max_iter,Dim,data,targets,temp_h2);
    end

    % Unpack the optimised vector in the same order VV was packed.
    w1_vishid = reshape(X(1:numdims*numhids),numdims,numhids);
    xxx = numdims*numhids;
    w1_penhid = reshape(X(xxx+1:xxx+numpens*numhids),numpens,numhids);
    xxx = xxx+numpens*numhids;
    w2 = reshape(X(xxx+1:xxx+numhids*numpens),numhids,numpens);
    xxx = xxx+numhids*numpens;
    w_class = reshape(X(xxx+1:xxx+numpens*numclasses),numpens,numclasses);
    xxx = xxx+numpens*numclasses;
    h1_biases = reshape(X(xxx+1:xxx+numhids),1,numhids);
    xxx = xxx+numhids;
    h2_biases = reshape(X(xxx+1:xxx+numpens),1,numpens);
    xxx = xxx+numpens;
    topbiases = reshape(X(xxx+1:xxx+numclasses),1,numclasses);
    xxx = xxx+numclasses;

  end

end


53 changes: 53 additions & 0 deletions dbm/code_DBM_true_unsup/code_DBM/CG_MNIST.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
function [f, df] = ECG1(VV,Dim,XX,target,temp_h2);
% ECG1  Cross-entropy loss and full gradient of the two-hidden-layer classifier.
%
%   [f, df] = ECG1(VV, Dim, XX, target, temp_h2)
%
%   Inputs
%     VV       all parameters packed into one vector, in the order
%              w1_vishid (numdims x numhids), w1_penhid (numpens x numhids),
%              hidpen (numhids x numpens), w_class (numpens x numclasses),
%              hidbiases (1 x numhids), penbiases (1 x numpens),
%              topbiases (1 x numclasses)
%     Dim      [numdims; numhids; numpens]
%     XX       N x numdims input cases
%     target   N x numclasses target matrix; the number of classes is read
%              from its column count (the caller in this code base uses 10)
%     temp_h2  N x numpens extra input to the first hidden layer, weighted
%              by w1_penhid
%
%   Outputs
%     f        scalar, -sum(sum(target .* log(softmax output)))
%     df       column vector of partial derivatives, packed like VV
%
%   The softmax is evaluated in the log domain (row maximum subtracted
%   first) so large activations cannot overflow exp() and turn the loss
%   into NaN. All column sums name the dimension explicitly so a single
%   case (N == 1) still yields bias gradients of the right length.

numdims = Dim(1);
numhids = Dim(2);
numpens = Dim(3);
N = size(XX,1);
numclasses = size(target,2);

% Unpack the parameter vector; pos is the index of the last element consumed.
pos = 0;
w1_vishid = reshape(VV(pos+1:pos+numdims*numhids),numdims,numhids);
pos = pos + numdims*numhids;
w1_penhid = reshape(VV(pos+1:pos+numpens*numhids),numpens,numhids);
pos = pos + numpens*numhids;
hidpen = reshape(VV(pos+1:pos+numhids*numpens),numhids,numpens);
pos = pos + numhids*numpens;
w_class = reshape(VV(pos+1:pos+numpens*numclasses),numpens,numclasses);
pos = pos + numpens*numclasses;
hidbiases = reshape(VV(pos+1:pos+numhids),1,numhids);
pos = pos + numhids;
penbiases = reshape(VV(pos+1:pos+numpens),1,numpens);
pos = pos + numpens;
topbiases = reshape(VV(pos+1:pos+numclasses),1,numclasses);

% Forward pass through the two logistic layers.
w1probs = 1./(1 + exp(-XX*w1_vishid - temp_h2*w1_penhid - repmat(hidbiases,N,1)));
w2probs = 1./(1 + exp(-w1probs*hidpen - repmat(penbiases,N,1)));

% Log-softmax: shifting each row by its maximum leaves the softmax unchanged
% but keeps every exp() argument <= 0.
act = w2probs*w_class + repmat(topbiases,N,1);
act = act - repmat(max(act,[],2),1,numclasses);
logprob = act - repmat(log(sum(exp(act),2)),1,numclasses);
targetout = exp(logprob);

f = -sum(sum(target.*logprob));

% Backward pass.
Ix_class = targetout - target;
dw_class = w2probs'*Ix_class;
dtopbiases = sum(Ix_class,1);

Ix2 = (Ix_class*w_class').*w2probs.*(1-w2probs);
dw2_hidpen = w1probs'*Ix2;
dw2_biases = sum(Ix2,1);

Ix1 = (Ix2*hidpen').*w1probs.*(1-w1probs);
dw1_penhid = temp_h2'*Ix1;
dw1_vishid = XX'*Ix1;
dw1_biases = sum(Ix1,1);

% Pack the gradient in the same order as VV.
df = [dw1_vishid(:); dw1_penhid(:); dw2_hidpen(:); dw_class(:); ...
      dw1_biases(:); dw2_biases(:); dtopbiases(:)];


58 changes: 58 additions & 0 deletions dbm/code_DBM_true_unsup/code_DBM/CG_MNIST_INIT.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
function [f, df] = ECG1(VV,Dim,XX,target,temp_h2);
% ECG1  Cross-entropy loss with a gradient for the output layer only.
%
%   [f, df] = ECG1(VV, Dim, XX, target, temp_h2)
%
%   Same objective and parameter packing as the full-gradient variant, but
%   every gradient entry except those of w_class and topbiases is returned
%   as zero, so an optimiser driven by this function changes only the
%   output layer.
%
%   Inputs
%     VV       all parameters packed into one vector, in the order
%              w1_vishid (numdims x numhids), w1_penhid (numpens x numhids),
%              hidpen (numhids x numpens), w_class (numpens x numclasses),
%              hidbiases (1 x numhids), penbiases (1 x numpens),
%              topbiases (1 x numclasses)
%     Dim      [numdims; numhids; numpens]
%     XX       N x numdims input cases
%     target   N x numclasses target matrix; the number of classes is read
%              from its column count (the caller in this code base uses 10)
%     temp_h2  N x numpens extra input to the first hidden layer, weighted
%              by w1_penhid
%
%   Outputs
%     f        scalar, -sum(sum(target .* log(softmax output)))
%     df       column vector packed like VV; non-zero only in the w_class
%              and topbiases positions
%
%   Fix: the earlier code zeroed a variable called dhidpen that was never
%   packed into df, so the hidpen gradient leaked through and that layer
%   was updated during the output-layer-only phase. It is now zero like
%   the other lower-layer gradients.

numdims = Dim(1);
numhids = Dim(2);
numpens = Dim(3);
N = size(XX,1);
numclasses = size(target,2);

% Unpack the parameter vector; pos is the index of the last element consumed.
pos = 0;
w1_vishid = reshape(VV(pos+1:pos+numdims*numhids),numdims,numhids);
pos = pos + numdims*numhids;
w1_penhid = reshape(VV(pos+1:pos+numpens*numhids),numpens,numhids);
pos = pos + numpens*numhids;
hidpen = reshape(VV(pos+1:pos+numhids*numpens),numhids,numpens);
pos = pos + numhids*numpens;
w_class = reshape(VV(pos+1:pos+numpens*numclasses),numpens,numclasses);
pos = pos + numpens*numclasses;
hidbiases = reshape(VV(pos+1:pos+numhids),1,numhids);
pos = pos + numhids;
penbiases = reshape(VV(pos+1:pos+numpens),1,numpens);
pos = pos + numpens;
topbiases = reshape(VV(pos+1:pos+numclasses),1,numclasses);

% Forward pass through the two logistic layers.
w1probs = 1./(1 + exp(-XX*w1_vishid - temp_h2*w1_penhid - repmat(hidbiases,N,1)));
w2probs = 1./(1 + exp(-w1probs*hidpen - repmat(penbiases,N,1)));

% Log-softmax: shifting each row by its maximum leaves the softmax unchanged
% but keeps every exp() argument <= 0, so the loss cannot become NaN
% through overflow.
act = w2probs*w_class + repmat(topbiases,N,1);
act = act - repmat(max(act,[],2),1,numclasses);
logprob = act - repmat(log(sum(exp(act),2)),1,numclasses);
targetout = exp(logprob);

f = -sum(sum(target.*logprob));

% Gradient of the output layer. The explicit dimension keeps the bias
% gradient 1 x numclasses even when N == 1.
Ix_class = targetout - target;
dw_class = w2probs'*Ix_class;
dtopbiases = sum(Ix_class,1);

% Everything below the output layer is frozen.
dw1_vishid = zeros(numdims,numhids);
dw1_penhid = zeros(numpens,numhids);
dw2_hidpen = zeros(numhids,numpens);
dw1_biases = zeros(1,numhids);
dw2_biases = zeros(1,numpens);

% Pack the gradient in the same order as VV.
df = [dw1_vishid(:); dw1_penhid(:); dw2_hidpen(:); dw_class(:); ...
      dw1_biases(:); dw2_biases(:); dtopbiases(:)];


Loading

0 comments on commit 6087a83

Please sign in to comment.