Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

9.520 Fall 2015 Project Submission: Implement PFB algorithm for MKL under GURLS (GURLS/MATLAB) #28

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,11 @@
*.*~
gurls++/doc
bgurls++/doc

*.mat

*.mat

*.mat

gurls/demo/demo_mkl_class.mat
Binary file added gurls/GURLS_MKL.pdf
Binary file not shown.
351 changes: 351 additions & 0 deletions gurls/demo/data/ionosphere.csv

Large diffs are not rendered by default.

55 changes: 55 additions & 0 deletions gurls/demo/demo_mkl_classification.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
%%% set up work directory, install package
% run('../utils/gurls_install.m'); savepath;
addpath('./func/')
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%% MKL classification: ionosphere %%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% 1 read in ionosphere data ----
% column 1 is used as the label, all remaining columns as features
dat = csvread('./data/ionosphere.csv');
y = dat(:, 1);
X = dat(:, 2:end);

% hold out a random fifth of the rows for validation,
% the remaining rows are used for training
n_obs = size(y, 1);
idx_va = randsample(n_obs, round(n_obs/5));
idx_tr = setdiff(1:n_obs, idx_va);

X_tr = X(idx_tr, :);
y_tr = y(idx_tr);

X_va = X(idx_va, :);
y_va = y(idx_va);

% 2 train/test pipeline under GURLS ----
name = 'demo_mkl_class';
opt = gurls_defopt(name);
opt = gurls_defopt_mkl(opt);

% specify:
% (1) hoperf => macroavg
% (2) kernel type/parameter
%     (rbf: one kernel per listed sigma, linear: placeholder 0)
opt.hoperf = @perf_macroavg;
opt.mkl.type = ...
    {{'kernel_rbf', 1:0.2:4}, ...
     {'kernel_linear', 0}};

opt.seq = {...
    'split:ho', ...
    'kernel:mkl', ...
    'paramsel:homkl', ...
    'rls:dual_mkl', ...
    'predkernel:traintest_mkl', ...
    'pred:dual_mkl', ...
    'perf:macroavg'};

% one entry per task in opt.seq; process 1 is run on the training data,
% process 2 on the validation data
opt.process{1} = [2,2,2,2,0,0,0];
opt.process{2} = [3,3,3,3,2,2,2];
gurls(X_tr, y_tr, opt, 1);
gurls(X_va, y_va, opt, 2);

% result summary/visualization

plot_mkl_path(X_tr, y_tr, opt, 'norm');
plot_mkl_path(X_tr, y_tr, opt, 'perf');

% column sums of the first slice of the norm path.
% The original statement ended in ';' without an assignment, so the value
% was computed and silently discarded; keep it and show it instead.
% NOTE(review): presumably one column per kernel -- confirm against
% paramsel_homkl.
kernel_norm = sum(opt.paramsel.norm_path(:,:,1), 1);
disp(kernel_norm)
opt.perf
84 changes: 84 additions & 0 deletions gurls/demo/demo_mkl_regression.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
%%% set up work directory, install package
% run('../utils/gurls_install.m'); savepath;
addpath('./func/')

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%% MKL regression on linear data %%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% 1 generate linear data----
p = 50;     % number of features
n = 500;    % number of training rows
n2 = 200;   % number of validation rows

X_0 = normrnd(0, 1, n + n2, p);
beta = normrnd(0, 1, p, 1);

% Gaussian kernel on all (n + n2) rows, used to add a nonlinear component
sigma = 20;
K_0 = exp(-square_distance(X_0', X_0')/(sigma^2));
alpha = normrnd(0, 1, n + n2, 1);

% response = linear part + kernel part
y_0 = X_0 * beta + K_0 * alpha;

X_tr = X_0(1:n, :);
y_tr = y_0(1:n);

X_va = X_0((n+1):(n+n2), :);
y_va = y_0((n+1):(n+n2));

% 2 train/test pipeline under GURLS ----
name = 'demo_mkl_reg';
opt = gurls_defopt(name);
opt = gurls_defopt_mkl(opt);
% specify kernel type/parameter
% (sigma for rbf, none for linear)
opt.mkl.type = ...
    {{'kernel_rbf', sqrt((1.2.^(0:49))/2)}};
opt.mkl.strategy = false;

opt.seq = {...
    'split:ho', ...
    'kernel:mkl', ...
    'paramsel:homkl', ...
    'rls:dual_mkl', ...
    'predkernel:traintest_mkl', ...
    'pred:dual_mkl', ...
    'perf:rmsestd'};

% one entry per task in opt.seq; process 1 is run on the training data,
% process 2 on the validation data
opt.process{1} = [2,2,2,2,0,0,0];
opt.process{2} = [3,3,3,3,2,2,2];
gurls(X_tr, y_tr, opt, 1);
gurls(X_va, y_va, opt, 2);

% result summary/visualization
plot_mkl_path(X_tr, y_tr, opt, 'norm');
plot_mkl_path(X_tr, y_tr, opt, 'perf');
% kernel
% median of the norm path over its third dimension, then column sums,
% sorted in descending order
norm_summary = median(opt.paramsel.norm_path, 3);
kernel_importance = sum(norm_summary, 1);
[norm_value, norm_order] = ...
    sort(kernel_importance, 'descend');
% The original built this table and discarded it (trailing ';' with no
% assignment). Keep it and display it; also guard against fewer than
% five kernels being configured.
n_top = min(5, numel(norm_order));
top_kernels = [norm_order(1:n_top); norm_value(1:n_top)];
disp(top_kernels)

% 3 train/test using true model ----
name = 'demo_mkl_reg_true';
opt = gurls_defopt(name);
opt = gurls_defopt_mkl(opt);
% specify kernel type/parameter
% (sigma for rbf, none for linear)
% NOTE(review): the data above was generated with sigma = 20, while the
% rbf kernel here uses 10 -- confirm which value matches kernel_rbf's
% parameterization before calling this the "true" model.
opt.mkl.type = ...
    {{'kernel_rbf', 10}, {'kernel_linear', 0}};
opt.mkl.par_mkl = {[0], [0]};

% skip paramsel
opt.seq = {...
    'split:ho', ...
    'kernel:mkl', ...
    'rls:dual_mkl', ...
    'predkernel:traintest_mkl', ...
    'pred:dual_mkl', ...
    'perf:rmsestd'};
opt.process{1} = [2,2,2,0,0,0];
opt.process{2} = [3,3,3,2,2,2];
gurls(X_tr, y_tr, opt, 1);
gurls(X_va, y_va, opt, 2);
5 changes: 4 additions & 1 deletion gurls/demo/demo_nystrom_gurls.m
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@

name = 'nysrbfho';
opt = gurls_defopt(name);
opt.seq = {'split:ho', 'paramsel:siglamho_nystrom', 'kernel:rbf_nystrom', 'rls:dual_nystrom', 'predkernel:traintest_nystrom', 'pred:dual_nystrom', 'perf:rmse'};
opt.seq = {'split:ho', 'paramsel:siglamho_nystrom', ...
'kernel:rbf_nystrom', 'rls:dual_nystrom', ...
'predkernel:traintest_nystrom', ...
'pred:dual_nystrom', 'perf:rmse'};
opt.process{1} = [2,2,2,2,0,0,0];
opt.process{2} = [3,3,3,3,2,2,2];

Expand Down
3 changes: 2 additions & 1 deletion gurls/gurls_train.m
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ function selectDatatype(X, opt)
function y = analyzeData(X, y, opt)

selectDatatype(X, opt);

opt.newprop('n', -1);
opt.newprop('d', -1);
switch opt.datatype
Expand Down Expand Up @@ -231,6 +231,7 @@ function selectDatatype(X, opt)
opt.setting.d = opt.d;
opt.setting.problem = opt.problem;
opt.setting.numoutputs = opt.numoutputs;

if isprop(opt, 'labeldict')
opt.setting.labeldict = opt.labeldict;
end
Expand Down
83 changes: 83 additions & 0 deletions gurls/kernels/kernel_mkl.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
function [kernel] = kernel_mkl(X, y, opt)

% kernel_mkl(X, y, opt)
% Computes the stack of n x n kernel matrices used for multiple kernel
% learning, one matrix per supported (kernel type, parameter) pair.
% INPUTS:
% -OPT: options object with the following fields:
%   - mkl.type: cell array with elements {'type', params}
%       support: type = 'kernel_rbf':    params = array of sigmas,
%                                        one kernel per sigma
%                type = 'kernel_linear': params = 0 (placeholder),
%                                        always exactly one kernel
%       any other type is reported and skipped
% -X: input data matrix (n rows)
% -y: forwarded unchanged to the individual kernel functions
%
% OUTPUT: struct with the following fields:
% -type: 'mkl'
% -K_mkl: n x n x M array of the M kernel matrices actually computed
% -eig_mkl: M x 1 array, eigs(K, 1) of each kernel matrix
%
% RAISES: error if any element of mkl.type does not have exactly two
%         entries ({'type', params}).

    % initialize output container
    if ~isprop(opt, 'kernel')
        opt.newprop('kernel', struct());
    end
    mkl_type = opt.mkl.type;
    kernel = opt.kernel;

    % every spec must be a {'type', params} pair
    if any(cellfun('length', mkl_type) ~= 2)
        error('parameter not supplied for some kernels')
    end

    % preallocate using the parameter counts as an upper-bound estimate;
    % the arrays are trimmed to the real kernel count after the loop
    n = size(X, 1);
    M = sum(cellfun(@(x) length(x{2}), mkl_type));
    K_list = zeros(n, n, M);
    eig_list = zeros(M, 1);
    K_id = 0;   % number of kernels stored so far

    for idx = 1:length(mkl_type)
        spec = mkl_type{idx};
        kern_fun = str2func(spec{1});
        par_list = spec{2};
        if strcmp(spec{1}, 'kernel_rbf')
            % rbf case: one kernel per sigma; the sigma is handed to the
            % kernel function through opt.paramsel.sigma
            for sigma_idx = 1:length(par_list)
                K_id = K_id + 1;
                opt = set_sigma(opt, par_list(sigma_idx));
                K_temp = kern_fun(X, y, opt);
                K_list(:, :, K_id) = K_temp.K;
                eig_list(K_id) = eigs(K_temp.K, 1);
            end
        elseif strcmp(spec{1}, 'kernel_linear')
            % linear case: a single kernel, params are ignored
            K_id = K_id + 1;
            K_temp = kern_fun(X, y, opt);
            K_list(:, :, K_id) = K_temp.K;
            eig_list(K_id) = eigs(K_temp.K, 1);
        else
            fprintf('''%s'' currently not supported, skip\n', spec{1})
        end
    end

    % Skipped (unsupported) specs and linear specs with more than one
    % parameter were counted in M but produced no kernel; without trimming
    % they would stay in the output as all-zero matrices / zero eigenvalues.
    K_list = K_list(:, :, 1:K_id);
    eig_list = eig_list(1:K_id);
    eig_list = eig_list(:);   % keep the documented column shape

    kernel.type = 'mkl';
    kernel.K_mkl = K_list;
    kernel.eig_mkl = eig_list;

    if isprop(opt, 'paramsel')
        if isfield(opt.paramsel, 'manual_sigma')
            % clear opt.paramsel if it is defined by @set_sigma
            opt.paramsel = struct();
        end
    end
end

function [opt] = set_sigma(opt, sigma)
% Write SIGMA into opt.paramsel.sigma.
% When opt has no 'paramsel' property yet, the property is created here
% and tagged with manual_sigma = true so that the caller can recognise
% (and later clear) a paramsel slot that only exists because of this helper.

    % paramsel already present: just overwrite the sigma field
    if isprop(opt, 'paramsel')
        opt.paramsel.sigma = sigma;
        return
    end

    % paramsel missing: create it, leave the signature, then store sigma
    opt.newprop('paramsel', struct());
    opt.paramsel.manual_sigma = true;
    opt.paramsel.sigma = sigma;
end

Loading