-
Notifications
You must be signed in to change notification settings - Fork 39
/
Copy pathRWSTuneParameters.m
148 lines (136 loc) · 5.73 KB
/
RWSTuneParameters.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
% This function generates a low-rank approximation of the latent kernel matrix
% using a random-features approach based on a DTW-like distance for univariate
% UCR time-series datasets. It uses LIBLINEAR to perform a grid search with
% K-fold cross-validation.
%
% Author: Lingfei Wu
% Date: 01/20/2019
%clear,clc
%parpool('local');
%addpath(genpath('utilities'));
function info = RWSTuneParameters(DS,NumOfSamples)
% RWSTuneParameters  Grid search over DMax, sigma and the LIBLINEAR cost
% parameter for random-series DTW features, scored by K-fold
% cross-validation on the training set.
%
% Inputs:
%   DS           - struct; this function reads DS.Train (one training time
%                  series per row, passed to dtw_similarity_cell) and
%                  DS.TrainClassLabels (one numeric class label per row).
%   NumOfSamples - number R of random time series to generate, i.e. the
%                  number of feature columns (try 32 or 128).
%
% Output:
%   info - struct describing the search:
%          aveAccu_best, valAccuHist, DMaxHist, sigmaHist, lambda_invHist
%          are always present; valAccu, stdAccu, telapsed_fea_gen,
%          telapsed_liblinear, runtime, sigma, R, DMin, DMax and
%          lambda_inverse are set once some configuration improves on the
%          initial best accuracy of 0.
%
% Requires dtw_similarity_cell and the LIBLINEAR MATLAB interface
% (train / predict) on the path.

%file_dir = './datasets/';
DMin = 1;
DMax_list = 5:5:100;
sigma_list = [1e-3 3e-3 1e-2 3e-2 0.10 0.14 0.19 0.28 0.39 0.56 ...
    0.79 1.12 1.58 2.23 3.16 4.46 6.30 8.91 10 31.62 1e2 3e2 1e3];
R = NumOfSamples; % number of random time-series: try 32 or 128
CV = 10; % number of folds of cross validation

% The cost parameter is not relevant to the representation but to the
% classifier. It is tuned after the extraction of the representations, as
% is done for GRAIL, for fairness.
lambda_inverse = [1e-5 1e-4 1e-3 1e-2 1e-1 1 1e1 1e2 1e3 1e4 1e5];

% Everything below depends only on DS, so compute it once instead of once
% per grid point.
Train = DS.Train;
Train_numSamples = size(Train,1);

% Convert user labels to a uniform format: binary (-1,1) and multiclass
% (1,2,..,k).
encodedLabels = rwsEncodeLabels(DS.TrainClassLabels);

% Never use more folds than samples; otherwise the trailing folds would
% have an empty test set and produce undefined accuracies.
numFolds = min(CV, Train_numSamples);
testIdx = cell(1, numFolds);
trainIdx = cell(1, numFolds);
for cv = 1:numFolds
    testIdx{cv} = rwsFoldRange(Train_numSamples, numFolds, cv);
    trainIdx{cv} = setdiff(1:Train_numSamples, testIdx{cv});
end

info.aveAccu_best = 0;
info.valAccuHist = [];
info.DMaxHist = [];
info.sigmaHist = [];
info.lambda_invHist = [];

for jj = 1:length(DMax_list)
    for j = 1:length(sigma_list)
        % Left unsuppressed so the current grid point is echoed to the
        % console, as before.
        DMax = DMax_list(jj)
        sigma = sigma_list(j)

        timer_start = tic;
        % Generate random time series with variable length, where each
        % value in a random series is sampled from a Gaussian distribution
        % parameterized by sigma. The generator is reset for every grid
        % point so runs are reproducible.
        rng('default')
        sampleX = cell(R,1);
        for i = 1:R
            D = randi([DMin, DMax],1);
            sampleX{i} = randn(1, D)./sigma; % gaussian
        end
        trainFeaX_random = dtw_similarity_cell(Train, sampleX);

        % Column 1 holds the encoded label, columns 2..R+1 the features.
        trainData = zeros(Train_numSamples, R+1);
        trainData(:,1) = encodedLabels;
        trainData(:,2:end) = trainFeaX_random/sqrt(R);
        telapsed_fea_gen = toc(timer_start)

        disp('------------------------------------------------------');
        disp('LIBLinear performs basic grid search by varying lambda');
        disp('------------------------------------------------------');
        % Linear Kernel
        for i = 1:length(lambda_inverse)
            % '-s 2 -e 0.0001 -n 8 -q -c ' would be used for the omp version
            s = ['-s 2 -e 0.0001 -q -c ' num2str(lambda_inverse(i))];
            valAccu = zeros(1, numFolds);
            for cv = 1:numFolds
                trainFeaX = trainData(trainIdx{cv},2:end);
                trainy = trainData(trainIdx{cv},1);
                testFeaX = trainData(testIdx{cv},2:end);
                testy = trainData(testIdx{cv},1);

                timer_start = tic;
                model_linear = train(trainy, sparse(trainFeaX), s);
                [~, test_accuracy, ~] = predict(testy, sparse(testFeaX), model_linear);
                % NOTE: overwritten on every fold, so only the time of the
                % last fold is kept (unchanged from the original behavior).
                telapsed_liblinear = toc(timer_start);
                valAccu(cv) = test_accuracy(1);
            end
            ave_valAccu = mean(valAccu);
            std_valAccu = std(valAccu);

            % Accept a configuration only if it beats the current best mean
            % accuracy by more than 0.1.
            if (info.aveAccu_best+0.1 < ave_valAccu)
                info.DMaxHist = [info.DMaxHist;DMax];
                info.sigmaHist = [info.sigmaHist;sigma];
                info.lambda_invHist = [info.lambda_invHist;lambda_inverse(i)];
                info.valAccuHist = [info.valAccuHist;valAccu];
                info.valAccu = valAccu;
                info.aveAccu_best = ave_valAccu;
                info.stdAccu = std_valAccu;
                info.telapsed_fea_gen = telapsed_fea_gen;
                info.telapsed_liblinear = telapsed_liblinear;
                info.runtime = telapsed_fea_gen + telapsed_liblinear;
                info.sigma = sigma;
                info.R = R;
                info.DMin = DMin;
                info.DMax = DMax;
                info.lambda_inverse = lambda_inverse(i);
            end
        end
    end
end
%disp(info);
%savefilename = [filename '_rws_R' num2str(R) '_' num2str(CV) 'fold_CV'];
%save(savefilename,'info')
%delete(gcp);
end

function y = rwsEncodeLabels(rawLabels)
% Map arbitrary numeric class labels to a column vector in the format used
% for training: {-1,+1} when there are exactly two classes (smaller label
% -> -1), otherwise 1..k in order of sorted label value.
% The class index comes from unique() rather than from in-place
% replacement, so a new code can never collide with an original label that
% has not been remapped yet (e.g. labels {2,3,5} or {-2,-1}).
[classes, ~, classIdx] = unique(rawLabels(:));
classIdx = classIdx(:);
if numel(classes) == 2
    y = 2*classIdx - 3;
else
    y = classIdx;
end
end

function idx = rwsFoldRange(numSamples, numFolds, fold)
% Return the contiguous index range of test samples for fold number
% `fold`. The first mod(numSamples,numFolds) folds get one extra sample so
% that all samples are covered.
baseSize = floor(numSamples/numFolds);
extra = mod(numSamples, numFolds);
if fold <= extra
    first = (fold-1)*baseSize + fold;
    last = first + baseSize;
else
    first = (fold-1)*baseSize + extra + 1;
    last = first + baseSize - 1;
end
idx = first:last;
end