%% Maximum entropy and quantized metric models for absolute category ratings
% Code for our paper "Maximum entropy and quantized metric models for absolute
% category ratings."
%%
% * *Authors:* Saupe, D., Rusek, K., Hägele, D., Weiskopf, D., & Janowski, L.
% * *Journal*: <http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=97
% IEEE Signal Processing Letters>
% * *Publication Date*: 2024
% * *Volume*: 31
% * *On Page(s)*: 2970-2974
% * *Print ISSN*: 1070-9908
% * *Online ISSN*: 1558-2361
% * *Digital Object Identifier*: <https://ieeexplore.ieee.org/document/10716765
% 10.1109/LSP.2024.3480832>
% * *Communicating author:* Dietmar Saupe, [email protected]
%%
% Copyright (C) 2024 by Saupe, D., Rusek, K., Hägele, D., Weiskopf, D., & Janowski,
% L. <[email protected]>
%
% Permission to use, copy, modify, and/or distribute this software for any purpose
% with or without fee is hereby granted.
%
% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
% REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
% AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
% INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
% LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
% OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
% OF THIS SOFTWARE.
%
% The program reads one of the two datasets KonIQ-10k or VQEG-HDTV and then
% computes the maximum entropy and quantized metric models for all the ACR
% distributions in the dataset. This reproduces (among other things) the
% values in the AIC and G-test columns of Tables I and II of the paper.
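%
% Expected CSV layout (a sketch based on the column description in the import
% section below; the row shown is hypothetical): each row holds a stimulus id
% followed by the counts of the five ACR levels, e.g.
%
%   stimulus_001, 4, 12, 38, 51, 17
%
% meaning 4 ratings of "1", 12 of "2", 38 of "3", 51 of "4", and 17 of "5"
% (sample size 122).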
clear; close all; clc;
rng (0);
%% Select the model types for ACR probability distributions
% list of available ACR probability distribution types
dlist = ["normal", "logistic", "beta", "kumaraswamy", "logit-logistic", "maxentropy", "GSD"];
setGSD ("gsd_prob_vectors.csv"); % reads table for the GSD distribution
% select any subset of the model types for execution
dlist = ["normal", "logistic", "beta"];
%% Select and import CSV file of an ACR dataset
csvlist = ["KonIQ-10k.csv", "VQEG-HDTV.csv"]; % list of ACR datasets
% column 1 = id of stimulus, cols 2:6 = the counts of the 5-level ACR ratings 1 to 5
csvfile = csvlist(1); % choose one of the datasets
M = readtable(csvfile);
head(M)
sample_size = sum(M{:,2:6},2); % total number of ratings for each stimulus
A = M{:,2:6} ./ sample_size; % create probability vectors by normalizing
n = size(A,1); % number of rows (stimuli) in dataset
fprintf ("Imported ACR dataset is %s with %d rows.\n", csvfile, n);
%% Loop over all model types and distributions
nmodels = length(dlist); % number of models of distributions
rows = n * nmodels; % Number of rows in output structure
cols = 8; % Columns: [stimulus id, ACR rating counts, model name, par, pmodel, nll, gtest, pvalue]
C = cell(rows,cols); % Create a cell array for the output file
nll = zeros(nmodels,n);
gtest = zeros(nmodels,n);
pvalue = zeros(nmodels,n);
for model = 1:nmodels
% Fit the selected distribution model to every ACR probability vector and record NLL, G-test statistic, and p-value
distribution = dlist(model);
fprintf ("Starting with distribution model: %s\n", distribution);
%
tic
for item = 1:n
if (mod(item,100)==0), fprintf ("%5d items\n", item); end
p = A(item,1:5) + eps; % select prob vectors from the dataset one by one
switch distribution
case 'maxentropy'
[par, pmodel, nll(model,item)] = HmaxPV (p);
otherwise
[par, pmodel, nll(model,item)] = solve_opt (p, distribution);
end
nll(model,item) = sample_size(item) * nll(model,item); % multiply by sample size to get the true NLL
gtest(model,item) = - 2 * sample_size(item) * (sum(p.*log(pmodel)) - sum(p.*log(p))); % G-test likelihood ratio
pvalue(model,item) = 1 - chi2cdf(gtest(model,item),2); % p-value from chi2 with df=2
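% The G-test above is the likelihood-ratio statistic
%   G = 2 * N * sum( p .* log(p ./ pmodel) )
% written as a difference of cross-entropy and entropy terms; with 5 rating
% categories and 2 fitted parameters the reference chi-square distribution
% has 5 - 1 - 2 = 2 degrees of freedom, as used for the p-value above.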
% output in cell array
row = (model-1) * n + item; % row of output cell array
C(row,:) = {M{item,1}, M{item,2:6}, dlist(model), par, pmodel, nll(model,item), gtest(model,item), pvalue(model,item)};
end
T = toc;
% Collect statistics: Akaike and Bayesian Information Criterion, means of gtest, nll and likelihood
AIC(model) = (2*n)*2 + 2*sum(nll(model,:)); % AIC = k*2 + 2*NLL, where k = 2*n = number of parameters
BIC(model) = (2*n)*log(n) + 2*sum(nll(model,:)); % BIC = k*log(n) - 2*log(likelihood)
mean_gtest(model) = sum(gtest(model,:))/n;
mean_nll(model) = sum(nll(model,:))/n;
mean_likelihood(model) = sum( exp(-nll(model,:)./sample_size(:)') )/n; % mean over stimuli of exp(-NLL/N), the per-rating geometric-mean likelihood
fprintf("AIC %7.1f BIC %7.1f Mean G-test %7.5f Mean NLL %7.5f Mean likelihood %7.5f\n", ...
AIC(model), BIC(model), mean_gtest(model), mean_nll(model), mean_likelihood(model));
fprintf ("Finished in %.2f secs with distribution model <%s> for %d stimuli in %s.\n", T, distribution, n, csvfile);
fprintf("---------------------------------------------------------------------------------------");
end
%% Generate Excel output file
filename = sprintf("out %s %s.xlsx", csvfile, datetime);
TC = cell2table(C,...
"VariableNames",["stimulus" "ratings" "model" "pars" "probs" "nll" "gtest" "pval"]);
writetable(TC,filename);
%% Figure for G-test cdfs
Model_plot = true;
if Model_plot
fprintf("Critical G-test value at p = 0.05 is %5.2f\n", chi2inv(0.95,2));
x = zeros(nmodels,n);
y = zeros(nmodels,n);
figure
for model = 1:nmodels
[x(model,:), y(model,:)] = samplecdf (gtest(model,:));
p = plot (x(model,:), y(model,:)); hold on
p.LineWidth = 1.5;
end
% also plot the chi2 distribution with 2 degrees of freedom
p = plot (x(1,:), chi2cdf(x(1,:),2), '--'); hold on
titlestr = sprintf("%s: G-test CDFs for distribution models", csvfile);
title(titlestr)
p.LineWidth = 1.5;
xlabel('G-Test'); xlim([0 10]);
ylabel('CDF'); ylim([0 1.0]);
legend([dlist,"chi square"],'Location','southeast');
hold off
end
%% Solver by optimization
function [par, pvout, nll] = solve_opt (pvin, distribution)
%-----------------------------------------------------------------------
% Interior point solver fmincon from Matlab
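% Example usage (illustrative sketch; the probability vector is hypothetical):
%   [par, pmodel, nll] = solve_opt([0.05 0.15 0.40 0.30 0.10], "normal");
% returns the fitted 2d parameter vector, the modeled 5-bin probability
% vector, and the per-rating negative log-likelihood.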
% settings for fmincon optimization
DisplayOption = 'none'; % fmincon display options: none, iter, notify, final, iter-detailed
nstimulioptions = optimoptions('fmincon');
options = optimoptions(@fmincon, ...
'MaxIter',1000, ...
'MaxFunEvals', 10000, ...
'Algorithm','interior-point',... %interior-point, sqp
'SpecifyObjectiveGradient',false, ...
'CheckGradients', false, ...
'TolFun',1e-6, ...
'TolX',1e-6, ...
'Display', DisplayOption);
% list of constraints
A = [];
b = [];
Aeq = [];
beq = [];
nonlcon = [];
%
switch distribution
case 'normal'
delta = 20;
lb = [1-delta,0];
ub = [5+delta,100];
case 'logistic'
delta = 20;
lb = [1-delta,0];
ub = [5+delta,100];
case 'beta'
lb = [0,0];
ub = [20,20];
case 'kumaraswamy'
lb = [0,0];
ub = [20,20];
case 'logit-logistic'
lb = [0,0];
ub = [1,20];
case 'GSD'
lb = [1,0];
ub = [5,1];
end
% function to be minimized
fun = @(x)opt (x, pvin, distribution);
%
% do the optimization
% Sometimes the random initial guess leads to convergence at a suboptimal
% local maximum of the likelihood. This is detected by checking whether the
% normalized G-test (= gtest / number of ratings) is greater than 1.
% In that case, a new random initial guess is tried (up to 10 times).
ngtest = inf; iter = 1;
while ngtest > 1.0 && iter <= 10
x0 = init_solution (distribution, pvin);
[par,nll,exitflag,output] = fmincon(fun,x0,A,b,Aeq,beq,lb,ub,nonlcon,options);
pvout = ACR(par,distribution) + eps;
if exitflag <= 0
fprintf("Exit fmincon in solve_opt (psi, rho). flag = %d\n", exitflag);
output
fprintf (" pvin %f %f %f %f %f entropy %f\n", pvin, - sum(pvin.*log(pvin)));
fprintf ("pvout %f %f %f %f %f x-entropy %f\n", pvout, - sum(pvin.*log(pvout)));
end
ngtest = - 2 * (sum(pvin.*log(pvout)) - sum(pvin.*log(pvin))); % normalized G-test
if (iter==1 && ngtest>1)
fprintf("Suboptimal local minimum. ngtest %.2f nll %.2f. Next: ", ngtest, nll);
end
if (iter>1 && ngtest>1)
fprintf("ngtest %.2f nll %.2f. Next: ", ngtest, nll);
elseif (iter>1 && ngtest<=1)
fprintf("ngtest %.2f nll %.2f. OK: Accepted (%d trials).\n", ngtest, nll, iter);
end
iter = iter + 1;
end
pvout = ACR(par,distribution) + eps;
end
%% Setup random initial conditions
function x0 = init_solution (distribution, pvin)
switch distribution
case 'normal'
x0(1) = 1 + 4*rand;
x0(2) = 3*rand;
case 'logistic'
x0(1) = 1 + 4*rand;
x0(2) = 3*rand;
case 'beta'
x0(1) = rand;
x0(2) = rand;
case 'kumaraswamy'
x0(1) = rand;
x0(2) = rand;
case 'logit-logistic'
x0(1) = rand;
x0(2) = 3*rand;
case 'GSD'
[x0(1), ~, x0(2)] = SOS(pvin);
end
end
%% Loss function
function nll = opt (x, pv, distribution)
% x = 2d parameter vector
% pv = prob vector
model = ACR(x,distribution)+eps; % add eps to avoid log(0);
nll = -sum(pv.*log(model));
end
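% Note: the minimized value is the cross-entropy -sum(pv.*log(model)), i.e.
% the negative log-likelihood per rating. By Gibbs' inequality it is bounded
% below by the entropy of pv, attained only when model == pv.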
%% ACR probability distribution
function p = ACR(x,distribution)
% x = 2d parameter vector
switch distribution
case 'normal'
thresholds = [1.5,2.5,3.5,4.5];
pcdf = normcdf(thresholds,x(1),x(2));
case 'logistic'
thresholds = [1.5,2.5,3.5,4.5];
pcdf = logisticcdf(thresholds,x(1),x(2));
case 'beta'
thresholds = [0.2,0.4,0.6,0.8];
pcdf = betacdf(thresholds,x(1),x(2));
case 'kumaraswamy'
thresholds = [0.2,0.4,0.6,0.8];
pcdf = kumaraswamycdf(thresholds,x(1),x(2));
case 'logit-logistic'
thresholds = [0.2,0.4,0.6,0.8];
pcdf = logitlogisticcdf(thresholds,x(1),x(2));
case 'GSD'
p = getGSD(x(1),x(2));
return;
otherwise
warning('Unexpected distribution type in ACR(x,distribution).')
end
p(1) = pcdf(1) - 0;
p(2) = pcdf(2) - pcdf(1);
p(3) = pcdf(3) - pcdf(2);
p(4) = pcdf(4) - pcdf(3);
p(5) = 1 - pcdf(4);
end
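% Illustration (values rounded; computed from the standard normal CDF):
% ACR([3,1],'normal') uses thresholds [1.5 2.5 3.5 4.5], giving cumulative
% probabilities of about [0.067 0.309 0.691 0.933] and hence the 5-bin
% vector of about [0.067 0.242 0.383 0.242 0.067].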
%% $(\psi,var,\rho)$ for a probability vector (from the SOS hypothesis)
function [psi, var, rho] = SOS(p)
% Input p = probability row vector of dimension 5
psi = p*[1;2;3;4;5]; % mean
var = p*(([1;2;3;4;5]-psi).^2); % variance
vmin = (ceil(psi)-psi)*(psi-floor(psi)); % minimal variance
vmax = (psi-1)*(5-psi); % maximal variance
if (vmax > vmin)
rho = (vmax - var)/(vmax - vmin); % rho = 1 -> minimal variance
else
rho = 0.5;
end
end
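% Quick check: for p = [0 0 1 0 0], psi = 3, var = 0, vmin = 0, vmax = 4, so
% rho = 1 (minimal variance); for p = [0.5 0 0 0 0.5], psi = 3, var = 4 = vmax,
% so rho = 0 (maximal variance).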
%% Compute cumulative distribution function from random samples
function [x, cdf] = samplecdf (data)
% Compute cumulative distribution function for a set of numbers
% Input: data = row vector of real numbers
% Output: cdf as cdf(x) = proportion of data <= x
x = sort(data);
cdf = (1:length(data))/length(data);
end
%% Kumaraswamy distribution
% Support on [0,1] with cdf $1 -(1-x^a)^b$
function cdf = kumaraswamycdf(x,a,b)
cdf = 1 - (1-x.^a).^b;
end
%% Logistic distribution
function cdf = logisticcdf(x,mu,s)
cdf = 1./(1 + exp(-(x-mu)./s));
end
%% Logit-Logistic distribution
function cdf = logitlogisticcdf(x,mu,s)
cdf = 1./(1 + ((mu*(1-x))./(x*(1-mu))).^(1/s));
end
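% Sanity check: at x = mu the ratio (mu*(1-x))/(x*(1-mu)) equals 1, so
% logitlogisticcdf(mu,mu,s) = 0.5 for any 0 < mu < 1 and s > 0, i.e. mu is
% the median of the logit-logistic distribution.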
%% HmaxPV - Maximum entropy probability vector
function [par, pvout, nll] = HmaxPV (pvin)
%-----------------------------------------
% This function outputs psi (mean) and rho (complementary normalized variance)
% of the maximum entropy probability distribution pvout which minimizes
% the negative log-likelihood for the input distribution pvin.
% It also computes the corresponding maximum entropy probability
% distribution pvout.
% By means of a theorem (not contained in the paper), the mean and complementary normalized
% variance of the solution are equal to those of the input distribution (oracle).
% So pvout has the same mean and variance as pvin.
% The main part of this function therefore computes the maximum entropy
% probability distribution for the given input mean and complementary normalized
% variance.
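%
% Example usage (illustrative; the input vector is hypothetical):
%   [par, pv, nll] = HmaxPV([0.05 0.15 0.40 0.30 0.10]);
% par = [psi, rho] of the input, pv = the maximum entropy distribution with
% the same psi and rho, and nll = -sum(pvin.*log(pv)).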
[psi, ~, rho] = SOS(pvin);
par = [psi, rho]; % maximum entropy model parameters are psi and rho (Oracle)
% Interior point solver fmincon from Matlab
% settings for fmincon optimization
DisplayOption = 'none'; % fmincon display options: none, iter, notify, final, iter-detailed
nstimulioptions = optimoptions('fmincon');
options = optimoptions(@fmincon, ...
'MaxIter',500, ...
'MaxFunEvals', 10000, ...
'Algorithm','interior-point',... %interior-point, sqp
'SpecifyObjectiveGradient',true, ...
'CheckGradients', false, ...
'TolFun',1e-8, ...
'TolX',1e-8, ...
'ConstraintTolerance', 1e-5, ...
'Display', DisplayOption);
% lower and upper bounds
lb = [0,0,0,0,0];
ub = [1,1,1,1,1];
% list of linear constraints
A = [];
b = [];
Aeq = [1,1,1,1,1];
beq = [1];
% function for nonlinear equality constraints
mycon = @(x)constraints(x, psi, rho);
% do the optimization
fun = @(x)optH(x); % negative entropy shall be minimized
p0 = diff([0, sort(rand(1,4)), 1]); % initialization: random ACR probability vector
% Another good initialization is p0 = [0.2, 0.2, 0.2, 0.2, 0.2];
% Initializations near the boundary of the domain may fail to give convergence.
[pvout,fval,exitflag,output] = fmincon(fun,p0,A,b,Aeq,beq,lb,ub,mycon,options);
if exitflag <= 0
fprintf("Exit fmincon in HmaxPV (psi, rho). flag = %d\n", exitflag);
output
fprintf (" pvin %f %f %f %f %f entropy %f\n", pvin, - sum(pvin.*log(pvin)));
fprintf ("pvout %f %f %f %f %f x-entropy %f\n", pvout, - sum(pvin.*log(pvout)));
end
pvout = pvout + eps; % add machine unit to prevent log(0)
nll = - sum(pvin.*log(pvout));
end
function analyse (p, text) % only for debugging
[psi, ~, rho] = SOS(p);
[neg_entropy, grad] = optH (p);
fprintf ("%s psi/rho %7.5f %7.5f p %f %f %f %f %f entropy %f\n", text, psi, rho, p, -neg_entropy);
end
function [c, ceq] = constraints (p, psi_, rho_)
% c is an array of nonlinear inequality constraints (none here)
c = [];
% ceq is an array of two nonlinear equality constraints
% ceq(1) and ceq(2) ensure that p has the required psi and rho
[psi, ~, rho] = SOS(p);
ceq(1) = psi_ - psi;
ceq(2) = rho_ - rho;
end
function [neg_entropy, grad] = optH (p)
% negative entropy to be minimized to obtain maximum entropy
p0 = p(p>0);
neg_entropy = sum(p0.*log(p0)); % negative entropy
if nargout > 1 % gradient required (column vector!)
grad = (log(p+eps)+1)';
end
end
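% Gradient check: the derivative of sum(p.*log(p)) with respect to p(i) is
% log(p(i)) + 1, which is what grad returns (with eps added to avoid log(0)
% at the boundary of the simplex).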
%% GSD - Table lookup for the GSD
% The GSD model is computed from a large table of precomputed models
function setGSD (csvfile)
%-----------------------------------------------------------------------
global GSDarray;
% Interpretation of GSDarray(i,j,k)
% GSDarray(1,j,k) = psi = 1 + (k-1)*4/1000
% GSDarray(2,j,k) = rho = (j-1)/100
% GSDarray(3:7,j,k) = pv(1:5) for parameters (psi,rho)
T = readtable(csvfile); % read csv file with GSD table info
T(1,:) = []; % remove table header row
GSDarray = table2array(T)';
GSDarray = reshape(GSDarray,7,101,[]); % size 7 x 101 x 1001: (2 parameters + 5 probabilities) x 101 rho values x 1001 psi values
end
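% Index mapping used by getGSD below (worked example): psi = 2.5 gives
% kfloat = 1 + 250*(2.5-1) = 376, and rho = 0.375 gives jfloat = 1 + 100*0.375
% = 38.5, so the returned vector interpolates between the rho grid points
% j = 38 and j = 39 at psi page k = 376.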
function pv = getGSD (psi, rho)
%-----------------------------------------------------------------------
global GSDarray;
if ~isreal(psi) || ~isreal(rho)
psi,rho
end
if (psi<1 || psi>5 || rho<0 || rho>1)
fprintf ("Error in getGSD %f %f \n", psi, rho);
psi = min([max([psi,1]),5]);
rho = min([max([rho,0]),1]);
end
kfloat = 1 + 250*(psi-1);
if (kfloat < 1)
fprintf("getGSD: kfloat < 1. psi %f\n", psi);
kfloat = 1;
end
k = (floor(kfloat)); dk = kfloat - k;
if (k==1001)
k = 1000; dk = 1.0;
end
jfloat = 1 + 100*rho;
if (jfloat < 1)
fprintf("getGSD: jfloat < 1. rho %f\n", psi);
jfloat = 1;
end
j = (floor(jfloat)); dj = jfloat - j;
if (j==101)
j = 100; dj = 1.0;
end
if (j<1)
fprintf("getGSD: j < 1. %d\n", j);
j = 1;
end
%pv = GSDarray(3:7,j,k); % use this to turn off bilinear interpolation
pv = (1-dj)*(1-dk)*GSDarray(3:7,j,k) + dj*(1-dk)*GSDarray(3:7,j+1,k) + ...
(1-dj)*dk*GSDarray(3:7,j,k+1) + dj*dk*GSDarray(3:7,j+1,k+1);
pv = pv'+eps; % returns a row vector with bilinear interpolation
end