diff --git a/toolbox/multivariate/barnardtest.m b/toolbox/multivariate/barnardtest.m index 90197cc51..18cacc1c6 100644 --- a/toolbox/multivariate/barnardtest.m +++ b/toolbox/multivariate/barnardtest.m @@ -14,7 +14,7 @@ % % N : Contingency table (default) or n-by-2 input dataset. % Matrix or Table. -% Matrix or table which contains the input contingency +% Matrix or table that contains the input contingency % table (say of size I-by-J) or the original data matrix. % In this last case N=crosstab(N(:,1),N(:,2)). As default % procedure assumes that the input is a contingency table. @@ -23,17 +23,17 @@ % % resolution: The resolution of the search space for the nuisance % parameter. -% Scalar. Small number which defines the resolution. See the -% More About section for more details. +% Scalar. Small number that defines the resolution. See the +% "More About" section for more details. % The default value of the resolution is 0.001. % Example - 'resolution',0.01 % Data Types - single | double % -% datamatrix : Data matrix or contingency table. Boolean. -% If datamatrix is true the first input argument N is forced -% to be interpreted as a data matrix, else if the input -% argument is false N is treated as a contingency table. The -% default value of datamatrix is false, that is the procedure +% datamatrix: Data matrix or contingency table. Boolean. +% If datamatrix is true, the first input argument N is forced +% to be interpreted as a data matrix; otherwise if the input +% argument is false, N is treated as a contingency table. The +% default value of datamatrix is false; that is, the procedure % automatically considers N as a contingency table. % Example - 'datamatrix',true % Data Types - logical @@ -42,7 +42,7 @@ % Output: % % pval: p-value of the test. Scalar. -% pval is the p-value, i.e. 
the probability of +% pval is the p-value, i.e., the probability of % observing the given result, or one more extreme, by % chance if the null hypothesis of independence between % rows and columns is true. Small values of pval cast doubt @@ -70,7 +70,7 @@ % % Barnard's test considers all tables with category sizes $c_1$ and $c_2$ for a % given $p$. The p-value is the sum of probabilities of the tables having a -% score in the rejection region, e.g. having significantly large difference +% score in the rejection region, e.g., having a significantly large difference % in proportions for a two-sided test. The p-value of the test is the % maximum p-value calculated over all $p$ between 0 and 1. The input % resolution parameter controls the resolution to search for. @@ -123,7 +123,7 @@ %{ %% Barnard test with all the default options. - % N= 2x2 Input contingency table + % N= 2x2 Input contingency table. N=[8,14; 1,3]; pval=barnardtest(N); disp(['The p-value of the test is: ' num2str(pval)]) @@ -131,11 +131,11 @@ %{ %% Resolution option. - % N= 2x2 Input contingency table + % N= 2x2 Input contingency table. N=[20,14; 10,13]; - % pvalue with the default resolution (0.001) + % p-value with the default resolution (0.001). pval001=barnardtest(N); - % p value with a resolution of 0.01 + % p-value with a resolution of 0.01. pval01=barnardtest(N,'resolution',0.01); disp(['The p-value with a resolution 0.01 is: ' num2str(pval01)]) disp(['The p-value a resolution 0.001 is: ' num2str(pval001)]) @@ -158,9 +158,9 @@ %{ % An example when the input is a datamatrix. N=[40,14;10,30]; - % Recreate the orginal data matrix + % Recreate the original data matrix. X=crosstab2datamatrix(N); - % barnardtest when input is a datamatrix + % barnardtest when input is a datamatrix. pval=barnardtest(X,'datamatrix',true); %} @@ -171,20 +171,20 @@ pval=barnardtest(N); % our p-value is 0.456054 % This value coincides with the R implementation (package barnard) - % based on a C routine. 
On the other hand the vectorized implementation - % of Barnard test http://www.mathworks.com/matlabcentral/fileexchange/25760 - % called using mybarnard(N,1000) gives a p-value of 0.456051 + % based on a C routine. On the other hand, the vectorized implementation + % of the Barnard test http://www.mathworks.com/matlabcentral/fileexchange/25760 + % called using mybarnard(N,1000) gives a p-value of 0.456051. %} %% Beginning of code % Check MATLAB version. If it is not smaller than 2013b than output is -% shown in table format +% shown in table format. verMatlab=verLessThan('matlab','8.2.0'); % Check whether N is a contingency table or a n-by-p input dataset (in this -% last case the contingency table is built using the first two columns of the +% last case, the contingency table is built using the first two columns of the % input dataset). if ~isempty(varargin) [varargin{:}] = convertStringsToChars(varargin{:}); @@ -200,12 +200,12 @@ end % If input is a datamatrix it is necessary to construct the contingency -% table +% table. if datamatrix == true N =crosstab(N(:,1),N(:,2)); end -% 1. The deafault resolution of the search space. +% 1. The default resolution of the search space. resolution=0.001; [varargin{:}] = convertStringsToChars(varargin{:}); @@ -216,15 +216,15 @@ UserOptions=varargin(1:2:length(varargin)); if ~isempty(UserOptions) - % Check if number of supplied options is valid + % Check if number of supplied options is valid. if length(varargin) ~= 2*length(UserOptions) error('FSDA:barnardtest:WrongInputOpt','Number of supplied options is invalid. Probably values for some parameters are missing.'); end - % Check if user options are valid options + % Check if user options are valid options. aux.chkoptions(options,UserOptions) end - % Write in structure 'options' the options chosen by the user + % Write in structure 'options' the options chosen by the user. 
if nargin > 2 for i=1:2:length(varargin) options.(varargin{i})=varargin{i+1}; @@ -233,7 +233,7 @@ resolution=options.resolution; end -% transform table in array +% transform table in array. if verMatlab ==0 && istable(N) N=table2array(N); end @@ -304,7 +304,7 @@ for i = 0:c1 - % The loop below is replaced by a series of vectorized instructions + % The loop below is replaced by a series of vectorized instructions. % for j = 0:c2 % if p>0 % S(rI) = exp(F1(end)-F1(end-i)-F1(i+1)+ F2(end)-F2(end-j)-F2(j+1) +(i+j)*log(p)+(C-(i+j))*log(1-p)); diff --git a/toolbox/multivariate/biplotFS.m b/toolbox/multivariate/biplotFS.m index 45540f419..937a47ad5 100644 --- a/toolbox/multivariate/biplotFS.m +++ b/toolbox/multivariate/biplotFS.m @@ -1,5 +1,5 @@ function biplotFS(Y,varargin) -%biplotFS calls biplotAPP.mlapp to show a dynamic biplot +%biplotFS calls biplotAPP.mlapp to show a dynamic biplot. % %Link to the help function % @@ -13,8 +13,8 @@ function biplotFS(Y,varargin) % \] % where $U_{(2)}$ is $n \times 2$ matrix (first two columns of $U$) % and $V_{(2)}$ is $p \times 2$ (first two columns of matrix $V$, -% $\Gamma_{(2)}^*$ is a $2 \times 2$ diagonal matrix -% which contains the frist two largest singular values of matrix $Z$ +% $\Gamma_{(2)}^*$ is a $2 \times 2$ diagonal matrix, +% which contains the first two largest singular values of matrix $Z$ % (square root of the eigenvalues of matrix $Z^TZ=(n-1)R$) where $R$ is the % correlation matrix. % @@ -53,7 +53,7 @@ function biplotFS(Y,varargin) % \[ % \sqrt{n-1} U_{(2)}= Z V_{(2)} \Gamma_{(2)}^{-1} % \] -% that is row points are the standardized principal components scores. +% that is, row points are the standardized principal components scores. 
% \[ % cov (\sqrt{n-1} U_{(2)}) = I_2= \left( % \begin{array}{cc} @@ -66,10 +66,10 @@ function biplotFS(Y,varargin) % \[ % \Gamma_{(2)} V_{(2)}^T % \] -% that is the arrows are associated with the correlations between the -% variables and the first two principal components -% The length of the arrow is exactly equal to the communality of the asoociated variable. -% In this case the unit circle is also shown on the screen and option axis +% that is, the arrows are associated with the correlations between the +% variables and the first two principal components. +% The length of the arrow is exactly equal to the communality of the associated variable. +% In this case, the unit circle is also shown on the screen and option axis % equal is set. % % On the other hand, if $\omega=1$ and $\alpha=1$ @@ -78,7 +78,7 @@ function biplotFS(Y,varargin) % \[ % \sqrt{n-1} U_{(2)} \Gamma_{(2)} = Z V_{(2)} % \] -% In this case the row points are the (non normalized) scores, that is +% In this case, the row points are the (non normalized) scores, that is % \[ % cov(\sqrt{n-1} U_{(2)} \Gamma_{(2)}) =cov( Z V_{(2)})= \left( % \begin{array}{cc} @@ -92,10 +92,10 @@ function biplotFS(Y,varargin) % \[ % V_{(2)}^T % \] -% Also in this case the unit circle is given and option axis equal is set. +% Also in this case, the unit circle is given and option axis equal is set. % % In general if $\omega$ decreases, the length of the arrows increases -% and the coordinates of row points are squeezed towards the origin +% and the coordinates of row points are squeezed towards the origin. % % In the app it is also possible to color row points depending on the % orthogonal distance ($OD_i$) of each observation to the PCA subspace. 
@@ -107,9 +107,9 @@ function biplotFS(Y,varargin) % OD_i=|| z_i- V_{(2)} V_{(2)}' z_i || % \] % -% If optional input argument bsb or bdp is specified it is possible to have -% in the app two tabs which enable the user to select the breakdown point -% of the analysis of the subset size to use in the svd. The units which are +% If an optional input argument, bsb or bdp, is specified, it is possible to have +% in the app two tabs that enable the user to select the breakdown point +% of the analysis or the subset size to use in the svd. The units that are % declared as outliers or the units outside the subset are shown in the % plot with filled circles. % @@ -127,70 +127,70 @@ function biplotFS(Y,varargin) % % Optional input arguments: % -% bsb : units forming subset on which to perform PCA. vector. -% Vector containing the list of the untis to use to -% compute the svd. The other units are projected in the -% space of the first two PC. bsb can be either a numeric -% vector of length m (m<=n) containin the list of the -% units (e.g. 1:50) or a logical vector of length n -% containing the true for the units which have to be used -% in the calculation of svd. For example bsb=true(n,1), -% bsb(13)=false; excludes from the svd unit number 13. -% Note that if bsb is supplied bdp must be empty. -% Example - 'bsb',[2 10:90 93] -% Data Types - double or logical -% -% bdp : breakdown point. Scalar. +% bsb : units forming a subset on which to perform PCA. vector. +% Vector containing the list of the units to use to +% compute the svd. The other units are projected in the +% space of the first two PC. bsb can be either a numeric +% vector of length m (m<=n), containing the list of the +% units (e.g., 1:50), or a logical vector of length n +% containing true for the units that have to be used +% in the calculation of svd. For example, bsb=true(n,1), +% bsb(13)=false; excludes from the svd unit number 13. +% Note that if bsb is supplied, bdp must be empty. 
+% Example - 'bsb',[2 10:90 93] +% Data Types - double or logical +% +% bdp : breakdown point. Scalar. % It measures the fraction of outliers the algorithm should -% resist. In this case any value greater than 0 but smaller -% or equal than 0.5 will do fine. Note that if bdp is -% supplied bsb must be empty. -% Example - 'bdp',0.4 -% Data Types - double +% resist. In this case, any value greater than 0, but smaller +% than or equal to 0.5, will do fine. Note that if bdp is +% supplied, bsb must be empty. +% Example - 'bdp',0.4 +% Data Types - double % % -% standardize : standardize data. boolean. Boolean which specifies -% whether to standardize the variables, that is we operate on +% standardize : standardize data. boolean. Boolean that specifies +% whether to standardize the variables, that is, we operate on % the correlation matrix (default) or simply remove column % means (in this last case we operate on the covariance % matrix). -% Example - 'standardize',false -% Data Types - boolean -% -% alpha : svd parameter. Scalar. Scalar in the interval [0 1] (see -% section additional details for more help). This parameter can -% be controllad by the corresponding sliding bar when the app is -% shown. -% Example - 'alpha',0.6 -% Data Types - double -% -% omega : svd parameter. Scalar. Scalar in the interval [0 1] (see -% section additional details for more help). This parameter can -% be controllad by the corresponding sliding bar when the app is -% shown. -% Example - 'omega',1 -% Data Types - double -% -% showRowPoints : hide or show row point. Boolean. If showRowPoints is true -% row points are shown in the biplot (default) else there are hidden. -% Example - 'standardize',false -% Data Types - boolean -% -% showRowNames : hide or show labels of row points. Boolean. If showRowNames is true -% labels of row names are shown in the biplot else (default) there are hidden. -% Example - 'showRowNames',false -% Data Types - boolean -% -% showArrows : hide or show arrows. 
Boolean. If showArrows is true -% arrows (associated labels) labels are shown in the biplot (default) -% else there are hidden. -% Example - 'showArrows',false -% Data Types - boolean +% Example - 'standardize',false +% Data Types - boolean +% +% alpha : svd parameter. Scalar. Scalar in the interval [0 1] (see +% section "additional details" for more help). This parameter can +% be controlled by the corresponding sliding bar when the app is +% shown. +% Example - 'alpha',0.6 +% Data Types - double +% +% omega : svd parameter. Scalar. Scalar in the interval [0 1] (see +% section "additional details" for more help). This parameter can +% be controlled by the corresponding sliding bar when the app is +% shown. +% Example - 'omega',1 +% Data Types - double +% +% showRowPoints : hide or show row points. Boolean. If showRowPoints is true, +% row points are shown in the biplot (default); otherwise they are hidden. +% Example - 'showRowPoints',false +% Data Types - boolean +% +% showRowNames : hide or show labels of row points. Boolean. If showRowNames is true, +% labels of row names are shown in the biplot; otherwise (default) they are hidden. +% Example - 'showRowNames',false +% Data Types - boolean +% +% showArrows : hide or show arrows. Boolean. If showArrows is true, +% arrows (and associated labels) are shown in the biplot (default); +% otherwise they are hidden. +% Example - 'showArrows',false +% Data Types - boolean % % Output: -% when the biplotAPP is closed in the base workspace a new variable -% called bsbfinalFromAPP is created which contains a logical vector -% of length n containing true for the units which have been used in +% when the biplotAPP is closed, in the base workspace, a new variable +% called bsbfinalFromAPP is created, which contains a logical vector +% of length n containing true for the units that have been used in % the svd. % % @@ -214,9 +214,9 @@ function biplotFS(Y,varargin) %% use of biplotFS on the ingredients dataset. 
load hald % Operate on the correlation matrix (default). - % use standardized principal components (for row points) and + % Use standardized principal components (for row points) and % correlation between variables and principal components (for column - % points, arrows) + % points, arrows). close all biplotFS(ingredients,'omega',1,'alpha',0); %} diff --git a/toolbox/multivariate/boxtest.m b/toolbox/multivariate/boxtest.m index 0545a7046..255e37071 100644 --- a/toolbox/multivariate/boxtest.m +++ b/toolbox/multivariate/boxtest.m @@ -33,7 +33,7 @@ % % Fapprox : Test based on F approximation. Boolean. If Fapprox is % true, the asymptotic F distribution of the value of Box -% test is also computed. On the other hand is +% test is also computed. On the other hand, if % Fapprox is false (default) just the chi2 approximation is % computed. % Example - 'Fapprox',true @@ -41,7 +41,7 @@ % % dispresults : Display results. Boolean. If dispresults is % true, the value of the test and the associated p-value -% will be shown on the screen. On the other hand is +% will be shown on the screen. On the other hand, if % dispresults is false (default) nothing is shown on the % screen. % Example - 'dispresults',true @@ -49,15 +49,15 @@ % % Output: % -% out: structure which contains the following fields +% out: structure, which contains the following fields % out.LR = scalar which contains Box test (uncorrected) % for equality of covariances. -% This is -2ln M (see 'More About section' for the +% This is -2ln M (see 'More About' section for the % definition of M). % out.LRchi2approx = scalar which contains Box test (corrected) % for equality of covariances. This version is % called $\chi^2$ approximation of the Box test. -% This is $-2(1-c_1)\ln M$ (see further details for the +% This is $-2(1-c_1)\ln M$ (see 'further details' for the % definition of $c_1$ and $M$). This value must % be compared with a $\chi^2$ with $0.5v(v+1)(g-1)$ % degrees of freedom. 
@@ -67,11 +67,11 @@ % out.LRFapprox = scalar which contains the $F$ approximation of Box % test of homogeneity of covariances. This field is % given just if input option Fapprox is true. -% out.LRFapprox_pval = scalar which contains the p-value of +% out.LRFapprox_pval = scalar which contains the p-value of % $F$ approximation of Box test of homogeneity of % covariances. This field is given just if input % option Fapprox is true. -% out.Spl = pooled variance covariance matrix. +% out.Spl = pooled variance covariance matrix. % % % More About: @@ -91,13 +91,13 @@ % \] % % where $S_i$ is the covariance matrix of group $i$ and $S_{pl}$ is the -% pooled sample covariance matrix. It is clear that we must have $n_i-1>v$ +% pooled sample covariance matrix. It is clear that we must have $n_i-1>v$; % otherwise $|S_i|=0$ for some $i$ and $M$ would be zero. The statistic $M$ % is a modification of the likelihood ratio test and varies between 0 and 1 % with values near 1 favouring $H_0$ and values near 0 leading to the % rejection of $H_0$ (see Rencher (2002) p. 256 for further details). The % quantity $-2 \ln M$ is approximately distributed as a $\chi^2$ -% distribution and is given in $\mbox{out.LR}$. The quantity $-2(1-c_1) \ln M$ +% distribution and is given in $\mbox{out.LR}$. The quantity $-2(1-c_1) \ln M$ % (where $c_1$ is a small sample correction factor) is usually called % correct Box test and is approximately distributed as a $\chi^2$ with $0.5 % (g-1) v(v+1)$ degrees of freedom. We reject $H_0$ if @@ -137,25 +137,25 @@ %{ % Box test for Iris data. - % load iris data + % load iris data. load fisheriris - % Compute Box test of equality of covariance matrices + % Compute Box test of equality of covariance matrices. out=boxtest(meas,species); %} %{ %% Box test for Iris data displaying results. - % load iris data + % load iris data. load fisheriris - % Compute Box test of equality of covariance matrices + % Compute Box test of equality of covariance matrices. 
out=boxtest(meas,species,'dispresults',true); %} %{ %% Box test for Iris with option Fapprox. - % load iris data + % load iris data. load fisheriris - % Compute Box test of equality of covariance matrices + % Compute Box test of equality of covariance matrices. out=boxtest(meas,species,'dispresults',true,'Fapprox',true) %} @@ -168,7 +168,7 @@ error('FSDA:boxtest:missingInputs','Grouping variable is missing') end -% test version for releases older than 2013b in order to use option upper inside cdf +% test version for releases older than 2013b in order to use option upper inside cdf. vertest=verLessThan('matlab','8.2.0'); Fapprox=false; @@ -180,15 +180,15 @@ [varargin{:}] = convertStringsToChars(varargin{:}); UserOptions=varargin(1:2:length(varargin)); if ~isempty(UserOptions) - % Check if number of supplied options is valid + % Check if number of supplied options is valid. if length(varargin) ~= 2*length(UserOptions) error('FSDA:boxtest:WrongInputOpt','Number of supplied options is invalid. Probably values for some parameters are missing.'); end - % Check if user options are valid options + % Check if user options are valid options. aux.chkoptions(options,UserOptions) end - % Write in structure 'options' the options chosen by the user + % Write in structure 'options' the options chosen by the user. for i=1:2:length(varargin) options.(varargin{i})=varargin{i+1}; end @@ -197,7 +197,7 @@ end -% Convert group to cell array from character array, make it a column +% Convert group to cell array from character array, make it a column. if (ischar(group)) group = cellstr(group); end @@ -206,22 +206,22 @@ group = group'; end -% Make sure inputs have the correct size +% Make sure inputs have the correct size. n = size(Y,1); if (size(group,1) ~= n) error(message('FSDA:boxtest:InputSizeMismatch')); end -% Remove missing Y columns first in case this eliminates a group +% Remove missing Y columns first in case this eliminates a group. 
nonan = (sum(isnan(Y), 2) == 0); Y = Y(nonan,:); group = group(nonan,:); -% Convert group to indices 1,...,g and separate names +% Convert group to indices 1,...,g and separate names. [groupnum, gnames] = grp2idx(group); ngroups = length(gnames); -% Remove NaN values again +% Remove NaN values again. nonan = ~isnan(groupnum); if (~all(nonan)) groupnum = groupnum(nonan); @@ -230,7 +230,7 @@ [n,p] = size(Y); realgroups = ismember(1:ngroups,groupnum); -% g = number of groups +% g = number of groups. g = sum(realgroups); % W = within sum of squares matrix @@ -296,7 +296,7 @@ end % Also compute if requested F approximation of the test and associated -% p-value +% p-value. if Fapprox == true c2=((p-1)*(p+2)/(6*(g-1)))* ( sum(1./((sz-1).^2)) -1/(sum(sz-1))^2); a1=0.5*(g-1)*p*(p+1); @@ -339,10 +339,10 @@ out=struct; out.LR=LR; -% Store chi2 version of LR test +% Store chi2 version of LR test. out.LRchi2approx=LRchi2approx; out.LRchi2approx_pval=LRchi2approx_pval; -% Store F version of LR test +% Store F version of LR test. if Fapprox== true out.LRFapprox=LRFapprox; out.LRFapprox_pval=LRFapprox_pval;