This repository was archived by the owner on Nov 22, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_dumm.m
44 lines (36 loc) · 1.44 KB
/
create_dumm.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
function [data, quiz] = create_dumm(data_in,quiz_in,var_name, min_num)
% CREATE_DUMM  Replace a categorical column by dummy (one-hot) columns.
%
%   [data, quiz] = create_dumm(data_in, quiz_in, var_name, min_num)
%
%   data_in, quiz_in : tables that both contain the variable var_name.
%                      The column is extracted with table2cell and passed
%                      to ismember/categorical, so it is expected to be a
%                      cell array of character vectors.
%   var_name         : name (char) of the column to encode.
%   min_num          : levels observed MORE than min_num times in data_in
%                      are kept; every other value, in either table, is
%                      relabelled 'other'.
%
%   data, quiz       : the input tables with var_name removed and the
%                      columns <var_name>_1 ... <var_name>_K appended.
%
%   Both tables are encoded against one shared, sorted list of levels, so
%   <var_name>_k denotes the same level in data and in quiz. A level that
%   does not occur in one of the tables yields an all-zero column there.

    data_col = table2cell(data_in(:, var_name));
    quiz_col = table2cell(quiz_in(:, var_name));

    % Frequencies come from data_in only; quiz_in follows the same mapping.
    freq = tabulate(categorical(data_col));
    frequent = freq(cell2mat(freq(:, 2)) > min_num, 1);

    % Relabel on the extracted cell columns. The table column itself is
    % dropped below, so there is no need to write 'other' back into the
    % tables (which also avoids the single-row assignment special case).
    data_col(~ismember(data_col, frequent)) = {'other'};
    quiz_col(~ismember(quiz_col, frequent)) = {'other'};

    % Shared level list: unique() sorts, which matches the default category
    % order of categorical(), so the column order is unchanged whenever both
    % tables already held the same levels.
    levels = unique([data_col; quiz_col]);
    dummy_names = arrayfun(@(k) sprintf('%s_%d', var_name, k), ...
                           1:numel(levels), 'UniformOutput', false);

    data = create_dumm_append(data_in, data_col, levels, dummy_names, var_name);
    quiz = create_dumm_append(quiz_in, quiz_col, levels, dummy_names, var_name);
end

function tbl = create_dumm_append(tbl, col, levels, dummy_names, var_name)
% Append one indicator column per entry of levels to tbl, then drop var_name.
    indicators = dummyvar(categorical(col, levels));
    tbl = [tbl array2table(indicators, 'VariableNames', dummy_names)];
    tbl(:, var_name) = [];
end