-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcode.m
50 lines (39 loc) · 1.61 KB
/
code.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
% Titanic survival prediction with a pruned classification tree.
%
% Steps:
%   1. Read the labelled passengers from train.csv.
%   2. Fit a classification tree on the odd-numbered rows and use the
%      even-numbered rows as a hold-out set to choose a pruning level.
%   3. Read the unlabelled passengers from data/test.csv, predict Survived
%      with the pruned tree and write PassengerId,Survived to output.csv.
%
% NOTE(review): the labelled file is opened from the current folder while the
% unlabelled file is opened from data/ -- confirm both paths are intended.

% ---- Load the labelled data ------------------------------------------------
fid = fopen('train.csv');
if fid == -1
    error('titanic:fileOpen', 'Cannot open input file "%s".', 'train.csv');
end
% Age (6th column) is read with %f rather than %d: the column contains
% fractional and blank entries, and an integer specifier cannot represent
% fractions and turns blank fields into 0 instead of NaN.
train = textscan(fid, '%d%d%d%q%s%f%d%d%s%f%s%s', 'delimiter', ',', ...
    'headerLines', 1);
fclose(fid);

varNames = { 'PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', ...
    'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked' };
xNames = { 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare' };  % predictors
yNames = { 'Survived' };                                        % response

% Interleaved split: odd rows fit the tree, even rows evaluate pruning levels.
trainIdx = 1:2:length(train{1});
testIdx = 2:2:length(train{1});

trainSet = dataset(train{:});
trainSet.Properties.VarNames = varNames;

% Encode Sex with a fixed mapping (male = 1, female = 2, anything else = NaN).
% grp2idx numbers cellstr groups by order of first appearance, so the codes
% could differ between the two input files depending on their first rows.
sexLevels = { 'male', 'female' };
[isKnownSex, sexCode] = ismember(cellstr(trainSet.Sex), sexLevels);
sexCode(~isKnownSex) = NaN;
trainSet.Sex = sexCode;

xTrain = double(trainSet(trainIdx, xNames));
yTrain = double(trainSet(trainIdx, yNames));
xTest = double(trainSet(testIdx, xNames));
yTest = double(trainSet(testIdx, yNames));

% ---- Fit and prune the tree ------------------------------------------------
% Columns 1 and 2 of the predictor matrix (Pclass, Sex) are categorical.
T = classregtree(xTrain, yTrain, 'method', 'classification', ...
    'splitcriterion', 'twoing', 'categorical', [1 2], 'names', xNames );

% NOTE(review): test_tree is not defined in this file; presumably it returns
% the hold-out error for each pruning level -- verify against its source.
[numNodes, errors] = test_tree(T, xTest, yTest);
[minErrors, minIdx] = min(errors);
% NOTE(review): prune levels start at 0 (the unpruned tree). If errors(1)
% corresponds to level 0 this should be minIdx - 1 -- confirm with test_tree.
optimalTree = prune(T, 'level', minIdx);

%------------------
% ---- Load the unlabelled data ----------------------------------------------
fid = fopen('data/test.csv');
if fid == -1
    error('titanic:fileOpen', 'Cannot open input file "%s".', 'data/test.csv');
end
% Same layout as the labelled file minus the Survived column; Age is %f here
% too so both files are parsed consistently.
test = textscan(fid, '%d%d%q%s%f%d%d%s%f%s%s', 'delimiter', ',', ...
    'headerLines', 1);
fclose(fid);

ctrlSet = dataset(test{:});
ctrlSet.Properties.VarNames = { 'PassengerId', 'Pclass', 'Name', 'Sex', ...
    'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked' };

% Apply exactly the same Sex encoding that was used for fitting.
[isKnownSex, sexCode] = ismember(cellstr(ctrlSet.Sex), sexLevels);
sexCode(~isKnownSex) = NaN;
ctrlSet.Sex = sexCode;

xCtrl = double(ctrlSet(:, xNames));

% ---- Predict and write the submission --------------------------------------
% The predictions are converted from a cell array of label strings to numbers.
% str2double converts each cell independently, without the eval call behind
% str2num and without requiring equal-width labels as cell2mat does.
yCtrl = eval(optimalTree, xCtrl);
yCtrl = str2double(yCtrl);

fname = 'output.csv';
fid = fopen(fname, 'w');
if fid == -1
    error('titanic:fileOpen', 'Cannot open output file "%s".', fname);
end
fprintf(fid, 'PassengerId,Survived\n');
fclose(fid);
% The header line is written first; the numeric rows are appended below it.
dlmwrite(fname, [ctrlSet.PassengerId yCtrl], '-append', 'delimiter', ',');