% Build and evaluate a classification tree using the "fitctree" function
% Author: Chia-Feng Lu, 2021.03.29
clear, close all

%% load Heart data.
% Only the raw cell array is needed: row 1 holds the variable names and
% column 1 is skipped when the table is built below.
% NOTE(review): MathWorks lists xlsread as "not recommended"; consider
% readcell/readtable if the MATLAB release in use provides them.
[~,~,raw]=xlsread('Heart.csv');

% construct Table array
data=cell2table(raw(2:end,2:end));
data.Properties.VariableNames=raw(1,2:end); % assign the VariableNames

% identify patients with missing data
% A cell entry counts as missing when it is the text 'NA' or a scalar NaN.
% Each cell is tested on its own because ismember(...,'NA') only accepts
% cell arrays of character vectors and fails on columns that mix numbers
% with 'NA' text.
ismissingcell=@(v) (ischar(v) && strcmp(v,'NA')) || ...
    (isnumeric(v) && isscalar(v) && isnan(v));

missingrow=false(size(data,1),1); % one flag per patient, preallocated
for i=1:size(data,2)
    col=data{:,i};
    if iscell(col)
        missingrow=missingrow | cellfun(ismissingcell,col);
    else
        missingrow=missingrow | isnan(col);
    end
end
excludeind=find(missingrow)'; % row indices of the excluded patients (sorted, unique)
data(missingrow,:)=[]; % remove the patients with missing data

% A column that mixed numbers with 'NA' markers contains only numbers once
% the incomplete patients are removed. Store it as a numeric variable,
% because fitctree does not accept cell columns other than cell arrays of
% character vectors.
for i=1:size(data,2)
    col=data{:,i};
    if iscell(col) && ~isempty(col) && all(cellfun(@(v) isnumeric(v) && isscalar(v),col))
        data.(data.Properties.VariableNames{i})=cell2mat(col);
    end
end

%% Split the subjects: 70% for training, 30% held out for testing
rng(0,'twister');  % fixed seed so the same random split is drawn on every run

nsubject = height(data);                    % one table row per subject
C = cvpartition(nsubject,'holdout',0.30);   % random (non-stratified) hold-out of 30% of the rows
istrain = training(C);                      % logical mask of the training rows
istest = test(C);                           % logical mask of the held-out rows
dataTrain = data(istrain,:);
dataTest = data(istest,:);

%% [Method 1] Construct a classification tree using all variables/features
% Construct a classification tree using the training dataset.
% 'AHD' is the response variable; every other listed column is a predictor.
predictors={'Age', 'Sex', 'ChestPain', 'RestBP', 'Chol', 'Fbs', 'RestECG', 'MaxHR', 'ExAng', 'Oldpeak', 'Slope', 'Ca', 'Thal'};

% NOTE(review): per the fitctree documentation, the hyperparameters chosen
% by 'OptimizeHyperparameters' override values passed through other
% name-value arguments. With 'all', SplitCriterion is itself optimized, so
% the explicit 'deviance' below is not guaranteed to be the criterion of
% the final tree. Restrict the optimized set (e.g. {'MinLeafSize',
% 'MaxNumSplits'}) if the criterion must stay fixed.
% NOTE(review): 'UseParallel',true needs Parallel Computing Toolbox, and
% parallel Bayesian optimization does not necessarily give reproducible
% results even though rng is seeded earlier in this script.
tree_allv = fitctree(dataTrain,'AHD','PredictorNames',predictors,...
    'SplitCriterion', 'deviance', ...  % 'deviance': cross entropy, 'gdi': Gini index
    'OptimizeHyperparameters','all',...
    'HyperparameterOptimizationOptions',...
    struct('UseParallel',true,'AcquisitionFunctionName','expected-improvement-plus','Kfold',5)); % 5-fold CV loss is the optimization objective

view(tree_allv,'Mode','graph')

% Evaluate on the held-out test set (output intentionally displayed)
AHD_predict=predict(tree_allv,dataTest);
[cm,order] = confusionmat(dataTest.AHD,AHD_predict)
accuracy = trace(cm)/sum(cm(:))  % correctly classified subjects / all test subjects

%% Variable/Feature selection based on the importance scores
% One importance estimate per predictor, in the order of tree_allv.PredictorNames
imp = predictorImportance(tree_allv);

figure;
bar(imp);
title('Predictor Importance Estimates');
ylabel('Estimates');
xlabel('Predictors');
h = gca;
% Pin one tick under every bar before assigning the labels; with automatic
% ticks the labels can be attached to the wrong bars (or cycled) when the
% axes does not place a tick at each bar.
h.XTick = 1:numel(imp);
h.XTickLabel = tree_allv.PredictorNames;
h.XTickLabelRotation = 45;
h.TickLabelInterpreter = 'none'; % show names literally (e.g. keep underscores as typed)

% Stop here when the whole script is run; the section below is therefore
% skipped in a full run and has to be executed on its own (e.g. as a section).
return
%% ClassifierLearner
% Launch the Classification Learner app for interactive model exploration.
classificationLearner
