% Train and evaluate a support vector machine (SVM) classifier using the "fitcsvm" function
% Author: Chia-Feng Lu, 2021.04.07
% Start from an empty workspace with no open figures.
clear, close all

%% load Heart data.
% Only the raw cell array is used: its first row holds the column headers
% and its first column is skipped when the table is built.
[~,~,raw]=xlsread('Heart.csv');

% construct Table array
data=cell2table(raw(2:end,2:end));
data.Properties.VariableNames=raw(1,2:end); % assign the VariableNames

% identify patients with missing data
% A cell is treated as missing when it holds the text 'NA' or a numeric NaN.
% The test is applied cell by cell because a column that mixes numbers with
% 'NA' text is stored as a mixed cell column, which ismember cannot handle.
isNA=@(v) (ischar(v) && strcmp(v,'NA')) || ...
    (isnumeric(v) && isscalar(v) && isnan(v));

missingrow=false(size(data,1),1);
for i=1:size(data,2)
    col=data{:,i};
    if iscell(col)
        colmissing=cellfun(isNA,col);
    else
        colmissing=any(isnan(col),2); % one flag per row
    end
    missingrow=missingrow | colmissing(:);
end
excludeind=find(missingrow)'; % row indices of patients with missing data
data(excludeind,:)=[]; % remove the patients with missing data

% Columns that were stored as cells only because of their 'NA' entries now
% contain nothing but numeric scalars; turn them into plain numeric columns
% so they can be used as numeric predictors.
for i=1:size(data,2)
    col=data{:,i};
    if iscell(col) && ~isempty(col) && ...
            all(cellfun(@(v) isnumeric(v) && isscalar(v),col))
        data.(data.Properties.VariableNames{i})=cell2mat(col);
    end
end

%% Separate data into training (70%) and test (30%) datasets
% Seed the random number generator first so the split is reproducible.
rng(0,'twister')

% Randomly hold out 30% of the subjects for testing; the rest are used for
% training.
C = cvpartition(height(data),'holdout',0.30);
dataTest = data(test(C),:);
dataTrain = data(training(C),:);

%% [Method 1] Construct a SVM classifier using all variables/features
% Fit an SVM classifier on the training dataset, with the hyperparameters
% tuned by the built-in optimizer.
rng(0,'twister')  % For reproducibility

% Table variables used as predictors; 'AHD' is the response variable.
predictors={'Age', 'Sex', 'ChestPain', 'RestBP', 'Chol', 'Fbs', ...
    'RestECG', 'MaxHR', 'ExAng', 'Oldpeak', 'Slope', 'Ca', 'Thal'};

% Settings for the hyperparameter search (5-fold cross-validation).
hyperoptOptions=struct('AcquisitionFunctionName','expected-improvement-plus','kfold',5);

% Hyperparameters that can also be set by hand instead of being optimized:
% 'KernelFunction':  linear, polynomial, gaussian (or rbf)
% 'Standardize'
svm_model = fitcsvm(dataTrain,'AHD', ...
    'PredictorNames',predictors, ...
    'OptimizeHyperparameters','all', ...
    'HyperparameterOptimizationOptions',hyperoptOptions);

% Evaluate on the held-out test dataset. The semicolons are omitted on
% purpose so that the confusion matrix, class order and accuracy are printed.
AHD_predict=predict(svm_model,dataTest);
[cm,order] = confusionmat(dataTest.AHD,AHD_predict)
accurarcy = sum(diag(cm))/sum(cm(:))

% Store the trained model for later use.
save('trainedSVMmodel.mat','svm_model')

% Stop here when the script is run from top to bottom; the section below is
% only reached when it is run on its own.
return
%% Classification Learner
% Open the interactive Classification Learner app.
classificationLearner
