%% An exercise of K-means clustering
%% Chia-Feng Lu, 2020.03.25
% Start from a clean state: remove workspace variables and close open figures
clear, close all

%% Fisher's Iris dataset
% 50 samples from each of three species of Iris
% Four features were measured from each sample: the length and the width of the sepals and petals (in cm)
% Load the dataset; the plotting below uses the feature matrix "meas"
% (rows = samples, columns = features).
load('fisheriris')

% Scatter the raw, unlabelled samples using only the first 3 features
figure
plot3(meas(:,1), meas(:,2), meas(:,3), 'k.', 'MarkerSize', 10)
grid('on')
xlabel('feature 1')
ylabel('feature 2')
zlabel('feature 3')

%% Perform K-means clustering on the dataset
% ind  : cluster index (1..K) assigned to each sample (row of meas)
% C    : K-by-nFeatures matrix of cluster centroids
% sumd : K-by-1 within-cluster sums of point-to-centroid distances
% Note: kmeans uses a random initialisation, so a single run may end in a
% different (possibly sub-optimal) solution each time the script is executed.
K=3;
[ind,C,sumd] = kmeans(meas,K);

% One colour per cluster. hsv(3) yields red, green, blue, so the default
% K=3 figure looks the same as with hard-coded 'r','g','b', while any
% other K is plotted without index errors or missing clusters.
clr = hsv(K);

figure, hold on
for k = 1:K
    % members of cluster k (only plot first 3 features)
    plot3(meas(ind==k,1),meas(ind==k,2),meas(ind==k,3),'.', ...
        'Color',clr(k,:),'MarkerSize',10)
    % centroid of cluster k (only plot first 3 features)
    plot3(C(k,1),C(k,2),C(k,3),'kx','MarkerSize',20,'LineWidth',3)
end
view(3) % "hold on" was set on an empty axes, so switch to the 3-D view explicitly

grid on
xlabel('feature 1'),ylabel('feature 2'),zlabel('feature 3')
title(['Total sum of dist = ', num2str(sum(sumd))])

%% Perform K-means clustering with 20 replicates and parallel computing
% Reuse the cluster count chosen in the previous section so both runs are
% comparable; fall back to 3 when this section is executed on its own.
if ~exist('K','var')
    K = 3;
end

% 'Replicates',20 repeats the clustering from 20 different starting points
% and keeps the solution with the lowest total sum of distances.
% 'Display','final' prints the result of each replicate.
opts = statset('Display','final','UseParallel',true);
[ind,C,sumd] = kmeans(meas,K,'MaxIter',10000,...
   'Replicates',20,'Options',opts);

% One colour per cluster; hsv(3) yields red, green, blue.
clr = hsv(K);

figure, hold on
for k = 1:K
    % members of cluster k (only plot first 3 features)
    plot3(meas(ind==k,1),meas(ind==k,2),meas(ind==k,3),'.', ...
        'Color',clr(k,:),'MarkerSize',10)
    % centroid of cluster k (only plot first 3 features)
    plot3(C(k,1),C(k,2),C(k,3),'kx','MarkerSize',20,'LineWidth',3)
end
view(3) % "hold on" was set on an empty axes, so switch to the 3-D view explicitly

grid on
xlabel('feature 1'),ylabel('feature 2'),zlabel('feature 3')
title(['Total sum of dist = ', num2str(sum(sumd))])