%% An exercise of hierarchical clustering
%% Chia-Feng Lu, 2020.03.25
% Start from a clean workspace and close any figures left from earlier runs.
clear
close all

%% NCI60 Cancer Cell Line Data
% xlsread returns the numeric block, the text cells and the raw cell array.
% The text output is kept as CellLine and is used later as dendrogram labels.
[numdata, CellLine, raw] = xlsread('NCI60data.csv');

% Drop the first numeric row.
% NOTE(review): presumably a numeric header row in the CSV -- confirm
% against the file layout.
numdata(1,:) = [];

%% Standardize every variable to zero mean and unit standard deviation.
Z = zscore(numdata);

%% Pairwise dissimilarity between every pair of observations (rows of Z).
% D is a 1-by-(M*(M-1)/2) row vector, where M is the number of observations.
D = pdist(Z, 'euclidean');

%% Group the objects into a binary, hierarchical cluster tree.
% Build one tree per linkage criterion from the same distance vector D.
CT_complete = linkage(D, 'complete');
CT_average  = linkage(D, 'average');
CT_single   = linkage(D, 'single');

% Plot each tree in its own figure. Passing 0 as the second argument shows
% every leaf; the third output is the left-to-right leaf order, which is
% kept so that data rows can later be reordered to match the tree.
% H and T are overwritten by each call and hold the single-linkage values
% at the end.

% --- complete linkage ---
figure;
[H, T, outperm_complete] = dendrogram(CT_complete, 0, ...
    'Labels', CellLine, 'Orientation', 'top');
set(gca, 'XTickLabelRotation', 90);   % vertical labels so they do not overlap
title('Complete Linkage');

% --- average linkage ---
figure;
[H, T, outperm_average] = dendrogram(CT_average, 0, ...
    'Labels', CellLine, 'Orientation', 'top');
set(gca, 'XTickLabelRotation', 90);
title('Average Linkage');

% --- single linkage ---
figure;
[H, T, outperm_single] = dendrogram(CT_single, 0, ...
    'Labels', CellLine, 'Orientation', 'top');
set(gca, 'XTickLabelRotation', 90);
title('Single Linkage');

%% Determine where to cut the hierarchical tree into clusters.
K=5; % number of clusters
% Cut the complete-linkage tree so that at most K clusters are formed;
% Clabel holds one cluster index per observation.
Clabel = cluster(CT_complete,'maxclust',K);

%% Display the original and reordered heatmap
% Diverging 9-level colormap: green (low) -> black (middle) -> red (high).
% The two 5-row halves both contain a black row where they meet (rows 5
% and 6), so the duplicate is removed to leave a single black centre.
cmap=[zeros(5,1), linspace(1,0,5)',zeros(5,1) ; linspace(0,1,5)',zeros(5,2)];
cmap(6,:)=[];

% Show at most the first 500 variables; clamp so that data sets with fewer
% columns do not raise an index-out-of-bounds error.
nShow  = min(500, size(Z,2));
zShown = Z(:,1:nShow);

% Use colour limits symmetric about zero so the black centre of the
% colormap corresponds to z = 0 (the mean). Without this, imagesc scales
% to [min, max] of the data and black lands on an arbitrary value.
cLim = max(abs(zShown(:)));            % max ignores NaN entries
if isempty(cLim) || ~isfinite(cLim) || cLim <= 0
    cLim = 1;                          % fallback for empty/constant/NaN data
end

figure,
% Left: observations in file order. Right: observations reordered to the
% leaf order of the complete-linkage dendrogram. Both panels share the same
% colour limits so they are directly comparable.
subplot(1,2,1),imagesc(zShown',[-cLim cLim]),title('Original Hotmap (part)')
subplot(1,2,2),imagesc(zShown(outperm_complete,:)',[-cLim cLim]),title('Clustered Hotmap (part)')
colormap(cmap)
