%% An exercise on using a Gaussian mixture model (GMM) for soft clustering
%% Based on the MATLAB example "Cluster Gaussian Mixture Data Using Soft Clustering"
%% Chia-Feng Lu, 2020.03.25
% Start from an empty workspace with no open figures.
clear
close all

%% Simulate data drawn from a mixture of two bivariate Gaussian distributions.
% Seed the generator first so that every run draws the same sample.
rng(0,'twister')

% Component 1: mean vector and covariance matrix.
mu1    = [1 0];
sigma1 = [3 .2; .2 2];

% Component 2: mean vector and covariance matrix.
mu2    = [-1 -2];
sigma2 = [2 0; 0 1];

% 200 draws from component 1 followed by 100 draws from component 2,
% stacked row-wise so that X has one sample per row and one feature per column.
X = vertcat(mvnrnd(mu1,sigma1,200), mvnrnd(mu2,sigma2,100));

% Scatter plot of the raw samples. Hold is left on so that subsequent
% plotting commands add to this figure instead of replacing it.
figure
hold on
plot(X(:,1),X(:,2),'k.','markersize',10) % both features of every sample
xlabel('feature 1')
ylabel('feature 2')

%% Fit a two-component Gaussian mixture model (GMM)
K = 2; % number of mixture components (clusters)
gm = fitgmdist(X,K);

% Overlay the contour lines of each fitted component on the current axes.
for i = 1:K
    % plot_2D_gauss is defined outside this script. It receives the mean
    % (row i of gm.mu) and covariance (page i of gm.Sigma) of component i;
    % its three outputs are passed straight to contour as X, Y and Z.
    [Xt,Yt,Z] = plot_2D_gauss(gm.mu(i,:),gm.Sigma(:,:,i));
    contour(Xt,Yt,Z,7,'linewidth',1); % 7 contour levels per component
end

% The colormap does not depend on the loop variable, so it is set once
% here rather than being re-applied on every iteration.
colormap(hsv)

%% Posterior membership probabilities of every data point under the fitted GMM gm.
% Row r of P holds the component-membership posterior probabilities of sample r.
P = posterior(gm,X);

% Rank the samples by ascending posterior probability of component 1.
[n,~] = size(X);
[~,order] = sort(P(:,1),'ascend');

% Membership scores of both components, drawn against the ranking.
figure
plot(1:n, P(order,1), 'r-', ...
     1:n, P(order,2), 'b-')
xlabel('Point Ranking')
ylabel('Cluster Membership Score')
title('GMM with Full Unshared Covariances')
legend('Cluster 1', 'Cluster 2')

%% Plot the data and assign clusters by maximum posterior probability.
% Points whose component-1 posterior lies inside [threshold(1), threshold(2)]
% are flagged as points that could be in either cluster.
threshold = [0.4 0.6];

ind = cluster(gm,X);
indBoth = find(P(:,1)>=threshold(1) & P(:,1)<=threshold(2));
numInBoth = numel(indBoth)  % no semicolon: the count is echoed to the Command Window

figure
hClusters = gscatter(X(:,1),X(:,2),ind,'rb','+o',5);
hold on
hBoth = plot(X(indBoth,1),X(indBoth,2),'ko','MarkerSize',10);

% Bind the legend to explicit graphics handles instead of relying on the
% creation order of the axes children. One label is generated per cluster
% id actually present in ind, and the 'Both Clusters' entry is added only
% when the overlay plot produced a line object, so labels cannot be
% attached to the wrong series or be left over when indBoth is empty.
clusterIds = unique(ind);
legendLabels = arrayfun(@(k) sprintf('Cluster %d',k), clusterIds(:), ...
    'UniformOutput', false);
legendHandles = hClusters(:);
if ~isempty(hBoth)
    legendHandles = [legendHandles; hBoth];
    legendLabels = [legendLabels; {'Both Clusters'}];
end
legend(legendHandles, legendLabels, 'Location','SouthEast')
title('Scatter Plot - GMM with Full Unshared Covariances')

