% Perform the parametric polynomial regression 
% Author: Chia-Feng Lu, 2020.04.08
clear, close all

%% load Auto data:
% Only the numeric matrix is used below; entries that could not be read as
% numbers show up as NaN in num and are filtered out afterwards.
num=xlsread('Auto.csv');
mpg=num(:,1); % gas mileage in miles per gallon
horsepower=num(:,4);

% Keep only observations where both the predictor and the response are
% available; a single NaN would otherwise propagate into every fit.
valid=~isnan(horsepower) & ~isnan(mpg);
mpg=mpg(valid);
horsepower=horsepower(valid);

%% perform polynomial regression
% R^2 = 1 - RSS/TSS. TSS depends only on the response, so compute it once.
TSS=sum((mpg-mean(mpg)).^2); % total sum of squares

p_d1=polyfit(horsepower,mpg,1); % linear, coefficients in raw horsepower units
RSS=sum((polyval(p_d1,horsepower)-mpg).^2); % residual sum of squares
R_square_d1=1-RSS/TSS;

p_d2=polyfit(horsepower,mpg,2); % Degree 2, coefficients in raw horsepower units
RSS=sum((polyval(p_d2,horsepower)-mpg).^2); % residual sum of squares
R_square_d2=1-RSS/TSS;

% Degree 5: powers of raw horsepower up to x^5 span many orders of
% magnitude, which makes the least-squares problem badly conditioned.
% Requesting the third output makes polyfit center and scale x, so p_d5
% holds coefficients in terms of xhat=(horsepower-mu_d5(1))/mu_d5(2) and
% must always be evaluated with polyval(p_d5,x,[],mu_d5).
[p_d5,~,mu_d5]=polyfit(horsepower,mpg,5); % Degree 5
RSS=sum((polyval(p_d5,horsepower,[],mu_d5)-mpg).^2); % residual sum of squares
R_square_d5=1-RSS/TSS;

%% plot the observations and the fitted curves
figure,
scatter(horsepower,mpg), hold on
x=sort(horsepower); % sorted so each fitted curve is drawn left to right
plot(x,polyval(p_d1,x),'k-','linewidth',2)
plot(x,polyval(p_d2,x),'r-','linewidth',2)
plot(x,polyval(p_d5,x,[],mu_d5),'g-','linewidth',2)
xlabel('horsepower'), ylabel('mpg')
legend('Observations',...
             ['Linear, R^2=' num2str(R_square_d1)],...
             ['Degree 2, R^2=' num2str(R_square_d2)],...
             ['Degree 5, R^2=' num2str(R_square_d5)])