library(MASS)
library(car)

data(longley)            # load the data
longley                  # print the data
?longley                 # get information about the data set

names(longley)[1] <- "y" # rename GNP.deflator (first column in the data set)
summary(longley)         # get summary statistics

model <- lm(y ~ GNP + Unemployed + Armed.Forces + Population + Year + Employed,
            data = longley)  # fit the MLRM
summary(model)           # very high R^2_adj

1 - 1/vif(model)         # R_j^2, the partial coefficients of determination;
                         # values close to 1 mean we have a multicollinearity problem

# Ridge regression (lm.ridge has scaling built in and transforms the results back!)
ridge.model <- lm.ridge(y ~ ., longley, lambda = seq(0, 0.1, 0.001))
plot(ridge.model)
select(ridge.model)      # looks as if k = 0.01 is a reasonable choice

ridge.0.01 <- lm.ridge(y ~ ., longley, lambda = 0.01)
ridge.0.01
model$coef               # coefficients have changed!

###########################
# Comparing the ridge-regression fit to the original least-squares fit:

# The X matrix for this problem:
attach(longley)
X.matrix <- cbind(rep(1, length = length(longley$y)),
                  GNP, Unemployed, Armed.Forces, Population, Year, Employed)
X.matrix

# Getting the fitted values for the ridge-regression fit:
fitted.vals <- X.matrix %*% coef(ridge.0.01)
fitted.vals

# Getting the SSE for the ridge-regression fit:
sse.ridge <- sum((longley$y - fitted.vals)^2)
sse.ridge

# The original least-squares fit:
model <- lm(y ~ GNP + Unemployed + Armed.Forces + Population + Year + Employed,
            data = longley)

# Getting the SSE for the original least-squares fit:
sum(resid(model)^2)

# The SSE for the ridge-regression fit is almost the same, which is good:
# the small bias buys much more stable coefficients.
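
###########################
# Two optional appendices (additions, not in the original script).

# (1) An extra multicollinearity check: the condition number of the
# correlation matrix of the predictors. Values far above ~30 indicate
# severe multicollinearity; for the longley data it is very large.
kappa(cor(longley[, -1]), exact = TRUE)

# (2) For intuition about what lm.ridge computes: the ridge coefficients
# solve the penalized normal equations (Z'Z + kI) b = Z'y on the
# standardized scale. A minimal sketch, assuming lm.ridge's internal
# scaling (standard deviation with divisor n, not n - 1); it should
# reproduce coef(ridge.0.01) up to rounding.
X <- as.matrix(longley[, -1])                 # the six predictors
Xm <- colMeans(X)
Xsd <- sqrt(colMeans(scale(X, center = TRUE, scale = FALSE)^2))  # sd, divisor n
Z <- scale(X, center = Xm, scale = Xsd)       # standardized predictors
yc <- longley$y - mean(longley$y)             # centered response
k <- 0.01
b.std <- solve(t(Z) %*% Z + k * diag(ncol(Z)), t(Z) %*% yc)
b.orig <- drop(b.std) / Xsd                   # back-transform to the original scale
c(Intercept = mean(longley$y) - sum(b.orig * Xm), b.orig)
coef(ridge.0.01)                              # compare: should be very close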