library(MASS)
library(car)

data(longley)            # load the data
longley                  # print the data
?longley                 # get information about the data set

names(longley)[1] <- "y" # rename GNP.deflator (first column in the data set)
summary(longley)         # get summary statistics

model <- lm(y ~ GNP + Unemployed + Armed.Forces + Population + Year + Employed,
            data = longley)  # fit the MLRM
summary(model)           # very high R^2_adj

1 - 1/vif(model)         # R_j^2, the partial coefficients of determination;
                         # values close to 1 mean we have a multicollinearity problem

# Ridge regression (lm.ridge has scaling built in and transforms the results back!)
ridge.model <- lm.ridge(y ~ ., longley, lambda = seq(0, 0.1, 0.001))
plot(ridge.model)
select(ridge.model)      # looks as if k = 0.01 is a reasonable choice

ridge.0.01 <- lm.ridge(y ~ ., longley, lambda = 0.01)
ridge.0.01
model$coef               # coefficients have changed!

###########################
# Comparing the ridge-regression fit to the original least-squares fit:

# The X matrix for this problem:
attach(longley)
X.matrix <- cbind(rep(1, length = length(longley$y)),
                  GNP, Unemployed, Armed.Forces, Population, Year, Employed)
X.matrix

# Getting the fitted values for the ridge-regression fit:
fitted.vals <- X.matrix %*% coef(ridge.0.01)
fitted.vals

# Getting the SSE for the ridge-regression fit:
sse.ridge <- sum((longley$y - fitted.vals)^2)
sse.ridge

# The original least-squares fit:
model <- lm(y ~ GNP + Unemployed + Armed.Forces + Population + Year + Employed,
            data = longley)

# Getting the SSE for the original least-squares fit:
sum(resid(model)^2)

# The SSE for the ridge-regression fit is almost the same, which is good:
# the small bias buys much more stable coefficients.
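
###########################
# Two optional appendices (additions, not in the original script).

# (1) An extra multicollinearity check: the condition number of the
# correlation matrix of the predictors. Values far above ~30 indicate
# severe multicollinearity; for the longley data it is very large.
kappa(cor(longley[, -1]), exact = TRUE)

# (2) For intuition about what lm.ridge computes: the ridge coefficients
# solve the penalized normal equations (Z'Z + kI) b = Z'y on the
# standardized scale. A minimal sketch, assuming lm.ridge's internal
# scaling (standard deviation with divisor n, not n - 1); it should
# reproduce coef(ridge.0.01) up to rounding.
X <- as.matrix(longley[, -1])                 # the six predictors
Xm <- colMeans(X)
Xsd <- sqrt(colMeans(scale(X, center = TRUE, scale = FALSE)^2))  # sd, divisor n
Z <- scale(X, center = Xm, scale = Xsd)       # standardized predictors
yc <- longley$y - mean(longley$y)             # centered response
k <- 0.01
b.std <- solve(t(Z) %*% Z + k * diag(ncol(Z)), t(Z) %*% yc)
b.orig <- drop(b.std) / Xsd                   # back-transform to the original scale
c(Intercept = mean(longley$y) - sum(b.orig * Xm), b.orig)
coef(ridge.0.01)                              # compare: should be very close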