library(MASS) # let's generate some highly collinear xs d.1 <- runif(50,-10,10) d.2 <- runif(50,-10,10) slopes.1 <- rnorm(10,sd=10) slopes.2 <- rnorm(10,sd=10) intercepts.1 <- runif(10,-10,10) intercepts.2 <- runif(10,-10,10) sds.1 <- runif(10,1,10) sds.2 <- runif(10,5,20) x1.to.x10 <- sapply(1:10, function (i) {slopes.1[i]*d.1+intercepts.1[i]+rnorm(50,sd=sds.1[i])}) x11.to.x20 <- sapply(1:10, function (i) {slopes.2[i]*d.2+intercepts.2[i]+rnorm(50,sd=sds.2[i])}) plot(data.frame(x1.to.x10)) # now the ys slopes <- runif(21,-10,10) y <- apply(rep(slopes[1],50) + sapply(2:11,function (i) {slopes[i]*x1.to.x10[,i-1]}) + sapply(12:21,function (i){slopes[i]*x11.to.x20[,i-11]}), 1,sum) y <- y + rnorm(50,sd=20) # add some randomness my.data <- data.frame(y,x1.to.x10,x11.to.x20) names(my.data) <- c("Y","X1","X2","X3","X4","X5","X6","X7","X8","X9","X10", "X11","X12","X13","X14","X15","X16","X17","X18","X19","X20") big.model <- lm(Y ~ .,data=my.data) summary(big.model)$coefficients[,1] slopes # now let's ask "How well did we do?" We'll make some more Xs and the # real Ys more.x1.to.x10 <- sapply(1:10, function (i) {slopes.1[i]*d.1+intercepts.1[i]+rnorm(50,sd=sds.1[i])}) more.x11.to.x20 <- sapply(1:10, function (i) {slopes.2[i]*d.2+intercepts.2[i]+rnorm(50,sd=sds.2[i])}) more.xs <- data.frame(x1.to.x10,x11.to.x20) names(more.xs) <- c("X1","X2","X3","X4","X5","X6","X7","X8","X9","X10", "X11","X12","X13","X14","X15","X16","X17","X18","X19","X20") more.y <- apply(rep(slopes[1],50) + sapply(2:11,function (i) {slopes[i]*x1.to.x10[,i-1]}) + sapply(12:21,function (i){slopes[i]*x11.to.x20[,i-11]}), 1,sum) predicted.ys <- predict(big.model,more.xs) root.mean.square.error <- sqrt(mean( (predicted.ys - more.y)^2 )) root.mean.square.error # the error we think we should see summary(big.model)$sigma # the error preidicting the y's used to build the model sqrt(mean( (predict(big.model) - y)^2 )) #### now let's do ridge regression my.center.data <- sweep(my.data,2,mean(my.data)) my.ridge <- lm.ridge(Y ~ .,data=my.center.data,lambda=seq(0,.02,.001)) select(my.ridge) my.ridge.model <- lm.ridge(Y ~ .,data=my.center.data,lambda=.003) matplot(my.ridge$lambda,t(my.ridge$coef),type='l') abline(v=.003) abline(v=.005638393) abline(v=.05016273) names(my.ridge.model) my.ridge.model$coef my.ridge.model$scales my.ridge.pred <- scale(my.center.data[,-1],center=FALSE,scale=my.ridge.model$scale) %*% my.ridge.model$coef + mean(my.data[,1]) sqrt(mean( (my.ridge.pred - y)^2 )) cmore.xs <- sweep(more.xs,2,mean(more.xs)) more.preds <- scale(cmore.xs,center=FALSE,scale=my.ridge.model$scale) %*% my.ridge.model$coef + mean(my.data[,1]) sqrt(mean( (more.preds - more.y)^2 ))