Statistics Test Two Key

  1. Consider the data on car speed/stopping distance. Fit a regression model of the stopping distance for the cars (y) predicted by the speed (x).
    Cars <- read.table('cars.data',header=TRUE)
    Cars[1:5,]
    attach(Cars)
    
    1. Plot the data along with the regression line.
    2. m1 <- lm(dist ~ speed)
      summary(m1)
      plot(speed,dist)
      abline(m1)
      lines(lowess(speed,dist),col='red')
      
      
    3. Test for outliers.
    4. plot(c(0,51),c(0,3.6),type='n',main='Jacknife Residuals m1')
      points(abs(rstudent(m1)))
      abline(h=qt(1-.05/100,df=48))
      qt(1-.05/100,df=48)
      abs(rstudent(m1))[abs(rstudent(m1))>3]
      #28 and 49 are close, but no outliers according to this test
      
      
    5. Test for influential points.
    6. plot(abs(dffits(m1)),main='Dffits m1')
      abline(h=2*sqrt(2/50))
      dffits(m1)[abs(dffits(m1))>2*sqrt(2/50)]
      # 23 and  49
      identify(abs(dffits(m1)))
      
      
      plot(abs(dfbetas(m1)[,1]),main='dfbetas m1 (intercept)')
      abline(h=2/sqrt(50))
      dfbetas(m1)[,1][abs(dfbetas(m1)[,1])>2/sqrt(50)]
      # 2 and 49
      identify(abs(dfbetas(m1)[,1]))
      
      
      plot(abs(dfbetas(m1)[,2]),main='dfbetas m1 (slope)')
      abline(h=2/sqrt(50))
      dfbetas(m1)[,2][abs(dfbetas(m1)[,2])>2/sqrt(50)]
      # 49
      identify(abs(dfbetas(m1)[,2]),main='dfbetas m1 (slope)')
      
      
    7. Test for high leverage points.
    8. plot(hat(model.matrix(m1)))
      abline(h=4/50)
      identify(hat(model.matrix(m1)))
      #1 2 and 50
      
      
    9. Give a 95% confidence interval for the effect of speed on the stopping distance.
    10. confint(m1)
      # 3.096964 to 4.767853
      
      detach(Cars)
  2. Consider the data on Stack loss concerning the loss of ammonia as nitric acid up the smokestack.
  3. Stack <- read.table('stack.data',header=TRUE)
    attach(Stack)
    
    1. Fit the model with stack.loss as the y and the other variables as predictors. Give 90% confidence intervals for each of the slopes.
    2. m2 <- lm(stack.loss ~ Air.Flow + Water.Temp + Acid.Conc.) 
      confint(m2,level=.9)
      
      #Air.Flow      0.4810400   0.9502404
      #Water.Temp    0.6550686   1.9355036
      #Acid.Conc.   -0.4240127   0.1197676
      
      
    3. Construct an added variable for Water.Temp and comment on what you see.
    4. t1 <- lm(Water.Temp ~ Air.Flow + Acid.Conc.)
      t2 <- lm(stack.loss ~ Air.Flow + Acid.Conc.)
      plot(t1$resid,t2$resid,main="Added Variable Plot water.temp")
      lines(lowess(t1$resid,t2$resid),col='red')
      identify(t1$resid,t2$resid)
      abline(lm(t2$resid ~ t1$resid))
      #21 is down in bottom right corner, possibly influential here
      abline(lm(t2$resid ~ t1$resid,subset=-21),col='blue')
      
      
    5. Construct a partial-plus-residual plot for Water.Temp and comment on what you see.
    6. plot(Water.Temp,m2$residuals + 1.2953*Water.Temp,main="CR plot water temp")
      lines(lowess(Water.Temp,m2$residuals + 1.2953*Water.Temp),col='red')
      abline(lm(1.2953*Water.Temp+m2$residuals ~ Water.Temp))
      # fairly prominent curved pattern here
      identify(Water.Temp,m2$residuals + 1.2953*Water.Temp,main="CR plot water temp")
      
      
    7. Predict the stack loss for an average (mean) level of Air.Flow and Acid.Conc. and a level of Water.Temp one standard deviation below the mean
    8. predict(m2,newdata=data.frame(Air.Flow=mean(Air.Flow),
                                    Water.Temp=mean(Water.Temp)-sd(Water.Temp),
                                    Acid.Conc.=mean(Acid.Conc.)))
      # 13.42971
      
      detach(Stack)