# Regression notes: state education spending (simple vs. multiple regression),
# the prostate data (nested models), and how to overlay r^2 and residual
# standard error on a single plot.
#
# Columns are referenced through `data =` / `with()` rather than
# attach()/detach(), so each model states which data frame it is fitted on.

# Education spending data ----

Education <- read.table(
  "http://math.uttyler.edu/nathan/classes/grad-statistics/data/eduspend.data",
  header = TRUE
)

# scatterplot matrix of every pair of variables
plot(Education)

income.model <- lm(education ~ income, data = Education)
summary(income.model)

#             Estimate Std. Error t value Pr(>|t|)
# (Intercept) 17.710031  28.873840   0.613    0.542
# income       0.055376   0.008823   6.276 8.76e-08 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 34.94 on 49 degrees of freedom
# Multiple R-Squared: 0.4457,     Adjusted R-squared: 0.4343
# F-statistic: 39.39 on 1 and 49 DF,  p-value: 8.762e-08

# equation of the regression line:
#   education = 17.710031 + .055376*income
#
# rse = 34.94    model: education_i = b0 + b1*income_i + e_i
#                we assumed Var(e_i) = sigma_e^2
#                rse is point estimate for sigma_e
#
# r^2 = .4457    our model explains 44.57% of the variability in ed. spending
#                44.57% of the variability in ed. spending is due to the
#                linear relationship between income and ed. spending
#
# what is the effect of a $100 increase in state per capita income on the
# state's education spending?  100*.055376 or about $5.54

plot(education ~ income, data = Education)
abline(income.model)

# identify() waits for mouse clicks on the open plot, so only call it when
# someone is actually at the console.
if (interactive()) {
  with(Education, identify(income, education))
}

big.model <- lm(education ~ income + young + urban, data = Education)
summary(big.model)

#               Estimate Std. Error t value Pr(>|t|)
# (Intercept) -2.868e+02  6.492e+01  -4.418 5.82e-05 ***
# income       8.065e-02  9.299e-03   8.674 2.56e-11 ***
# young        8.173e-01  1.598e-01   5.115 5.69e-06 ***
# urban       -1.058e-01  3.428e-02  -3.086  0.00339 **
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 26.69 on 47 degrees of freedom
# Multiple R-Squared: 0.6896,     Adjusted R-squared: 0.6698
# F-statistic: 34.81 on 3 and 47 DF,  p-value: 5.337e-12

# now my model looks like:
#
# education_i = b_0 + b_1 income_i + b_2 young_i + b_3 urban_i + e_i
#             = -286.8 + .08065 income_i + .8173 young_i + -.1058 urban_i
#                          /\                 /\               /\
#                          |                  |                |
#                          |                  |                |
#                    these are now partial derivatives!!!!!!
#
# standard partial derivative interpretation:
#   keep everything else the same, change only
#   that variable

# compare: the coefficient on urban when it is the only predictor
summary(lm(education ~ urban, data = Education))

# Coefficients:
#              Estimate Std. Error t value Pr(>|t|)
# (Intercept) 142.60415   28.81579   4.949 9.22e-06 ***
# urban         0.08083    0.04230   1.911   0.0619 .

# Prostate data ----

Prostate <- read.table(
  "http://math.uttyler.edu/nathan/classes/grad-statistics/data/prostate.data",
  header = TRUE
)

# NOTE(review): the original script attaches Prostate and never detaches it,
# so anything that runs after this section may rely on bare column names.
# The attach() is kept for that reason only; the models below pass
# `data = Prostate` explicitly and do not depend on it.
attach(Prostate)

# first ten rows
head(Prostate, 10)

mod.1 <- lm(lpsa ~ lcavol, data = Prostate)
summary(mod.1)

mod.2 <- lm(lpsa ~ lcavol + lweight, data = Prostate)
summary(mod.2)

mod.3 <- lm(lpsa ~ lcavol + lweight + svi, data = Prostate)
summary(mod.3)

# Plotting r^2 and rse together ----

# at the end of problem number 4 you've got 2 lists:
my.rsqs <- c(1, 2, 8, 4, 5, 3, 9)  # the r^2 numbers
my.rses <- c(6, 5, 4, 3, 7, 5, 2)  # the residual std errors

# don't do this: plot(my.rsqs,my.rses)
# this looks at rse as a function of r^2, it's not, and that's not
# what the problem is about!

# we're looking to combine the following two plots:
plot(my.rsqs)
plot(my.rses)

summary(my.rsqs)
summary(my.rses)

# look at the biggest numbers, here the max is 9, min is 1
# so the range over which I want to plot is 1 <= x <= 7, 1 <= y <= 9
# let's go a little wider on each:           0 <  x <  8, 0 <  y <  10

# type = "n" draws the empty axes only; the two series are added on top
plot(
  c(0, 8), c(0, 10),
  type = "n",
  main = "rse and r^2",
  xlab = "# terms in model",
  ylab = "number"
)
points(my.rsqs, col = "blue", pch = 19)
points(my.rses, col = "red", pch = 22)