#### a silly non-parametric approach that illustrates what we're doing

# Load the SLID data set (needs network access) and show its first rows.
SLID <- read.table(
  "http://math.uttyler.edu/nathan/classes/statistics/data/slid.data",
  header = TRUE
)
head(SLID, 10)

# Columns are reached through with() instead of attach()/detach(): the axis
# labels stay "education" / "wages" and the search path is left untouched.
with(SLID, {
  plot(education, wages)

  # One box per whole-number education level from 7 to 20. which() drops rows
  # whose education is NA instead of turning them into NA wages.
  edu_levels <- 7:20
  wages_by_level <- lapply(
    edu_levels,
    function(k) wages[which(education == k)]
  )
  boxplot(
    wages_by_level,
    main = "Wages at various education levels",
    xlab = "Education",
    names = as.character(edu_levels)
  )
})

### note there is an entire (wages) population for each level of
### education -- this is important to grasp
with(SLID, {
  plot(education, wages)

  # Mean wage at each whole-number education level 0..20, computed once and
  # reused for both the points and the connecting line. A level with no
  # observations yields NaN, which points()/lines() simply leave out.
  mean_levels <- 0:20
  level_means <- vapply(
    mean_levels,
    function(k) mean(wages[which(education == k)], na.rm = TRUE),
    numeric(1)
  )
  points(mean_levels, level_means, pch = 19, col = "red")
  lines(mean_levels, level_means, col = "red")
})

### we'll beat this data set to death in this class, you'll eventually
### grow to hate it, so consider beating it to death a small pleasure!

### from this point on we're parametric again

# Load the Prestige data set (needs network access) and show its first rows.
Prestige <- read.table(
  "http://math.uttyler.edu/nathan/classes/statistics/data/prestige.data",
  header = TRUE
)
head(Prestige, 10)

# NOTE(review): attach() is kept only because code outside this section may
# rely on the bare column names (e.g. `education`, `income`); confirm and drop
# it if nothing does. Everything below names Prestige explicitly, so it does
# not depend on the search path.
attach(Prestige)

with(Prestige, plot(education, income))
names(Prestige)

# Simple linear regression of income on education, drawn in red.
mod.1 <- lm(income ~ education, data = Prestige)
abline(mod.1, col = "red")
summary(mod.1) ### what do all of these numbers mean?

# Horizontal reference line at the mean of all incomes below 15000, in green.
abline(
  h = mean(Prestige$income[Prestige$income < 15000], na.rm = TRUE),
  col = "green"
)
### is red better than green?