# Import SPSS data into R
# install.packages("Rcmdr")   # uncomment on first run
library(Rcmdr)
library(foreign, pos=15)

# Read the three SPSS data sets used in this chapter.  file.choose() opens an
# interactive file picker; each call expects the matching .sav file.
lafrance <- read.spss(file.choose(), use.value.labels=TRUE,
                      max.value.labels=Inf, to.data.frame=TRUE)
lafrance5 <- read.spss(file.choose(), use.value.labels=TRUE,
                       max.value.labels=Inf, to.data.frame=TRUE)
beqSwear <- read.spss(file.choose(), use.value.labels=TRUE,
                      max.value.labels=Inf, to.data.frame=TRUE)

# 7.1.1 Coplots ----
# Conditioning plots: L2 phonemic awareness vs. Grade 1 L1 reading,
# conditioned on one (then two) further variables, with a loess smoother.
coplot(l2phonemicawareness ~ grade1l1reading | nonverbalreasoning,
       panel=function(x, y, ...) panel.smooth(x, y, span=.8, ...),
       data=lafrance)
coplot(l2phonemicawareness ~ grade1l1reading | nonverbalreasoning*namingspeed,
       panel=function(x, y, ...) panel.smooth(x, y, span=.8, ...),
       data=lafrance)

# 7.1.2 3D Graphs ----
# FIX: logical arguments must be TRUE/FALSE — R has no lowercase true/false,
# so the original call would fail with "object 'true' not found".
scatter3d(lafrance$kinderl2reading, lafrance$grade1l1reading,
          lafrance$l2phonemicawareness, fit=c("linear", "smooth"),
          bg="white", axis.scales=TRUE, grid=TRUE, ellipsoid=FALSE,
          xlab="kinderl2reading", ylab="grade1l1reading",
          zlab="l2phonemicawareness")

# 7.1.3 Tree Models ----
library(tree)
model <- tree(lafrance5)
plot(model)
text(model)
# NOTE(review): `lafrance.tree` is not created anywhere in this file —
# confirm the intended data set (presumably lafrance5).
model <- tree(Grade1L1ReadingPerformance ~ ., lafrance.tree)

# 7.3 Doing the Same Type of Regression as SPSS ----
model <- lm(grade1l1reading ~ nonverbalreasoning + kinderl2reading +
              namingspeed + workingmemory + phonologicalawarenessinl2,
            data=lafrance5)
summary(model)
confint(model)
# Standardized (beta) coefficients: scale() every variable in the formula.
model.standard <- lm(scale(grade1l1reading) ~ scale(nonverbalreasoning) +
                       scale(kinderl2reading) + scale(namingspeed) +
                       scale(workingmemory) +
                       scale(phonologicalawarenessinl2),
                     data=lafrance5)
library(relaimpo)
calc.relimp(model)   # relative importance of each predictor
RegModel.1 <- lm(Grade1L1ReadingPerformance ~ KinderL2ReadingPerformance +
                   NamingSpeed + NonverbalReasoning +
                   PhonologicalAwarenessInL2 + WorkingMemory,
                 data=lafrance5)

# 7.5 Finding the Best Fit ----
# Model-formula notation.  The expansion "equations" in the original were not
# valid R (a formula cannot contain two `=` signs), so they are shown here as
# comments; the expansions follow R's documented formula operators.
model <- y ~ A + B + C + A:B + B:C + A:B:C
# y ~ A*B*C       expands to A + B + C + A:B + A:C + B:C + A:B:C
model <- y ~ A*B*C
# y ~ (A + B + C)^2 expands to A + B + C + A:B + A:C + B:C
model <- y ~ (A + B + C)^2
# FIX: the original used an en-dash (–) where the minus operator is required.
model <- y ~ A*B*C - A:B:C
# I() protects arithmetic (here squaring) from formula interpretation.
model <- y ~ A + I(A^2) + B + I(B^2)

# 7.5.1 First Steps to Finding the Minimal Adequate Model in R ----
library(mice)
imp <- mice(lafrance5)   # multiple imputation of missing values
complete(imp)                 # display the completed (imputed) data
lafrance <- complete(imp)     # replace working data with the imputed version

# Fit the full three-way-interaction model, then step down one term at a
# time, comparing nested models with anova() and AIC() at each step.
model1 <- lm(g1l1wr ~ pal2*kl2wr*ns, na.action=na.exclude, data=lafrance)
model <- lm(g1l1wr ~ ., data=lafrance, na.action=na.exclude)
summary(model1)
model2 <- update(model1, ~. - pal2:kl2wr:ns, data=lafrance)
summary(model2)
anova(model1, model2)
AIC(model1, model2)
model3 <- update(model2, ~. - pal2:ns, data=lafrance)
summary(model3)
anova(model2, model3)
model4 <- update(model3, ~. - kl2wr:ns, data=lafrance)
summary(model4)
anova(model3, model4)
model5 <- update(model4, ~. - pal2:kl2wr, data=lafrance)
summary(model5)
anova(model4, model5)
model6 <- update(model5, ~. - ns, data=lafrance)
summary(model6)
anova(model5, model6)
model7 <- update(model6, ~. - kl2wr, data=lafrance)
summary(model7)
model8 <- lm(g1l1wr ~ 1, data=lafrance, na.action=na.exclude)  # null model
anova(model7, model8)
library(bootStepAIC)
boot.stepAIC(model1, data=lafrance)   # bootstrapped stepwise selection

# 7.5.2 Reporting the Results of Regression in R ----
deviance(model1)
# [1] 3.484850        (console output kept from the original transcript)

# 7.6 Further Steps to Finding the Best Fit: Overparameterization and
#     Polynomial Regression ----
# FIX: quadratic terms must use I() (capital i) inside a formula; the
# lowercase i() in the original is not a function and would error.
model1 <- lm(g1l1wr ~ nr + I(nr^2) + wm + I(wm^2) + ns + I(ns^2) +
               pal2 + I(pal2^2) + kl2wr + I(kl2wr^2), data=lafrance)
summary(model1)
library(bootStepAIC)
boot.stepAIC(model1, data=lafrance)
model2 <- lm(g1l1wr ~ ns + I(ns^2) + pal2 + I(pal2^2), data=lafrance)
summary(model2)
getfullmodel <- lm(g1l1wr ~ nr*wm*ns*pal2*kl2wr, data=lafrance)
summary(getfullmodel)  # output not printed but gives the two-way terms below
interactions <- c("nr:wm", "nr:ns", "wm:ns", "nr:pal2", "wm:pal2",
                  "ns:pal2", "nr:kl2wr", "wm:kl2wr", "ns:kl2wr",
                  "pal2:kl2wr")
model3 <- lm(g1l1wr ~ nr + wm + ns + pal2 + kl2wr + pal2:kl2wr + nr:ns +
               ns:pal2 + nr:wm + ns:kl2wr, data=lafrance)
model4 <- lm(g1l1wr ~ nr + wm + ns + pal2 + kl2wr + wm:kl2wr + nr:kl2wr +
               wm:pal2 + wm:ns + nr:pal2, data=lafrance)
# Terms retained from models 3 and 4 (was a bare, non-syntactic token list;
# "wm:kl2w" in the original is a typo for wm:kl2wr):
#   pal2:kl2wr  nr:wm  wm:kl2wr  nr:kl2wr  nr:pal2
model5 <- lm(g1l1wr ~ nr + wm + ns + pal2 + kl2wr + pal2:kl2wr + nr:wm +
               wm:kl2wr + nr:kl2wr + nr:pal2, data=lafrance)
boot.stepAIC(model5, data=lafrance)
model6 <- update(model5, ~. - ns - pal2:kl2wr - nr:wm, data=lafrance)
# All ten three-way interactions (stray spaces inside the original strings
# removed so each element is a valid term label).
interactions <- c("nr:wm:ns", "nr:wm:pal2", "nr:ns:pal2", "wm:ns:pal2",
                  "nr:wm:kl2wr", "nr:ns:kl2wr", "wm:ns:kl2wr",
                  "nr:pal2:kl2wr", "wm:pal2:kl2wr", "ns:pal2:kl2wr")
model7 <- lm(g1l1wr ~ nr + wm + ns + pal2 + kl2wr + ns:pal2:kl2wr +
               nr:wm:pal2 + nr:ns:kl2wr + nr:wm:ns + wm:pal2:kl2wr,
             data=lafrance)
model8 <- lm(g1l1wr ~ nr + wm + ns + pal2 + kl2wr + nr:pal2:kl2wr +
               wm:ns:kl2wr + nr:wm:kl2wr + nr:ns:pal2 + wm:ns:pal2,
             data=lafrance)
model9 <- lm(g1l1wr ~ nr + wm + ns + pal2 + kl2wr + nr:wm:ns:pal2 +
               nr:wm:ns:kl2wr + nr:wm:pal2:kl2wr + nr:ns:pal2:kl2wr +
               wm:ns:pal2:kl2wr + nr:wm:ns:pal2:kl2wr, data=lafrance)
model10 <- lm(g1l1wr ~ nr + wm + ns + I(ns^2) + pal2 + I(pal2^2) + kl2wr +
                wm:kl2wr + nr:kl2wr + nr:pal2 +     # two-way interactions
                nr:wm:ns + wm:pal2:kl2wr +          # three-way interactions
                nr:wm:ns:kl2wr + wm:ns:pal2:kl2wr,  # four-way interactions
              data=lafrance)
model11 <- lm(g1l1wr ~ nr + wm + ns + I(ns^2) + pal2 + kl2wr + wm:kl2wr +
                nr:pal2 + nr:wm:ns, data=lafrance)
model12 <- lm(g1l1wr ~ pal2, data=lafrance)
anova(model10, model11)

# 7.7 Examining Regression Assumptions ----
# NOTE(review): LinearModel.56 is an R Commander model name that is never
# created in this file — confirm before running.
plot(LinearModel.56, cex=1.5)
model12 <- lm(g1l1wr ~ pal2, data=lafrance)
plot(model12, cex=1.5)   # default lm diagnostic plots
model13 <- update(model12, subset=(lafrance != 22))  # refit without case 22
plot(model12, which=4)   # Cook's distance
vif(model)
# FIX: the original formula ended with a dangling "+" immediately before
# data= (a syntax error); the trailing operator is removed.
model.vif <- lm(grade1l1reading ~ nonverbalreasoning + kinderl2reading +
                  namingspeed + workingmemory + l2phonemicawareness,
                data=lafrance)
vif(model.vif)
library(MASS)
plot(row.names(lafrance), stdres(model12), cex=1.5)  # standardized residuals
plot(model1)
plot(model1, which=4)
plot(model1, which=6)
vif(model1)
library(MASS)
plot(row.names(lafrance), stdres(model1))

# 7.9.1 Visualizing Robust Regression ----   (heading typo "Reggression" fixed)
library(robustbase)
# FIX: R is case-sensitive — the function is covPlot and the option is
# "tolEllipsePlot" (matching the second call below); T abbreviations spelled
# out as TRUE.
covPlot(cbind(Lafrance3$G1L1WR, Lafrance3$PAL2), which="tolEllipsePlot",
        classic=TRUE, cor=TRUE)
library(robust)
lafrance1.fit <- fit.models(list(Robust="covRob", Classical="ccov"),
                            data=Lafrance3)
Lafrance3 <- na.omit(Lafrance3)
plot(lafrance1.fit)
covPlot(cbind(lafrance$G1L1WR, Lafrance3$PAL2), which="tolEllipsePlot",
        classic=TRUE)
library(robust)
# Generic template: substitute your own data frame for `yourdata`.
your.fit <- fit.models(list(Robust="covRob", Classical="ccov"),
                       data=yourdata)
plot(your.fit)
# 7.9.2 Robust
# Regression Methods ----
library(robust)
# Fit robust (lmRob) and ordinary least-squares models side by side so their
# estimates can be compared in one summary.
Lafrance3.fit <- fit.models(list(Robust="lmRob", LS="lm"),
                            formula=G1L1WR ~ PAL2*KL2WR*NS, data=Lafrance3)
summary(Lafrance3.fit)
# NOTE(review): lowercase `lafrance3` (and lowercase variable names) are not
# created in this file — confirm whether this duplicate call is intended.
lafrance3.fit <- fit.models(list(Robust="lmRob", LS="lm"),
                            formula=g1l1wr ~ pal2*kl2wr*ns, data=lafrance3)
m1.robust <- lmRob(G1L1WR ~ PAL2*KL2WR*NS, data=Lafrance3)
summary(m1.robust)
# All terms written out except the 3-way interaction PAL2:KL2WR:NS —
# i.e. this deletes the 3-way interaction term from the full model.
m2.robust <- lmRob(G1L1WR ~ PAL2 + KL2WR + NS + PAL2:KL2WR + PAL2:NS +
                     KL2WR:NS, data=Lafrance3)
summary(m2.robust)
m3.robust <- lmRob(G1L1WR ~ PAL2 + KL2WR + NS, data=Lafrance3)
step.lmRob(m3.robust)    # robust stepwise model selection
m4.robust <- lmRob(G1L1WR ~ PAL2 + NS, data=Lafrance3)
summary(m4.robust)
plot.lmRob(m1.robust)
plot(Lafrance3.fit)
# Control object for an MM-estimator with 95% efficiency.
# NOTE(review): it is created but never passed (via control=) to any lmRob
# call below — confirm whether that was intended.
my.control <- lmRob.control(initial.alg="Auto", final.alg="MM",
                            efficiency=.95)
m1.robust <- lmRob(G1L1WR ~ PAL2*KL2WR*NS, data=Lafrance3)
summary(m1.robust)
# FIX: the original formula was PAL2+KL2WR+NS - PAL2:KL2WR:NS; subtracting a
# term that the additive formula never contained is a no-op, so m2 would not
# differ from a plain main-effects model.  The intended comparison (matching
# the earlier m2.robust) drops only the 3-way interaction from the fully
# crossed model.
m2.robust <- lmRob(G1L1WR ~ PAL2*KL2WR*NS - PAL2:KL2WR:NS, data=Lafrance3)
anova(m1.robust, m2.robust)
plot(m1.robust)   # diagnostic plots