# Chapter 3 Script
#
# Expects the data frames `forget` and `writing` to be loaded in the workspace
# before this section runs.
# NOTE(review): numSummary() is not base R; presumably it comes from
# R Commander (RcmdrMisc) -- confirm that package is loaded.

# Obtaining numerical summaries ----
# `groups` is given as forget$Status: a bare `Status` is not visible here.
numSummary(
  forget[, "rlwtest"],
  groups = forget$Status,
  statistics = c("mean", "sd", "quantiles")
)
with(forget, sum(!is.na(rlwtest))) # overall N for the variable

# Other types of statistical summaries ----
# with() scopes the column lookup to each call; unlike attach(forget) it
# leaves nothing on the search path that could mask or be masked later.
with(forget, tapply(rlwtest, list(Status = Status), mean, na.rm = TRUE))

# To summarize and remove any NAs
with(forget, sum(!is.na(rlwtest))) # overall N for the variable
with(forget, sum(!is.na(usyears)))

# Count for each group (the column is `Status`, capital S)
with(forget, table(Status[!is.na(rlwtest)])) # N count for each group

# Apply the same non-missing filter to BOTH factors so they stay the same
# length; table() stops with an error when its arguments differ in length.
scored <- !is.na(writing$score)
table(writing$L1[scored], writing$condition[scored])

# Get the mean, median, minimum, maximum, and Q1 and Q3 for all numeric
# variables quickly
summary(writing)

# Minimum and maximum scores, mean, median, variance, standard deviation,
# and skewness and kurtosis numbers
# Install only when missing, so re-running the script does not re-download.
if (!requireNamespace("fBasics", quietly = TRUE)) {
  install.packages("fBasics")
}
library(fBasics)

non <- forget$rlwtest[1:15]
non
basicStats(non)
skewness(non, na.rm = TRUE)
kurtosis(non, na.rm = TRUE)

# Choosing rows 1-15 could also use subset(). Its `subset` argument must be
# a logical vector (a bare 1:15 is rejected), and the score column is
# extracted so that `non` stays a numeric vector for the tests below.
non <- subset(forget, subset = seq_len(nrow(forget)) <= 15)[["rlwtest"]]
non

# The Shapiro-Wilk and Kolmogorov-Smirnov tests ----
shapiro.test(non) # Shapiro-Wilk normality test

# One-sample Kolmogorov-Smirnov test. Without mean/sd, "pnorm" means the
# standard normal N(0, 1), so raw scores would be rejected merely for not
# being standardized. Because the parameters are estimated from the same
# sample, this p-value is only approximate; the Lilliefors test in the
# nortest package applies the appropriate correction.
ks.test(
  non, "pnorm",
  mean = mean(non, na.rm = TRUE),
  sd = sd(non, na.rm = TRUE)
)

# The nortest library provides five more tests of normality.
# Additional normality test from nortest ----
# Expects `non` (numeric vector of scores) plus the data frames `forget`,
# `lafrance` and `dekeyser` to exist in the workspace.
# Install only when missing, so re-running the script does not re-download.
if (!requireNamespace("nortest", quietly = TRUE)) {
  install.packages("nortest")
}
library(nortest)
lillie.test(non)

# Histograms ----
# NOTE(review): Hist() (capital H) is not base R; presumably it comes from
# R Commander (RcmdrMisc) -- confirm that package is loaded.
# par() returns the previous settings; they are restored after the three
# group histograms so the 1 x 3 layout does not leak into later plots.
old_par <- par(mfrow = c(1, 3)) # graphics display: 1 row, 3 columns
Hist(
  forget$rlwtest[1:15],
  col = "gray", border = "darkgray",
  xlab = "", main = "Non-immersionists"
)
Hist(
  forget$rlwtest[16:30],
  col = "gray", border = "darkgray",
  xlab = "", ylab = "", main = "Late immersionists"
)
Hist(
  forget$rlwtest[31:44],
  col = "gray", border = "darkgray",
  xlab = "", ylab = "", main = "Early immersionists"
)
par(old_par)

# Overlay a histogram with the density curve of the normal distribution
norm.x <- rnorm(50, 0, 1)
x <- seq(-3.5, 3.5, 0.1)
dn <- dnorm(x)
# freq = FALSE draws densities rather than counts, which puts the bars on
# the same scale as the dnorm() curve (spelled out instead of `prob = T`).
hist(
  norm.x,
  xlab = "", main = "50 samples",
  col = "lightgray", border = "darkgray",
  freq = FALSE
)
lines(x, dn, col = "red", lwd = 2)

# Creating stem-and-leaf plots ----
# NOTE(review): stem.leaf() is not base R; presumably from aplpack -- confirm.
with(forget, stem.leaf(rlwtest, na.rm = TRUE))

# Using the lattice graphics library to produce a set of Q-Q plots ----
library(lattice)
qqmath(
  ~ rlwtest | Status,
  aspect = "xy",
  data = forget,
  layout = c(3, 1),
  xlab = "Q-Qplot",
  prepanel = prepanel.qqmathline,
  panel = function(x, ...) {
    # Draw the reference line first, then the points on top of it.
    panel.qqmathline(x, ...)
    panel.qqmath(x, ...)
  }
)

# Imputing missing data ----
library(mice)
imp <- mice(lafrance)
complete(imp) # shows the completed data matrix
implafrance <- complete(imp) # name the new file to work with

# NOTE(review): dprep may no longer be installable from CRAN -- confirm
# availability before relying on imagmiss().
library(dprep)
imagmiss(lafrance, name = "Lafrance")
imagmiss(implafrance, name = "Lafrance imputed")

# Transformations ----
# Square root of the reflected score.
# NOTE(review): 200 is presumably the maximum possible GJT score -- confirm.
dekeyser$sqrtGJTScore <- with(dekeyser, sqrt(200 - GJTScore))