#Chapter3 Script
#Obtaining Numerical Summaries
# Mean, sd and quantiles of rlwtest for each Status group.
# NOTE(review): numSummary() is not base R; it appears to come from R Commander
# (Rcmdr/RcmdrMisc), which must be loaded before this line runs - confirm.
numSummary(forget[, "rlwtest"], groups = forget$Status,
           statistics = c("mean", "sd", "quantiles"))

# Overall N for the variable (count of non-missing values).
sum(!is.na(forget$rlwtest))

# Other types of statistical summaries

# Group means of rlwtest by Status, ignoring NAs. Columns are reached through
# the data frame instead of attach(forget), so no line depends on the search
# path or on which earlier lines have already been run.
tapply(forget$rlwtest, list(Status = forget$Status), mean, na.rm = TRUE)

# To summarize and remove any NAs
sum(!is.na(forget$rlwtest)) # overall N for the variable
# Evaluated inside forget, replacing the lookup attach(forget) used to provide.
with(forget, sum(!is.na(usyears)))
# Count for each group. The grouping column is `Status` (capital S, as in the
# calls above); R is case-sensitive, so `status` would be a different object.
table(forget$Status[!is.na(forget$rlwtest)]) # N count for each group

# Cross-tabulate L1 by condition, counting only rows with a non-missing score.
# The same row filter is applied to BOTH factors: table() requires arguments
# of equal length, so filtering only one of them fails as soon as score
# contains an NA.
table(writing$L1[!is.na(writing$score)],
      writing$condition[!is.na(writing$score)])

# Get the mean, median, minimum, maximum, and Q1 and Q3 for all numeric
# variables quickly

summary(writing)

#minimum and maximum scores, mean, median, variance, standard deviation, and skewness and kurtosis numbers

# fBasics supplies basicStats(), skewness() and kurtosis(). Install it only
# when it is missing, so re-running the script does not re-download it.
if (!requireNamespace("fBasics", quietly = TRUE)) {
  install.packages("fBasics")
}
library(fBasics)
forget$rlwtest[1:15]
basicStats(forget$rlwtest[1:15])


# First 15 values of rlwtest, kept as a plain numeric vector for the
# skewness/kurtosis calls here and for the normality tests later on.
non <- forget$rlwtest[1:15]
skewness(non, na.rm = TRUE)
kurtosis(non, na.rm = TRUE)

# Choosing rows 1-15 could also use subset(). Its `subset` argument must be a
# logical row filter (a bare 1:15 is rejected with "'subset' must be logical").
# select = rlwtest with drop = TRUE keeps `non` a plain numeric vector, which
# is what shapiro.test() and ks.test() below require.
non <- subset(forget, subset = seq_len(nrow(forget)) <= 15,
              select = rlwtest, drop = TRUE)
non

# The Shapiro-Wilk and Kolmogorov-Smirnov tests.
shapiro.test(non) # Shapiro-Wilk normality test
# One-sample Kolmogorov-Smirnov test against a normal distribution with the
# sample's own mean and sd. With no parameters, "pnorm" means N(0, 1), which
# tests "is this standard normal?" rather than "is this normally distributed?".
# NOTE: because the parameters are estimated from the same data, the p-value
# is only approximate; the Lilliefors test corrects for that.
ks.test(non, "pnorm",
        mean = mean(non, na.rm = TRUE), sd = sd(non, na.rm = TRUE))

#the nortest library provides five more tests of normality.
# Install nortest only when it is missing, so re-running the script does not
# re-download the package every time.
if (!requireNamespace("nortest", quietly = TRUE)) {
  install.packages("nortest")
}
library(nortest)
# Lilliefors (Kolmogorov-Smirnov) normality test on the `non` vector.
lillie.test(non)

#histogram
# Draw the three groups side by side (1 row, 3 columns). par() returns the
# previous settings; they are restored after the last panel so that later
# base-graphics plots are not squeezed into one third of the device.
# NOTE(review): Hist() (capital H) is not base R; it appears to come from
# R Commander (Rcmdr/RcmdrMisc), which must be loaded first - confirm.
old_par <- par(mfrow = c(1, 3))
Hist(forget$rlwtest[1:15], col = "gray", border = "darkgray", xlab = "",
     main = "Non-immersionists")
Hist(forget$rlwtest[16:30], col = "gray", border = "darkgray", xlab = "",
     ylab = "", main = "Late immersionists")
Hist(forget$rlwtest[31:44], col = "gray", border = "darkgray", xlab = "",
     ylab = "", main = "Early immersionists")
par(old_par)

#to overlay the histogram with a density plot of the normal distribution
norm.x <- rnorm(50, 0, 1) # 50 random draws from N(0, 1)
x <- seq(-3.5, 3.5, .1)   # grid on which the density curve is evaluated
dn <- dnorm(x)            # standard normal density at each grid point
# probability = TRUE (spelled out rather than the partially matched `prob`, and
# TRUE rather than the reassignable `T`) puts the bars on the density scale, so
# the curve drawn by lines() is directly comparable to them.
hist(norm.x, xlab = "", main = "50 samples", col = "lightgray",
     border = "darkgray", probability = TRUE)
lines(x, dn, col = "red", lwd = 2)

#to create stem-and-leaf plots
# Text stem-and-leaf display of rlwtest; na.rm = TRUE is handed to stem.leaf().
# NOTE(review): stem.leaf() is not base R (presumably from the aplpack package
# that R Commander loads) - confirm it is available before this line runs.
stem.leaf(forget$rlwtest, na.rm = TRUE)

#using the Lattice graphics library to produce a set of Q-Q plots 

library(lattice)
# One normal Q-Q panel of rlwtest per Status level, arranged as 3 columns by
# 1 row. The custom panel draws the reference line first and the points after
# it, so the points are plotted on top of the line.
qqmath(
  ~ rlwtest | Status,
  data = forget,
  layout = c(3, 1),
  aspect = "xy",
  xlab = "Q-Qplot",
  prepanel = prepanel.qqmathline,
  panel = function(x, ...) {
    panel.qqmathline(x, ...)
    panel.qqmath(x, ...)
  }
)

#imputing Missing Data
library(mice)
# Run mice() on lafrance, then extract one completed data set from the result.
imp <- mice(lafrance)
implafrance <- complete(imp) # the completed data, named for later work
implafrance                  # shows the completed data matrix

# Draw the missingness image for the original and for the completed data.
# NOTE(review): dprep may no longer be installable from CRAN - confirm.
library(dprep)
imagmiss(lafrance, name = "Lafrance")
imagmiss(implafrance, name = "Lafrance imputed")

#transformations 

# Add a column holding sqrt(200 - GJTScore).
# NOTE(review): 200 is presumably the maximum possible GJT score, so the data
# are reflected before the square root is taken - confirm.
dekeyser$sqrtGJTScore <- sqrt(200 - dekeyser[["GJTScore"]])