B R Code

Chapter 1: Introduction

# This is a comment within a block of R code. Comments 
# start with the hash sign and are ignored by R. The code 
# below will be interpreted by R once you paste or type it
# into the console.
x <- c(4, 8, 15, 16, 23, 42)
mean(x)  # Only code after the hash is ignored
sd(x)
# Calculate the mean of x
mx <- mean(x)
# Print x and the mean of x
x
mx
print(mx)
# Some examples require data and functions from add-on R 
# packages
# This will install required packages - you only need to 
# do this once
install.packages("devtools")
install.packages("knitr")
install.packages("ggplot2")
# This will install epmr from github
devtools::install_github("talbano/epmr")
# Load required packages - do this every time you
# restart R
library("epmr")
library("ggplot2")
# Create a factor variable
classroom <- c("A", "B", "C", "A", "B", "C")
classroom <- factor(classroom)
classroom[c(1, 4)]
# Combine variables as columns in a data frame
mydata <- data.frame(scores = x, classroom)
mydata[1:4, ]
# Load the PISA data set and print subsets of it
data(PISA09)
PISA09[c(1, 10000, 40000), c(1, 6, 7, 38)]
PISA09$age[1:10]
# Get descriptive statistics for three variables in PISA09
# The subset() function is demonstrated further below
dstudy(subset(PISA09, select = c(elab, cstrat, memor)))
# Check the class of the country variable and create a 
# frequency table
class(PISA09$cnt)
table(PISA09$cnt)
# Convert a frequency table for country to percentages, 
# then round the result
cntpct <- table(PISA09$cnt) / nrow(PISA09) * 100
round(cntpct, 2)
# Create a bar chart for the number of students by country
ggplot(PISA09, aes(cnt)) + geom_bar(stat = "count")
# Histograms are more appropriate for continuous or 
# somewhat continuous scores
qplot(PISA09$memor, binwidth = .5)
# Create a series of box plots for memor scores by country
ggplot(PISA09, aes(cnt, memor)) + geom_boxplot()
# Check out the minimum, maximum, and range (a shortcut 
# for the min and max) for the age variable
min(PISA09$age)
max(PISA09$age)
range(PISA09$age)
# Check the standard deviations on a survey item for Hong 
# Kong and Germany
sd(PISA09$st33q01[PISA09$cnt == "HKG"], na.rm = TRUE)
sd(PISA09$st33q01[PISA09$cnt == "DEU"], na.rm = TRUE)
# Examples of subsetting using brackets with logical vectors
mydata # Print the full data frame
mydata$scores < 10 # Is each score less than 10, TRUE or FALSE?
mydata[mydata$scores < 10, ] # Print only for scores below 10
mydata[mydata$classroom == "A", ] # Print only classroom A
# Get the covariance and correlation for different 
# variables
# The use argument specifies how to handle missing data
# See ?cor for details
cov(PISA09$age, PISA09$grade, use = "complete")
cor(PISA09[, c("elab", "cstrat", "memor")],
  use = "complete")
# Scatter plot for two learning strategies variables
# geom_point() adds the points to the plot
# position_jitter() wiggles them around a little, to
# uncover the densities of points that would otherwise
# overlap
ggplot(subset(PISA09, cnt == "USA"), aes(elab, cstrat)) +
  geom_point(position = position_jitter(w = 0.1, h = 0.1))
# Examples of subsetting using the logical vectors and the
# subset() function
mydata # Print the full data frame again
subset(mydata, subset = scores < 10) # Print only for scores below 10
subset(mydata, subset = classroom == "A") # Print only classroom A
subset(mydata, subset = scores < 10, select = scores) # Subset a column
# Convert age to z-scores, then rescale to have a new mean
# and SD
# You should check the mean and SD of both zage and newage
# Also, see setmean() and setsd() from epmr, and scale()
# from base R
zage <- (PISA09$age - mean(PISA09$age)) / sd(PISA09$age)
newage <- (zage * 150) + 500
# Histograms of age and the rescaled version of age
ggplot(PISA09, aes(factor(round(zage, 2)))) + geom_bar()
ggplot(PISA09, aes(factor(round(newage, 2)))) + geom_bar()

Chapter 2: Measurement, Scales, and Scoring

# R setup for this chapter
# Required packages are assumed to be installed,
# see chapter 1
library("epmr")
library("ggplot2")
# Functions we'll use in this chapter
# data(), class(), factor(), c(), from chapter 1
# head() to print the first six rows or values in an 
# object
# paste0() for pasting together text and using it to index
# a data set
# apply() for applying a function over rows or columns of 
# a data set
# tapply() for applying a function over groups
# dstudy() from the epmr package for getting descriptives
# ggplot(), aes(), and geom_boxplot() for plotting
# round(), nrow(), and with() for examining data
# We'll use a data set included in the base R packages 
# called state.x77
# Load state data
data(state)
# Print first 6 rows, all columns
head(state.x77)
# Comparing two classifications of nominal variables in R
# Nominal classroom labels as character
roomnumber <- c("1", "2", "3", "1", "2", "3")
class(roomnumber)
# Nominal classroom labels as factor
roomnumber <- factor(roomnumber)
class(roomnumber)
# Names of all reading items, to be used as an indexing 
# object
ritems <- c("r414q02", "r414q11", "r414q06", "r414q09",
  "r452q03", "r452q04", "r452q06", "r452q07", "r458q01", 
  "r458q07", "r458q04")
# Paste an "s"" to the end of each name, for scored items
rsitems <- paste0(ritems, "s")
# apply() applies to a data set (the first argument) 
# across either
# rows or columns (the second argument) the function named
# (in the third argument). See also rowSums(). Here, we 
# treat missings as 0s, by excluding them from the sum.
PISA09$rtotal <- apply(PISA09[, rsitems], 1, sum,
  na.rm = TRUE)
dstudy(PISA09$rtotal)
# Bar plot of reading totals, which are, ironically, 
# converted to a factor before plotting, so ggplot treats 
# them as a discrete
# scale rather than continuous. Continuous defaults to a 
# histogram which isn't as useful.
ggplot(PISA09, aes(factor(rtotal))) + geom_bar()
# The apply() function again, used to iterate through 
# reading items, and for each column (hence the 2), run a 
# frequency table.
# Then, divide the result by the number of students, and 
# round to 2 decimal places.
rtab <- apply(PISA09[, rsitems], 2, table)
round(rtab / nrow(PISA09), 2)
# Reverse code two variables
PISA09$st33q01r <- recode(PISA09$st33q01)
PISA09$st33q02r <- recode(PISA09$st33q02)
# Names of all attitude toward school items, to be used as
# an indexing object
# Note the added "r" in the names for the first two items,
# which were recoded
atsitems <- c("st33q01r", "st33q02r", "st33q03", 
  "st33q04")
# Calculate total scores again using apply(), and look at 
# descriptives
PISA09$atotal <- apply(PISA09[, atsitems], 1, sum,
  na.rm = TRUE)
dstudy(PISA09$atotal)
# tapply() is like apply() but instead of specifying rows
# or columns of a matrix, we provide an index variable.
# Here, median() will be applied over subsets of rtotal by
# grade. with() is used to subset only German students.
with(PISA09[PISA09$cnt == "DEU", ],
  tapply(rtotal, grade, median, na.rm = TRUE))
# Most German students are in 9th grade. Medians aren't as
# useful for grades 7, 11, and 12.
table(PISA09$grade[PISA09$cnt == "DEU"])
# Get box plots of reading total scores
ggplot(PISA09[PISA09$cnt == "DEU" & PISA09$grade %in% 
  8:10, ], aes(x = factor(grade), y = rtotal)) +
  geom_boxplot()

Chapter 5: Reliability

# R setup for this chapter
# Required packages are assumed to be installed,
# see chapter 1
library("epmr")
library("ggplot2")
# Functions we'll use in this chapter
# set.seed(), rnorm(), sample(), and runif() to simulate
# data
# rowSums() for getting totals by row
# rsim() and setrange() from epmr to simulate and modify
# scores
# rstudy(), coef_alpha(), and sem() from epmr to get
# reliability and SEM
# geom_point(), geom_abline(), and geom_errorbar() for
# plotting
# diag() for getting the diagonal elements from a matrix
# astudy() from epmr to get interrater agreement
# gstudy() from epmr to run a generalizability study
# Simulate a constant true score, and randomly varying
# error scores from
# a normal population with mean 0 and SD 1
# set.seed() gives R a starting point for generating
# random numbers
# so we can get the same results on different computers
# You should check the mean and SD of E and X
# Creating a histogram of X might be interesting too
set.seed(160416)
myt <- 20
mye <- rnorm(1000, mean = 0, sd = 1)
myx <- myt + mye
# Calculate total reading scores, as in Chapter 2
ritems <- c("r414q02", "r414q11", "r414q06", "r414q09", 
  "r452q03", "r452q04", "r452q06", "r452q07", "r458q01",
  "r458q07", "r458q04")
rsitems <- paste0(ritems, "s")
xscores <- rowSums(PISA09[PISA09$cnt == "BEL", rsitems],
  na.rm = TRUE)
# Simulate error scores based on known SEM of 1.4, which
# we'll calculate later, then create true scores
# True scores are truncated to fall bewteen 0 and 11 using
# setrange()
escores <- rnorm(length(xscores), 0, 1.4)
tscores <- setrange(xscores - escores, y = xscores)
# Combine in a data frame and create a scatterplot
scores <- data.frame(x1 = xscores, t = tscores,
  e = escores)
ggplot(scores, aes(x1, t)) +
  geom_point(position = position_jitter(w = .3)) +
  geom_abline(col = "blue")
# Simulate scores for a new form of the reading test 
# called y
# rho is the made up reliability, which is set to 0.80
# x is the original reading total scores
# Form y is slightly easier than x with mean 6 and SD 3
xysim <- rsim(rho = .8, x = scores$x1, meany = 6, sdy = 3)
scores$x2 <- round(setrange(xysim$y, scores$x1))
ggplot(scores, aes(x1, x2)) +
  geom_point(position = position_jitter(w = .3, h = .3)) +
  geom_abline(col = "blue")
# Split half correlation, assuming we only had scores on 
# one test form
# With an odd number of reading items, one half has 5 
# items and the other has 6
xsplit1 <- rowSums(PISA09[PISA09$cnt == "BEL", 
  rsitems[1:5]])
xsplit2 <- rowSums(PISA09[PISA09$cnt == "BEL", 
  rsitems[6:11]])
cor(xsplit1, xsplit2, use = "complete")
# sb_r() in the epmr package uses the Spearman-Brown 
# formula to estimate how reliability would change when 
# test length changes by a factor k
# If test length were doubled, k would be 2
sb_r(r = cor(xsplit1, xsplit2, use = "complete"), k = 2)
# epmr includes rstudy() which estimates alpha and a 
# related form of reliability called omega, along with 
# corresponding SEM
# You can also use coef_alpha() to obtain coefficient 
# alpha directly
rstudy(PISA09[, rsitems])
# Get alpha and SEM for students in Belgium
bela <- coef_alpha(PISA09[PISA09$cnt == "BEL", rsitems])$alpha
# The sem function from epmr sometimes overlaps with sem from 
# another R package so we're spelling it out here in long 
# form
belsem <- epmr::sem(r = bela, sd = sd(scores$x1,
  na.rm = T))
# Plot the 11 possible total scores against themselves
# Error bars are shown for 1 SEM, giving a 68% confidence
# interval and 2 SEM, giving the 95% confidence interval
# x is converted to factor to show discrete values on the 
# x-axis
beldat <- data.frame(x = 1:11, sem = belsem)
ggplot(beldat, aes(factor(x), x)) +
  geom_errorbar(aes(ymin = x - sem * 2,
    ymax = x + sem * 2), col = "violet") +
  geom_errorbar(aes(ymin = x - sem, ymax = x + sem),
    col = "yellow") +
  geom_point()
# Simulate random coin flips for two raters
# runif() generates random numbers from a uniform 
# distribution
flip1 <- round(runif(30))
flip2 <- round(runif(30))
table(flip1, flip2)
# Simulate essay scores from two raters with a population 
# correlation of 0.90, and slightly different mean scores,
# with score range 0 to 6
# Note the capital T is an abbreviation for TRUE
essays <- rsim(100, rho = .9, meanx = 4, meany = 3,
  sdx = 1.5, sdy = 1.5, to.data.frame = T)
colnames(essays) <- c("r1", "r2")
essays <- round(setrange(essays, to = c(0, 6)))
# Use a cut off of greater than or equal to 3 to determine
# pass versus fail scores
# ifelse() takes a vector of TRUEs and FALSEs as its first
# argument, and returns here "Pass" for TRUE and "Fail" 
# for FALSE
essays$f1 <- factor(ifelse(essays$r1 >= 3, "Pass", 
  "Fail"))
essays$f2 <- factor(ifelse(essays$r2 >= 3, "Pass", 
  "Fail"))
table(essays$f1, essays$f2)
# Pull the diagonal elements out of the crosstab with 
# diag(), sum them, and divide by the number of people
sum(diag(table(essays$r1, essays$r2))) / nrow(essays)
# Randomly sample from the vector c("Pass", "Fail"), 
# nrow(essays) times, with replacement
# Without replacement, we'd only have 2 values to sample 
# from
monkey <- sample(c("Pass", "Fail"), nrow(essays),
  replace = TRUE)
table(essays$f1, monkey)
# Use the astudy() function from epmr to measure agreement
astudy(essays[, 1:2])
# Certain changes in descriptive statistics, like adding 
# constants won't impact correlations
cor(essays$r1, essays$r2)
dstudy(essays[, 1:2])
cor(essays$r1, essays$r2 + 1)
# Comparing scatter plots
ggplot(essays, aes(r1, r2)) +
  geom_point(position = position_jitter(w = .1, h = .1)) +
  geom_abline(col = "blue")
ggplot(essays, aes(r1, r2 + 1)) +
  geom_point(position = position_jitter(w = .1, h = .1)) +
  geom_abline(col = "blue")
# Run a g study on simulated essay scores
essays$r3 <- rsim(100, rho = .9, x = essays$r2,
  meany = 3.5, sdy = 1.25)$y
essays$r3 <- round(setrange(essays$r3, to = c(0, 6)))
gstudy(essays[, c("r1", "r2", "r3")])

Chapter 6: Item Analysis

# R setup for this chapter
# Required packages are assumed to be installed,
# see chapter 1
library("epmr")
library("ggplot2")
# Functions we'll use in this chapter
# str() for checking the structure of an object
# recode() for recoding variables
# colMeans() for getting means by column
# istudy() from epmr for running an item analysis
# ostudy() from epmr for running an option analysis
# subset() for subsetting data
# na.omit() for removing cases with missing data
# Recreate the item name index and use it to check the 
# structure of the unscored reading items
# The strict.width argument is optional, making sure the
# results fit in the console window
ritems <- c("r414q02", "r414q11", "r414q06", "r414q09",
  "r452q03", "r452q04", "r452q06", "r452q07", "r458q01",
  "r458q07", "r458q04")
str(PISA09[, ritems], strict.width = "cut")
# Subsets of the reading item index for constructed and
# selected items
# Check frequency tables by item (hence the 2 in apply)
# for CR items
critems <- ritems[c(3, 5, 7, 10)]
sritems <- ritems[c(1:2, 4, 6, 8:9, 11)]
apply(PISA09[, critems], 2, table, exclude = NULL)
# Indices for scored reading items
rsitems <- paste0(ritems, "s")
crsitems <- paste0(critems, "s")
srsitems <- paste0(sritems, "s")
# Tabulate unscored and scored responses for the first CR
# item
# exclude = NULL shows us NAs as well
# raw and scored are not arguments to table, but are used
# simply to give labels to the printed output
table(raw = PISA09[, critems[1]],
  scored = PISA09[, crsitems[1]],
  exclude = NULL)
# Create the same type of table for the first SR item
# Check the structure of raw attitude items
str(PISA09[, c("st33q01", "st33q02", "st33q03",
  "st33q04")])
# Rescore two items
PISA09$st33q01r <- recode(PISA09$st33q01)
PISA09$st33q02r <- recode(PISA09$st33q02)
# Get p-values for reading items by type
round(colMeans(PISA09[, crsitems], na.rm = T), 2)
round(colMeans(PISA09[, srsitems], na.rm = T), 2)
# Index for attitude toward school items, with the first
# two items recoded
atsitems <- c("st33q01r", "st33q02r", "st33q03",
  "st33q04")
# Check mean scores
round(colMeans(PISA09[, atsitems], na.rm = T), 2)
# Convert polytomous to dichotomous, with any disagreement
# coded as 0 and any agreement coded as 1
ats <- apply(PISA09[, atsitems], 2, recode,
  list("0" = 1:2, "1" = 3:4))
round(colMeans(ats, na.rm = T), 2)
# Get total reading scores and check descriptives
PISA09$rtotal <- rowSums(PISA09[, rsitems])
dstudy(PISA09$rtotal)
# Compare CR item p-values for students below vs above the
# median total score
round(colMeans(PISA09[PISA09$rtotal <= 6, crsitems],
  na.rm = T), 2)
round(colMeans(PISA09[PISA09$rtotal > 6, crsitems],
  na.rm = T), 2)
# Create subset of data for German students, then reduce
# to complete data
pisadeu <- subset(PISA09, subset = cnt == "DEU",
  select = c(crsitems, "rtotal"))
pisadeu <- na.omit(pisadeu)
round(cor(pisadeu), 2)
# Scatter plots for visualizing item discrimination
ggplot(pisadeu, aes(rtotal, factor(r414q06s))) +
  geom_point(position = position_jitter(w = 0.2, h = 0.2))
ggplot(pisadeu, aes(rtotal, factor(r452q03s))) +
  geom_point(position = position_jitter(w = 0.2, h = 0.2))
# Caculate ITC and CITC by hand for one of the attitude
# toward school items
PISA09$atstotal <- rowSums(PISA09[, atsitems])
cor(PISA09$atstotal, PISA09$st33q01r, use = "c")
cor(PISA09$atstotal - PISA09$st33q01r,
  PISA09$st33q01r, use = "c")
# Estimate item analysis statistics, including alpha if
# item deleted
istudy(PISA09[, rsitems])
# Subset of data for Hong Kong, scored reading items
pisahkg <- subset(PISA09, cnt == "HKG", rsitems)
# Spearman-Brown based on original alpha and new test
# length of 8 items
sb_r(r = coef_alpha(pisahkg)$alpha, k = 8/11)
# Item analysis for Hong Kong
istudy(pisahkg)
# Item analysis for a subset of items 
istudy(pisahkg[, rsitems[-c(2, 5)]])
# Item option study on all SR reading items
pisao <- ostudy(PISA09[, sritems], scores = PISA09$rtotal)
# Print frequency results for one item
pisao$counts$r458q04
# Print option analysis percentages by column
# This item discriminates relatively well
pisao$colpct$r458q04
# Print option analysis percentages by column
# This item discriminates relatively less well
pisao$colpct$r414q11

Chapter 7: Item Response Theory

# R setup for this chapter
library("epmr")
library("ggplot2")
# Functions we'll use in this chapter
# rirf() from epmr for getting an item response function
# scale_y_continuous() for plotting a continuous scale
# irtstudy() from epmr for running an IRT study
# rtef() from epmr for getting a test error function
# rtif() from epmr for getting a test information function
# Prepping data for examples
# Create subset of data for Great Britain, then reduce to 
# complete data
ritems <- c("r414q02", "r414q11", "r414q06", "r414q09",
  "r452q03", "r452q04", "r452q06", "r452q07", "r458q01",
  "r458q07", "r458q04")
rsitems <- paste0(ritems, "s")
PISA09$rtotal <- rowSums(PISA09[, rsitems])
pisagbr <- PISA09[PISA09$cnt == "GBR",
  c(rsitems, "rtotal")]
pisagbr <- pisagbr[complete.cases(pisagbr), ]
# Get p-values conditional on rtotal
# tapply() applies a function to the first argument over
# subsets of data defined by the second argument
pvalues <- data.frame(rtotal = 0:11,
  p = tapply(pisagbr$r452q06s, pisagbr$rtotal, mean))
# Plot CTT discrimination over scatter plot of scored item
# responses
ggplot(pisagbr, aes(rtotal, r414q06s)) +
  geom_point(position = position_jitter(w = 0.1,
    h = 0.1)) +
  geom_smooth(method = "lm", fill = NA) +
  geom_point(aes(rtotal, p), data = pvalues,
    col = "green", size = 3)
# Make up a, b, and c parameters for five items
# Get IRF using the rirf() function from epmr and plot
# rirf() will be demonstrated again later
ipar <- data.frame(a = c(2, 1, .5, 1, 1.5),
  b = c(3, 2, -.5, 0, -1),
  c = c(0, .2, .25, .1, .28),
  row.names = paste0("item", 1:5))
ggplot(rirf(ipar), aes(theta)) + scale_y_continuous("Pr(X)") +
  geom_line(aes(y = item1), col = 1) +
  geom_line(aes(y = item2), col = 2) +
  geom_line(aes(y = item3), col = 3) +
  geom_line(aes(y = item4), col = 4) +
  geom_line(aes(y = item5), col = 5)
# The irtstudy() function estimates theta and b parameters
# for a set of scored item responses
irtgbr <- irtstudy(pisagbr[, rsitems])
irtgbr
head(irtgbr$ip)
# Get IRF for the set of GBR reading item parameters and a
# vector of thetas
# Note the default thetas of seq(-4, 4, length = 100)
# could also be used
irfgbr <- rirf(irtgbr$ip, seq(-6, 6, length = 100))
# Plot IRF for items r452q03s, r452q04s, r452q06s, and
# r452q07s
ggplot(irfgbr, aes(theta)) + scale_y_continuous("Pr(X)") +
  geom_line(aes(y = irfgbr$r452q03s, col = "r452q03")) +
  geom_line(aes(y = irfgbr$r452q04s, col = "r452q04")) +
  geom_line(aes(y = irfgbr$r452q06s, col = "r452q06")) +
  geom_line(aes(y = irfgbr$r452q07s, col = "r452q07")) +
  scale_colour_discrete(name = "item")
# Plot SEM curve conditional on theta for full items
# Then add SEM for the subset of items 1:8 and 1:4
ggplot(rtef(irtgbr$ip), aes(theta, se)) + geom_line() +
  geom_line(aes(theta, se), data = rtef(irtgbr$ip[1:8, ]),
    col = "blue") +
  geom_line(aes(theta, se), data = rtef(irtgbr$ip[1:4, ]),
    col = "green")
# SEM for theta 3 based on the four easiest and the four
# most difficult items
rtef(irtgbr$ip[c(4, 6, 9, 11), ], theta = 3)
rtef(irtgbr$ip[c(2, 7, 8, 10), ], theta = 3)
# Plot the test information function over theta
# Information is highest at the center of the theta scale
ggplot(rtif(irtgbr$ip), aes(theta, i)) + geom_line()
# Plot the test response function over theta
ggplot(rtrf(irtgbr$ip), aes(theta, p)) + geom_line()

Chapter 8: Factor Analysis

# R setup for this chapter
library("epmr")
# We're using a new package for CFA called lavaan
install.packages("lavaan")
library("lavaan")
# Functions we'll use
# fastudy() and plot() from epmr
# lavaanify() and cfa() from lavaan
# Subset of correlations from BDI data set in the epmr 
# package
BDI$R[1:4, 1:4]
# Prepping PISA approaches to learning data for EFA
# Vectors of item names for memorization, elaboration, and
# control strategies
mitems <- c("st27q01", "st27q03", "st27q05", "st27q07")
eitems <- c("st27q04", "st27q08", "st27q10", "st27q12")
citems <- c("st27q02", "st27q06", "st27q09", "st27q11", 
  "st27q13")
alitems <- c(mitems, eitems, citems)
# Reduce to complete data for Great Britain
pisagbr <- subset(PISA09, subset = cnt == "GBR", select = alitems)
pisagbr <- na.omit(pisagbr)
# Fit EFA with three factors
alefa <- fastudy(pisagbr, factors = 3)
# Print approaches to learning EFA results
print(alefa, digits = 2)
# Print results again, rounding and filtering loadings
print(alefa, digits = 2, cutoff = 0.3)
# Print uniquenesses, and check sum of squared loadings
round(alefa$uniquenesses, 2)
round(rowSums(alefa$loadings^2) + alefa$uniquenesses, 2)
# Correlations between PISA approaches to learning scores
# for Great Britain
round(cor(subset(PISA09, cnt == "GBR", c("memor", "elab", 
  "cstrat")), use = "c"), 2)
# Plot of approaches to learning eigenvalues
plot(alefa, ylim = c(0, 3))
# Plot of eigenvalues for BDI
bdiefa <- fastudy(covmat = BDI$S, factors = 12,
  n.obs = 576)
plot(bdiefa, ylim = c(0, 3))
# CFA of BDI using the lavaan package
# Specify the factor structure
# Comments within the model statement are ignored
bdimod <- lavaanify(model = "
  # Latent variable definitions
  cog =~ sadness + crying + failure + guilt + punish +
    dislike + critical + pessimism + nopleasure +
    nointerest + noworth + suicide + indecisive +
    irritable + agitated + nosex
  som =~ tired + noenergy + noconcentrate + appetite +
    sleep
  # Covariances
  sadness ~~ crying
  dislike ~~ critical
  nopleasure ~~ nointerest",
  auto.var = TRUE, auto.cov.lv.x = TRUE, std.lv = TRUE)
# Fit the model
bdicfa <- cfa(bdimod, sample.cov = BDI$S,
  sample.nobs = 576)
# Print fit indices, loadings, and other output
summary(bdicfa, fit = TRUE, standardized = TRUE)
# Specify the factor structure
# Comments within the model statement are ignored as
# comments
bdimod2 <- lavaanify(model = "
  depression =~ sadness + crying + failure + guilt +
    punish + dislike + critical + pessimism + nopleasure +
    nointerest + noworth + suicide + indecisive +
    irritable + agitated + nosex + tired + noenergy +
    noconcentrate + appetite + sleep",
  auto.var = TRUE, std.lv = TRUE)
# Fit the model
bdicfa2 <- cfa(bdimod2, sample.cov = BDI$S,
  sample.nobs = 576)
# Print output
summary(bdicfa2, fit = TRUE, standardized = TRUE)
# Compare fit for BDI CFA models
anova(bdicfa2, bdicfa)
# CFA with PISA approaches to learning scale
# Specify the factor structure
almod <- lavaanify(model = "
  # Three factors
  memor =~ st27q01 + st27q03 + st27q05 + st27q07
  elab =~ st27q04 + st27q08 + st27q10 + st27q12
  cstrat =~ st27q02 + st27q06 + st27q09 + st27q11 +
    st27q13",
  auto.var = TRUE, auto.cov.lv.x = TRUE, std.lv = TRUE)
# Fit the model
alcfa <- cfa(almod, sample.cov = cov(pisagbr[, alitems]),
  sample.nobs = 3514)
# Print output
summary(alcfa, fit = TRUE, standardized = TRUE)

Chapter 9: Validity

# R setup for this chapter
library("epmr")
# Functions we'll use
# cor() from the base package
# subset() from the base package for subsetting data
# rsim() from epmr to simulate scores
# rstudy() from epmr for finding alpha
# Get the vector of reading items names
ritems <- c("r414q02", "r414q11", "r414q06", "r414q09", 
  "r452q03", "r452q04", "r452q06", "r452q07", "r458q01", 
  "r458q07", "r458q04")
rsitems <- paste0(ritems, "s")
# Calculate total reading scores
pisausa <- subset(PISA09, cnt == "USA", rsitems)
rtotal <- rowSums(pisausa, na.rm = TRUE)
# Simulate a criterion - see Chapter 5 for another example
# using rsim
criterion <- rsim(rho = .8, x = rtotal, meany = 24,
  sdy = 6)
# Check the correlation
cor(rtotal, criterion$y)
# Internal consistency for the PISA items
rstudy(pisausa)
# Correction for attenuation
cor(rtotal, criterion$y)/sqrt(.77 * .86)