diff --git a/.Rbuildignore b/.Rbuildignore index 511668b..78f68fa 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,3 +6,4 @@ ^docs$ ^pkgdown$ ^\.github$ +^\.vscode$ \ No newline at end of file diff --git a/.gitignore b/.gitignore old mode 100755 new mode 100644 index 8ff9bf4..bb442ba --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.vscode .Rproj.user .Rhistory .Rdata diff --git a/DESCRIPTION b/DESCRIPTION index fd24f70..988588e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: R2spa Title: An R package for two-stage path analysis (2S-PA) to adjust for measurement errors -Version: 0.0.2 +Version: 0.0.3 Authors@R: c( person(given = "Mark Hok Chio", @@ -36,12 +36,20 @@ Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 Imports: lavaan, - MASS + MASS, + OpenMx Suggests: + boot, + DiagrammeR, knitr, magrittr, + Matrix, + mirt, + numDeriv, rmarkdown, - testthat (>= 3.0.0) + psych, + testthat (>= 3.0.0), + umx VignetteBuilder: knitr Config/testthat/edition: 3 URL: https://gengrui-zhang.github.io/R2spa/ diff --git a/NAMESPACE b/NAMESPACE index dd3a3f7..81f5c30 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,24 @@ # Generated by roxygen2: do not edit by hand +export(block_diag) export(compute_fscore) export(get_fs) +export(get_fs_lavaan) export(grandStandardizedSolution) +export(grand_standardized_solution) export(tspa) +export(tspa_mx_model) +export(tspa_plot) +export(vcov_corrected) +importFrom(OpenMx,mxAlgebraFromString) +importFrom(OpenMx,mxData) +importFrom(OpenMx,mxExpectationNormal) +importFrom(OpenMx,mxFitFunctionML) +importFrom(OpenMx,mxMatrix) +importFrom(OpenMx,mxModel) +importFrom(grDevices,devAskNewPage) importFrom(lavaan,cfa) +importFrom(lavaan,coef) importFrom(lavaan,lavInspect) importFrom(lavaan,lavTech) importFrom(lavaan,lav_func_jacobian_complex) diff --git a/NEWS.md b/NEWS.md index f376ccc..0092ca1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,35 @@ +# R2spa 0.0.3 + +- Add function `tspa_plot()` for bivariate and residual plots (#23) + +- `get_fs()` gains argument `corrected_fsT` for computing corrected error estimates (#50) + +- New function `vcov_corrected()` for computing corrected SEs (#39) + +- New function `get_fs_lavaan()` for computing factor scores and relevant matrices directly from a `lavaan` output (#61) + +- Initial support for 2S-PA with *OpenMx* with `tspa_mx()` + +- Update naming of relevant matrices when computing factor scores: + * `fsT`: error covariance of factor scores + * `fsL`: loading matrix of factor scores + * `fsb`: intercepts of factor scores + * `scoring_matrix`: weights for computing factor scores from items + +- New vignettes for: + * Corrected error variance of factor scores (#50) + * Corrected standard errors incorporating uncertainty in measurement parameters of factor scores (#39) + * Using 2S-PA with EFA scores + * Using 2S-PA with OpenMx and definition variables (PR #57) + * Latent interaction with categorical indicators (#27) + * Growth modeling + +- Better error messages for `tspa()` (#53) + +- Support mean structure and growth model (#36, #19) + +- Clean up code with `lintr` (#33) + # R2spa 0.0.2 - Use `pkgdown` to create website, with GitHub action (#22) diff --git a/R/get_fscore.R b/R/get_fscore.R old mode 100755 new mode 100644 index 4ed8a44..7a0619d --- a/R/get_fscore.R +++ b/R/get_fscore.R @@ -10,12 +10,26 @@ #' "regression" to be consistent with #' \code{\link[lavaan]{lavPredict}}, but the Bartlett scores have #' more desirable properties and may be preferred for 2S-PA. +#' @param corrected_fsT Logical. Whether to correct for the the sampling +#' error in the factor score weights when computing +#' the error variance estimates of factor scores. +#' @param vfsLT Logical. Whether to return the covariance matrix of `fsT` +#' and `fsL`, which can be used as input for [vcov_corrected()] +#' to obtain corrected covariances and standard errors for +#' [tspa()] results. This is currently ignored. #' @param ... additional arguments passed to \code{\link[lavaan]{cfa}}. See #' \code{\link[lavaan]{lavOptions}} for a complete list. -#' @return A data frame containing the factor scores (with prefix "fs_") and -#' the standard errors (with suffix "_se"). +#' @return A data frame containing the factor scores (with prefix `"fs_"`), +#' the standard errors (with suffix `"_se"`), the implied loadings +#' of factor `"_by_"` factor scores, and the error variance-covariance +#' of the factor scores (with prefix `"evfs_"`). The following are +#' also returned as attributes: +#' * `fsT`: error covariance of factor scores +#' * `fsL`: loading matrix of factor scores +#' * `fsb`: intercepts of factor scores +#' * `scoring_matrix`: weights for computing factor scores from items #' -#' @importFrom lavaan cfa sem lavInspect lavTech +#' @importFrom lavaan cfa sem lavInspect lavTech coef #' @importFrom stats setNames #' #' @export @@ -41,6 +55,8 @@ get_fs <- function(data, model = NULL, group = NULL, method = c("regression", "Bartlett"), + corrected_fsT = FALSE, + vfsLT = FALSE, ...) { if (!is.data.frame(data)) data <- as.data.frame(data) if (is.null(model)) { @@ -52,9 +68,26 @@ get_fs <- function(data, model = NULL, group = NULL, paste(ind_names, collapse = " + ")) } fit <- cfa(model, data = data, group = group, ...) - est <- lavInspect(fit, what = "est") - y <- lavInspect(fit, what = "data") - prepare_fs_dat <- function(y, est) { + get_fs_lavaan(lavobj = fit, method = method, + corrected_fsT = corrected_fsT, + vfsLT = vfsLT) +} + +#' @inherit get_fs +#' @param lavobj A lavaan model object when using [get_fs_lavaan()]. +#' @export +get_fs_lavaan <- function(lavobj, + method = c("regression", "Bartlett"), + corrected_fsT = FALSE, + vfsLT = FALSE) { + est <- lavInspect(lavobj, what = "est") + y <- lavInspect(lavobj, what = "data") + if (corrected_fsT) { + add_to_evfs <- correct_evfs(lavobj, method = method) + } else { + add_to_evfs <- rep(0, lavInspect(lavobj, what = "ngroups")) + } + prepare_fs_dat <- function(y, est, add) { fscore <- compute_fscore(y, lambda = est$lambda, theta = est$theta, @@ -63,56 +96,76 @@ get_fs <- function(data, model = NULL, group = NULL, alpha = est$alpha, method = method, fs_matrices = TRUE) - augment_fs(est, fscore, attr(fscore, "av_efs")) + augment_fs(est, fscore, attr(fscore, "fsT") + add) } - if (is.null(group)) { - prepare_fs_dat(y, est) + group <- lavInspect(lavobj, what = "group") + if (length(group) == 0) { + out <- prepare_fs_dat(y, est, add_to_evfs[[1]]) } else { - fs_lst <- av_efs_lst <- fsA_lst <- fsb_lst <- - setNames(vector("list", length = length(est)), - fit@Data@group.label) - for (i in seq_along(fs_lst)) { - fs_lst[[i]] <- prepare_fs_dat(y[[i]], est[[i]]) - fs_lst[[i]][[group]] <- names(est[i]) - av_efs_lst[[i]] <- attr(fs_lst[[i]], "av_efs") - fsA_lst[[i]] <- attr(fs_lst[[i]], "fsA") - fsb_lst[[i]] <- attr(fs_lst[[i]], "fsb") + fs_lst <- setNames( + vector("list", length = length(est)), + lavobj@Data@group.label + ) + for (g in seq_along(fs_lst)) { + fs_lst[[g]] <- prepare_fs_dat(y[[g]], est[[g]], add_to_evfs[[g]]) + fs_lst[[g]][[group]] <- names(est[g]) + } + attr_names <- setdiff(names(attributes(fs_lst[[1]])), + c("names", "class", "row.names", "col.names")) + attr_lst <- rep( + list( + setNames(vector("list", length = length(est)), lavobj@Data@group.label) + ), + length(attr_names) + ) + for (j in seq_along(attr_lst)) { + for (g in seq_along(fs_lst)) { + attr_lst[[j]][[g]] <- attr(fs_lst[[g]], which = attr_names[j]) + } + attr(fs_lst, which = attr_names[j]) <- attr_lst[[j]] } - attr(fs_lst, which = "av_efs") <- av_efs_lst - attr(fs_lst, which = "fsA") <- fsA_lst - attr(fs_lst, which = "fsb") <- fsb_lst - fs_lst + out <- fs_lst + } + if (vfsLT) { + attr(out, "vfsLT") <- vcov_ld_evfs(lavobj, method = method) } + out } augment_fs <- function(est, fs, fs_ev) { fs_se <- t(as.matrix(sqrt(diag(fs_ev)))) + # fs_se[is.nan(fs_se)] <- 0 colnames(fs) <- paste0("fs_", colnames(fs)) - colnames(fs_se) <- paste0("fs_", colnames(fs_se), "_se") + colnames(fs_se) <- paste0(colnames(fs_se), "_se") num_lvs <- ncol(fs_ev) fs_evs <- rep(NA, num_lvs * (num_lvs + 1) / 2) count <- 1 for (i in seq_len(num_lvs)) { for (j in seq_len(i)) { fs_evs[count] <- fs_ev[i, j] - names(fs_evs)[count] <- paste0("evfs_", - rownames(fs_ev)[i], "_", - colnames(fs_ev)[j]) + if (i == j) { + names(fs_evs)[count] <- paste0("ev_", rownames(fs_ev)[i]) + } else { + names(fs_evs)[count] <- paste0("ecov_", + rownames(fs_ev)[i], "_", + colnames(fs_ev)[j]) + } count <- count + 1 } } - fsA <- attr(fs, "fsA") - fs_names <- paste0("fs_", colnames(fsA)) - fs_lds <- lapply(seq_len(ncol(fsA)), function(i) { - setNames(fsA[, i], - paste(colnames(fsA)[i], fs_names, sep = "_by_")) + fsL <- attr(fs, "fsL") + fs_names <- paste0("fs_", colnames(fsL)) + fs_lds <- lapply(seq_len(ncol(fsL)), function(i) { + setNames(fsL[, i], + paste(colnames(fsL)[i], fs_names, sep = "_by_")) }) fs_lds <- unlist(fs_lds) fs_dat <- cbind(as.data.frame(fs), fs_se, t(as.matrix(fs_lds)), t(as.matrix(fs_evs))) - attr(fs_dat, "av_efs") <- fs_ev - attr(fs_dat, "fsA") <- fsA + attr(fs_dat, "fsT") <- fs_ev + attr(fs_dat, "fsL") <- fsL attr(fs_dat, "fsb") <- attr(fs, "fsb") + attr(fs_dat, "scoring_matrix") <- attr(fs, "scoring_matrix") fs_dat } @@ -127,9 +180,11 @@ augment_fs <- function(est, fs, fs_ev) { #' @param alpha A vector of length q of latent means. #' @param method A character string indicating the method for computing factor #' scores. Currently, only "regression" is supported. +#' @param center_y Logical indicating whether \code{y} should be mean-centered. +#' Default to \code{TRUE}. #' @param fs_matrices Logical indicating whether covariances of the error -#' portion of factor scores (\code{av_efs}), factor score -#' loading matrix (\eqn{A}; \code{fsA}) and intercept vector +#' portion of factor scores (\code{fsT}), factor score +#' loading matrix (\eqn{L}; \code{fsL}) and intercept vector #' (\eqn{b}; \code{fsb}) should be returned. #' The loading and intercept matrices are the implied #' loadings and intercepts by the model when using the @@ -156,48 +211,182 @@ augment_fs <- function(est, fs, fs_ev) { #' psi = est$psi, #' method = "Bartlett") #' fs_hand - fs_lavaan # same scores -compute_fscore <- function(y, lambda, theta, psi, +compute_fscore <- function(y, lambda, theta, psi = NULL, nu = NULL, alpha = NULL, method = c("regression", "Bartlett"), + center_y = TRUE, acov = FALSE, fs_matrices = FALSE) { method <- match.arg(method) if (is.null(nu)) nu <- colMeans(y) - if (is.null(alpha)) alpha <- rep(0, ncol(as.matrix(lambda))) - covy <- lambda %*% psi %*% t(lambda) + theta - meany <- lambda %*% alpha + nu - y1c <- t(as.matrix(y)) - as.vector(meany) - if (method == "regression") { - if (is.null(psi)) { - stop("input of psi (latent covariance) is needed for regression scores") - } - # Regression score - ginvcovy <- MASS::ginv(covy) - tlam_invcov <- crossprod(lambda, ginvcovy) - a_mat <- psi %*% tlam_invcov - } else if (method == "Bartlett") { - # Bartlett score - ginvth <- MASS::ginv(theta) - tlam_invth <- crossprod(lambda, ginvth) - a_mat <- solve(tlam_invth %*% lambda, tlam_invth) + if (is.null(alpha)) alpha <- matrix(0, nrow = ncol(as.matrix(lambda))) + y1c <- t(as.matrix(y)) + if (center_y) { + meany <- lambda %*% alpha + nu + y1c <- y1c - as.vector(meany) } + a_mat <- compute_a_from_mat(method, + lambda = lambda, psi = psi, theta = theta) fs <- t(a_mat %*% y1c + as.vector(alpha)) if (acov) { - if (is.null(psi)) { - stop("input of psi (latent covariance) is needed for acov") + # if (is.null(psi)) { + # stop("input of psi (latent covariance) is needed for acov") + # } + if (method == "regression") { + covy <- lambda %*% psi %*% t(lambda) + theta + attr(fs, "acov") <- + unclass(psi - a_mat %*% covy %*% t(a_mat)) + } else if (method == "Bartlett") { + attr(fs, "acov") <- + unclass(a_mat %*% theta %*% t(a_mat)) } - dir_minus <- switch(method, regression = 1, Bartlett = -1) - attr(fs, "acov") <- - unclass(dir_minus * (psi - a_mat %*% covy %*% t(a_mat))) } if (fs_matrices) { - fsA <- unclass(a_mat %*% lambda) - attr(fs, "fsA") <- fsA - attr(fs, "fsb") <- alpha - fsA %*% alpha - # tv <- fsA %*% psi %*% t(fsA) + attr(fs, "scoring_matrix") <- a_mat + fsL <- unclass(a_mat %*% lambda) + fs_names <- paste0("fs_", colnames(fsL)) + rownames(fsL) <- fs_names + attr(fs, "fsL") <- fsL + fsb <- as.numeric(alpha - fsL %*% alpha) + names(fsb) <- fs_names + attr(fs, "fsb") <- fsb + # tv <- fsL %*% psi %*% t(fsL) # fsv <- a_mat %*% covy %*% t(a_mat) - # attr(fs, "av_efs") <- fsv - tv - attr(fs, "av_efs") <- a_mat %*% theta %*% t(a_mat) + # attr(fs, "fsT") <- fsv - tv + fsT <- a_mat %*% theta %*% t(a_mat) + rownames(fsT) <- colnames(fsT) <- fs_names + attr(fs, "fsT") <- fsT } fs } + +compute_fspars <- function(par, lavobj, method = c("regression", "Bartlett"), + what = c("a", "evfs", "ldfs")) { + method <- match.arg(method) + what <- match.arg(what) + ngrp <- lavInspect(lavobj, what = "ngroups") + frees <- lavInspect(lavobj, what = "free") + mats <- lavInspect(lavobj, what = "est") + if (ngrp == 1) { + frees <- list(frees) + mats <- list(mats) + } + out <- vector("list", ngrp) + for (g in seq_len(ngrp)) { + free <- frees[[g]] + mat <- mats[[g]] + free_list <- lapply(free, FUN = \(x) x[which(x > 0)]) + for (l in seq_along(free_list)) { + for (i in free_list[[l]]) { + mat[[l]][which(free[[l]] == i)] <- par[i] + } + } + a <- do.call(compute_a_from_mat, + args = c(method, mat[c("lambda", "psi", "theta")])) + if (what == "a") { + out[[g]] <- a + } else if (what == "evfs") { + out[[g]] <- a %*% mat$theta %*% t(a) + } else if (what == "ldfs") { + out[[g]] <- a %*% mat$lambda + } + } + out +} + +compute_a <- function(par, lavobj, method = c("regression", "Bartlett")) { + compute_fspars(par, lavobj = lavobj, method = method, what = "a") +} + +compute_a_from_mat <- function(method = c("regression", "Bartlett"), + lambda, theta, psi = NULL) { + method <- match.arg(method) + if (method == "regression") { + if (is.null(psi)) { + stop("input of psi (latent covariance) is needed for regression scores") + } + compute_a_reg(lambda, theta = theta, psi = psi) + } else if (method == "Bartlett") { + compute_a_bartlett(lambda, theta = theta, psi = psi) + } +} + +compute_a_reg <- function(lambda, theta, psi) { + covy <- lambda %*% psi %*% t(lambda) + theta + ginvcovy <- MASS::ginv(covy) + tlam_invcov <- crossprod(lambda, ginvcovy) + psi %*% tlam_invcov +} + +compute_a_bartlett <- function(lambda, theta, psi = NULL) { + ginvth <- MASS::ginv(theta) + tlam_invth <- crossprod(lambda, ginvth) + solve(tlam_invth %*% lambda, tlam_invth) +} + +correct_evfs <- function(fit, method = c("regression", "Bartlett")) { + method <- match.arg(method) + ngrp <- lavInspect(fit, what = "ngroups") + est_fits <- lavInspect(fit, what = "est") + if (ngrp == 1) est_fits <- list(est_fits) + outs <- vector("list", ngrp) + for (g in seq_len(ngrp)) { + est_fit <- est_fits[[g]] + p <- nrow(est_fit$psi) + jac_a <- vector("list", length = p) + for (i in seq_len(p)) { + jac_a[[i]] <- lavaan::lav_func_jacobian_complex( + function(x, fit, method) { + compute_a(x, lavobj = fit, method = method)[[g]][i, ] + }, + coef(fit), + fit = fit, + method = method + ) + } + out <- matrix(nrow = p, ncol = p) + th <- est_fit$theta + vc_fit <- vcov(fit) + for (j in seq_len(p)) { + for (i in j:p) { + out[i, j] <- sum(diag(th %*% jac_a[[i]] %*% vc_fit %*% t(jac_a[[j]]))) + if (i > j) { + out[j, i] <- out[i, j] + } + } + } + outs[[g]] <- out + } + outs +} + +compute_evfs <- function(par, lavobj, method = c("regression", "Bartlett")) { + compute_fspars(par, lavobj = lavobj, method = method, what = "evfs") +} + +compute_ldfs <- function(par, lavobj, method = c("regression", "Bartlett")) { + compute_fspars(par, lavobj = lavobj, method = method, what = "ldfs") +} + +compute_grad_ld_evfs <- function(fit, method = c("regression", "Bartlett")) { + method <- match.arg(method) + lavaan::lav_func_jacobian_complex( + function(x, fit, method) { + evfs <- compute_evfs(x, lavobj = fit, method = method) + evfs_lower <- lapply(evfs, function(x) { + x[lower.tri(x, diag = TRUE)] + }) + c(unlist(compute_ldfs(x, lavobj = fit, method = method)), + unlist(evfs_lower)) + }, + coef(fit), + fit = fit, + method = method + ) +} + +vcov_ld_evfs <- function(fit, method = c("regression", "Bartlett")) { + method <- match.arg(method) + jac <- compute_grad_ld_evfs(fit, method = method) + jac %*% lavaan::vcov(fit) %*% t(jac) +} \ No newline at end of file diff --git a/R/grandStandardizedSolution.R b/R/grandStandardizedSolution.R index 5ee191f..813bcb1 100644 --- a/R/grandStandardizedSolution.R +++ b/R/grandStandardizedSolution.R @@ -33,7 +33,7 @@ #' ' #' fit1 <- sem(model = mod1, #' data = PoliticalDemocracy) -#' grandStandardizedSolution(fit1) +#' grand_standardized_solution(fit1) #' #' ## A single-group, three-factor example #' mod2 <- ' @@ -47,7 +47,7 @@ #' ' #' fit2 <- sem(model = mod2, #' data = PoliticalDemocracy) -#' grandStandardizedSolution(fit2) +#' grand_standardized_solution(fit2) #' #' ## A multigroup, two-factor example #' mod3 <- ' @@ -60,7 +60,7 @@ #' fit3 <- sem(mod3, data = HolzingerSwineford1939, #' group = "school", #' group.equal = c("loadings", "intercepts")) -#' grandStandardizedSolution(fit3) +#' grand_standardized_solution(fit3) #' #' ## A multigroup, three-factor example #' mod4 <- ' @@ -75,18 +75,21 @@ #' fit4 <- sem(mod4, data = HolzingerSwineford1939, #' group = "school", #' group.equal = c("loadings", "intercepts")) -#' grandStandardizedSolution(fit4) +#' grand_standardized_solution(fit4) -grandStandardizedSolution <- function(object, model_list = NULL, +grand_standardized_solution <- function(object, model_list = NULL, se = TRUE, acov_par = NULL, free_list = NULL, level = .95) { if (is.null(model_list)) model_list <- lavTech(object, what = "est") ns <- lavInspect(object, what = "nobs") if (length(ns) == 1) ns <- NULL - if (is.null(ns)) message( - "The grand standardized solution is equivalent to ", - "lavaan::standardizedSolution() for a model with a single group.") + if (is.null(ns)) { + message( + "The grand standardized solution is equivalent to ", + "lavaan::standardizedSolution() for a model with a single group." + ) + } if (is.null(acov_par)) acov_par <- vcov(object) if (is.null(free_list)) free_list <- lavTech(object, what = "free") @@ -102,7 +105,9 @@ grandStandardizedSolution <- function(object, model_list = NULL, } else { tmp_std_beta <- unlist(grand_std_beta_est(model_list, ns)) group_names <- names(partable_beta) - all_beta_pos <- sapply(group_names, function(x) { partable_beta[[x]]$beta }) + all_beta_pos <- sapply(group_names, function(x) { + partable_beta[[x]]$beta + }) } beta_pos <- which(all_beta_pos != 0) out$est.std <- tmp_std_beta[beta_pos] @@ -113,9 +118,10 @@ grandStandardizedSolution <- function(object, model_list = NULL, free_beta_psi <- free_list[c("beta", "psi")] est <- .combine_est(model_list[c("beta", "psi")], free = free_beta_psi) - jac <- lav_func_jacobian_complex(function(x) - std_beta_est(model_list, free_list = free_list, est = x), - x = est) + jac <- lav_func_jacobian_complex( + function(x) std_beta_est(model_list, free_list = free_list, est = x), + x = est + ) pos_par <- .combine_est(free_beta_psi, free = free_beta_psi) } else { free_beta_psi_alpha <- free_list[which(names(model_list) %in% @@ -123,10 +129,15 @@ grandStandardizedSolution <- function(object, model_list = NULL, est <- .combine_est(model_list[which(names(model_list) %in% c("beta", "psi", "alpha"))], free = free_beta_psi_alpha) - jac <- lav_func_jacobian_complex(function(x) - unlist(grand_std_beta_est(model_list, ns = ns, - free_list = free_list, est = x)), - x = est) + jac <- lav_func_jacobian_complex( + function(x) { + unlist(grand_std_beta_est(model_list, + ns = ns, + free_list = free_list, est = x + )) + }, + x = est + ) pos_par <- .combine_est(free_beta_psi_alpha, free = free_beta_psi_alpha) } @@ -135,7 +146,8 @@ grandStandardizedSolution <- function(object, model_list = NULL, out$se <- sqrt(diag(as.matrix(tmp_acov_std_beta[beta_pos, beta_pos]))) out$z <- out$est.std / out$se out$pvalue <- 2 * (1 - pnorm(abs(out$z))) - ci <- out$est.std + out$se %o% qnorm(c((1 - level)/2, 1 - (1 - level)/2)) + ci <- out$est.std + + out$se %o% qnorm(c((1 - level) / 2, 1 - (1 - level) / 2)) out$ci.lower <- ci[, 1] out$ci.upper <- ci[, 2] } @@ -244,3 +256,7 @@ grand_std_beta_est <- function(model_list, ns, free_list = NULL, est = NULL) { diag(inv_s_eta) %*% x %*% diag(s_eta) }) } + +#' @rdname grand_standardized_solution +#' @export +grandStandardizedSolution <- grand_standardized_solution diff --git a/R/helper.R b/R/helper.R new file mode 100644 index 0000000..fac87de --- /dev/null +++ b/R/helper.R @@ -0,0 +1,34 @@ +#' Create block diagonal matrix +#' @param ... Either multiple matrices or a list of matrices. +#' @export +block_diag <- function(...) { + if (...length() > 1) { + x <- list(...) + } else { + x <- as.list(...) + } + p <- 0 + rn <- cn <- NULL + for (m in x) { + if (!is.matrix(m) || ncol(m) != nrow(m)) { + stop("Input to `block_diag()` must be a list", + "or multiple arguments of square matrices.") + } + rn <- c(rn, rownames(m)) + cn <- c(cn, colnames(m)) + p <- p + ncol(m) + } + if (length(rn) + length(cn) > 0 && (length(rn) != p || length(cn) != p)) { + stop("Matrices must all have column and row names, ", + "or all have no names.") + } + out <- matrix(0, nrow = p, ncol = p, + dimnames = list(rn, cn)) + last_col <- 0 + for (m in x) { + ind <- last_col + seq_len(nrow(m)) + out[ind, ind] <- m + last_col <- last_col + ncol(m) + } + out +} \ No newline at end of file diff --git a/R/tspa.R b/R/tspa.R old mode 100755 new mode 100644 index aa360e2..0b32999 --- a/R/tspa.R +++ b/R/tspa.R @@ -8,25 +8,31 @@ #' @param reliability A numeric vector representing the reliability indexes #' of each latent factor. Currently \code{tspa()} does not #' support the reliability argument. Please use \code{se}. -#' @param se A numeric vector representing the standard errors of each factor -#' score variable for single-group 2S-PA. A list or data frame -#' storing the standard errors of each group in each latent factor -#' for multigroup 2S-PA. -#' @param vc An error variance-covariance matrix of the factor scores, which -#' can be obtained from the output of \code{get_fs()} using -#' \code{attr()} with the argument \code{which = "av_efs"}. -#' @param cross_loadings A matrix of loadings and cross-loadings from the -#' latent variables to the factor scores \code{fs}, which -#' can be obtained from the output of \code{get_fs()} -#' using \code{attr()} with the argument -#' \code{which = "fsA"}. -#' For details see the multiple-factors vignette: -#' \code{vignette("multiple-factors", package = "R2spa")}. +#' @param se Deprecated to avoid conflict with the argument of the same name +#' in [lavaan::lavaan()]. +#' @param se_fs A numeric vector representing the standard errors of each +#' factor score variable for single-group 2S-PA. A list or data +#' frame storing the standard errors of each group in each latent +#' factor for multigroup 2S-PA. +#' @param fsT An error variance-covariance matrix of the factor scores, which +#' can be obtained from the output of \code{get_fs()} using +#' \code{attr()} with the argument \code{which = "fsT"}. +#' @param fsL A matrix of loadings and cross-loadings from the +#' latent variables to the factor scores \code{fs}, which +#' can be obtained from the output of \code{get_fs()} using +#' \code{attr()} with the argument \code{which = "fsL"}. +#' For details see the multiple-factors vignette: +#' \code{vignette("multiple-factors", package = "R2spa")}. +#' @param fsb A vector of intercepts for the factor scores \code{fs}, which can +#' be obtained from the output of \code{get_fs()} using \code{attr()} +#' with the argument \code{which = "fsb"}. #' @param ... Additional arguments passed to \code{\link[lavaan]{sem}}. See #' \code{\link[lavaan]{lavOptions}} for a complete list. #' @return An object of class \code{lavaan}, with an attribute \code{tspaModel} #' that contains the model syntax. +#' #' @export +#' #' @examples #' library(lavaan) #' @@ -39,8 +45,8 @@ #' fs_dat <- cbind(fs_dat_ind60, fs_dat_dem60) #' # tspa model #' tspa(model = "dem60 ~ ind60", data = fs_dat, -#' se = c(ind60 = fs_dat_ind60[1, "fs_ind60_se"], -#' dem60 = fs_dat_dem60[1, "fs_dem60_se"])) +#' se_fs = c(ind60 = fs_dat_ind60[1, "fs_ind60_se"], +#' dem60 = fs_dat_dem60[1, "fs_dem60_se"])) #' #' # single-group, three-factor example #' mod2 <- " @@ -53,8 +59,8 @@ #' tspa(model = "dem60 ~ ind60 #' dem65 ~ ind60 + dem60", #' data = fs_dat2, -#' vc = attr(fs_dat2, "av_efs"), -#' cross_loadings = attr(fs_dat2, "fsA")) +#' fsT = attr(fs_dat2, "fsT"), +#' fsL = attr(fs_dat2, "fsL")) #' #' # multigroup, two-factor example #' mod3 <- " @@ -66,8 +72,8 @@ #' group = "school") #' tspa(model = "visual ~ speed", #' data = fs_dat3, -#' vc = attr(fs_dat3, "av_efs"), -#' cross_loadings = attr(fs_dat3, "fsA"), +#' fsT = attr(fs_dat3, "fsT"), +#' fsL = attr(fs_dat3, "fsL"), #' group = "school") #' #' # multigroup, three-factor example @@ -82,8 +88,8 @@ #' tspa(model = "visual ~ speed #' textual ~ visual + speed", #' data = fs_dat4, -#' vc = attr(fs_dat4, "av_efs"), -#' cross_loadings = attr(fs_dat4, "fsA"), +#' fsT = attr(fs_dat4, "fsT"), +#' fsL = attr(fs_dat4, "fsL"), #' group = "school") #' #' # get factor scores @@ -99,138 +105,137 @@ #' # tspa model #' tspa(model = "visual ~ speed", #' data = fs_hs, -#' se = data.frame(visual = c(0.3391326, 0.311828), -#' speed = c(0.2786875, 0.2740507)), +#' se_fs = data.frame(visual = c(0.3391326, 0.311828), +#' speed = c(0.2786875, 0.2740507)), #' group = "school", #' group.equal = "regressions") #' #' # manually adding equality constraints on the regression coefficients #' tspa(model = "visual ~ c(b1, b1) * speed", #' data = fs_hs, -#' se = list(visual = c(0.3391326, 0.311828), -#' speed = c(0.2786875, 0.2740507)), +#' se_fs = list(visual = c(0.3391326, 0.311828), +#' speed = c(0.2786875, 0.2740507)), #' group = "school") -tspa <- function(model, data, reliability = NULL, se = NULL, - vc = NULL, cross_loadings = NULL, ...) { +tspa <- function(model, data, reliability = NULL, se = "standard", + se_fs = NULL, fsT = NULL, fsL = NULL, fsb = NULL, ...) { + + if (!inherits(model, "character")) { + stop("The structural path model provided is not a string.") + } + if (!is.null(reliability)) { stop("tspa() currently does not support reliability model") } - if (!is.data.frame(se)) { - se <- as.data.frame(as.list(se)) + if (!is.character(se)) { + warning("using `se` to set se for factor scores is deprecated. ", + "use `se_fs` instead.") } - multigroup <- nrow(se) == 1 | is.list(vc) - if (multigroup) { - if (is.null(vc)) { # SE - tspaModel <- tspaMultipleGroupSe(model, data, se) - } else { # covariance - tspaModel <- tspaMultipleGroupMF(model, data, vc, cross_loadings) - data <- do.call(rbind, data) + if (!is.data.frame(se_fs)) { + se_fs <- as.data.frame(as.list(se_fs)) + } + multigroup <- nrow(se_fs) > 1 | is.list(fsT) + + if (xor(is.null(fsT), is.null(fsL))) { + stop("Please provide both or none of fsT and fsL.") + } + + if (!is.null(fsT)) { + fs_names <- ifelse(multigroup, colnames(fsT[[1]]), colnames(fsT)) + dat_names <- ifelse(multigroup, names(data[[1]]), names(data)) + names_match <- lapply(fs_names, function(x) x %in% dat_names) |> unlist() + if (any(!names_match)) { + stop( + "Names of factor score variables do not match those in the input data." + ) } - } else { - if (is.null(vc)) { # SE - tspaModel <- tspaSingleGroup(model, data, se) - } else { # covariance - tspaModel <- tspaSingleGroupMF(model, data, vc, cross_loadings) + } + + if (multigroup && is.null(list(...)[["group"]])) { + stop("Please specify 'group = ' to fit a multigroup model in lavaan.") + } + + if (is.null(fsT)) { # single-factor measurement model + tspaModel <- tspa_sf(model, data, se_fs) + } else { # multi-factor measurement model + tspaModel <- tspa_mf(model, data, fsT, fsL, fsb) + if (inherits(data, "list")) { + data <- do.call(rbind, data) } } tspa_fit <- sem(model = tspaModel, data = data, + se = se, ...) - # to access the attribute, use attr(x,"tspaModel") attr(tspa_fit, "tspaModel") <- tspaModel + if (!is.null(fsT)) { + attr(tspa_fit, "fsT") <- fsT + attr(tspa_fit, "fsL") <- fsL + } + attr(tspa_fit, "tspa_call") <- match.call() return(tspa_fit) } -tspaSingleGroup <- function(model, data, se = NULL) { +tspa_sf <- function(model, data, se = NULL) { if (nrow(se) != 0) { ev <- se^2 var <- names(se) len <- length(se) - + group <- nrow(se) # col <- colnames(data) fs <- paste0("fs_", var) + tspaModel <- rep(NA, len) latent_var <- rep(NA, len) error_constraint <- rep(NA, len) - # latent_variance <- rep(NA, len) - for (x in seq_len(len)) { - latent_var[x] <- paste0(var[x], " =~ ", fs[x], "\n") - error_constraint[x] <- paste0(fs[x], " ~~ ", ev[x], " * ", fs[x], "\n") - # latent_variance[x] <- paste0(var[x], " ~~ v", x, " * ", var[x], "\n") - } + latent_var <- lapply(seq_len(len), function(x) { + paste0( + var[x], "=~ c(", + paste0(rep(1, group), collapse = ", "), ") * ", fs[x], "\n" + ) + }) + + error_constraint <- lapply(seq_len(len), function(x) { + paste0( + fs[x], "~~ c(", paste(ev[x], collapse = ", "), ") * ", fs[x], "\n" + ) + }) latent_var_str <- paste(latent_var, collapse = "") error_constraint_str <- paste(error_constraint, collapse = "") - # latent_variance_str <- paste(latent_variance, collapse="") - tspaModel <- paste0("# latent variables (indicated by factor scores)\n", - latent_var_str, - "# constrain the errors\n", - error_constraint_str, - "# latent variances\n", - # latent_variance_str, - # "# regressions\n", - model, - "\n") + tspaModel <- paste0( + c("# latent variables (indicated by factor scores)", + latent_var_str, + "# constrain the errors", + error_constraint_str, + "# structural model", + model), + collapse = "\n" + ) return(tspaModel) } } -tspaSingleGroupMF <- function(model, data, vc, cross_loadings) { - # ev <- se^2 - var <- colnames(vc) - len <- nrow(vc) - - col <- colnames(data) - fs <- paste0("fs_", var) - colnames(vc) <- rownames(vc) <- fs - - # latent variables - loadings <- paste0(cross_loadings, " * ", fs) - loadings_list <- split(loadings, factor(rep(var, each = len), - levels = var)) - loadings_c <- lapply(loadings_list, function(x) { - paste0(x, collapse = " + ") - }) - latent_var_str <- paste(var, "=~", loadings_c) - # error variances - vc_in <- !upper.tri(vc) - ev_rhs <- colnames(vc)[col(vc_in)[vc_in]] - ev_lhs <- rownames(vc)[row(vc_in)[vc_in]] - error_constraint_str <- paste0(ev_lhs, " ~~ ", vc[vc_in], " * ", ev_rhs) - # # latent variances - # latent_variance_str <- paste(var, "~~", var) - - tspaModel <- paste0(c( - "# latent variables (indicated by factor scores)", - latent_var_str, - "# constrain the errors", - error_constraint_str, - # "# latent variances", - # latent_variance_str, - "# regressions", - model - ), - collpase = "\n") - - return(tspaModel) -} - -tspaMultipleGroupMF <- function(model, data, vc, cross_loadings) { - ngroup <- length(vc) - var <- colnames(vc[[1]]) +tspa_mf <- function(model, data, fsT, fsL, fsb) { + if (is.list(fsT)) { + ngroup <- length(fsT) + fsL1 <- fsL[[1]] + fsT_in <- !upper.tri(fsT[[1]]) + } else { + ngroup <- 1 + fsL1 <- fsL + fsT_in <- !upper.tri(fsT) + } + var <- colnames(fsL1) nvar <- length(var) - - col <- colnames(data[[1]]) - fs <- paste0("fs_", var) - # colnames(vc) <- rownames(vc) <- fs + fs <- rownames(fsL1) # latent variables - loadings_mat <- matrix(unlist(cross_loadings), ncol = ngroup) + loadings_mat <- matrix(unlist(fsL), ncol = ngroup) loadings <- apply(loadings_mat, 1, function(x) { paste0("c(", paste0(x, collapse = ", "), ") * ") }) |> @@ -240,75 +245,36 @@ tspaMultipleGroupMF <- function(model, data, vc, cross_loadings) { loadings_c <- lapply(loadings_list, function(x) { paste0(x, collapse = " + ") }) - latent_var_str <- paste(var, "=~", loadings_c) + latent_var_str <- paste("# latent variables (indicated by factor scores)\n", + var, "=~", loadings_c) # error variances - vc_in <- !upper.tri(vc[[1]]) - ev_rhs <- paste0("fs_", colnames(vc[[1]])[col(vc_in)[vc_in]]) - ev_lhs <- paste0("fs_", rownames(vc[[1]])[row(vc_in)[vc_in]]) - errors_mat <- matrix(unlist(vc), ncol = ngroup)[as.vector(vc_in), ] + ev_rhs <- fs[col(fsT_in)[fsT_in]] + ev_lhs <- fs[row(fsT_in)[fsT_in]] + errors_mat <- matrix(unlist(fsT), ncol = ngroup)[as.vector(fsT_in), , + drop = FALSE] errors <- apply(errors_mat, 1, function(x) { paste0("c(", paste0(x, collapse = ", "), ")") }) - error_constraint_str <- paste0(ev_lhs, " ~~ ", errors, " * ", ev_rhs) - # # latent variances - # latent_variance_str <- paste(var, "~~", var) + error_constraint_str <- paste0("# constrain the errors\n", + ev_lhs, " ~~ ", errors, " * ", ev_rhs) + if (!is.null(fsb)) { + # intercepts + intercepts_mat <- matrix(unlist(fsb), ncol = ngroup) + intercepts <- split(intercepts_mat, rep(seq_len(nrow(intercepts_mat)), ngroup)) + intercept_constraint <- paste0("# constrain the intercepts\n", + fs, " ~ ", intercepts, " * 1") + } else { + intercept_constraint <- "" + } tspaModel <- paste0(c( - "# latent variables (indicated by factor scores)", latent_var_str, - "# constrain the errors", error_constraint_str, - # "# latent variances", - # latent_variance_str, - "# regressions", + intercept_constraint, + "# structural model", model ), collpase = "\n") return(tspaModel) } - - -tspaMultipleGroupSe <- function(model, data, se = NULL) { - # if (is.list(se)) { - # len <- - # } - if (nrow(se) != 0) { - ev <- se^2 - var <- names(se) - len <- length(se) - group <- nrow(se) - # col <- colnames(data) - fs <- paste0("fs_", var) - tspaModel <- rep(NA, len) - latent_var <- rep(NA, len) - error_constraint <- rep(NA, len) - # latent_variance <- rep(NA, len) - - for (x in seq_len(len)) { - latent_var[x] <- paste0( - var[x], "=~ c(", - paste0(rep(1, group), collapse = ", "), ") * ", fs[x], "\n" - ) - error_constraint[x] <- paste0( - fs[x], "~~ c(", paste(ev[x], collapse = ", "), ") * ", fs[x], "\n" - ) - # latent_variance[x] <- paste0(var[x], " ~~ c(", paste0(paste0(rep(paste0("v",x), group), 1:group), collapse = ", "), ") * ", var[x], "\n") - } - - latent_var_str <- paste(latent_var, collapse = "") - error_constraint_str <- paste(error_constraint, collapse = "") - # latent_variance_str <- paste(latent_variance, collapse="") - tspaModel <- paste0("# latent variables (indicated by factor scores)\n", - latent_var_str, - "# constrain the errors\n", - error_constraint_str, - # "# latent variances\n", - # latent_variance_str, - "# regressions\n", - model, - "\n") - - return(tspaModel) - } -} diff --git a/R/tspa_corrected_se.R b/R/tspa_corrected_se.R new file mode 100644 index 0000000..11026dc --- /dev/null +++ b/R/tspa_corrected_se.R @@ -0,0 +1,78 @@ +#' First-order correction of sampling covariance for 2S-PA estimates +#' @param tspa_fit A fitted model from [tspa()]. +#' @param vfsLT The sampling covariance matrix of `fsL` and `fsT`, which can be +#' obtained with [get_fs()] with the argument `vfsLT = TRUE`. +#' @param which_free An optional numeric vector indicating which parameters +#' in `fsL` and `fsT` are free. The parameters are ordered +#' by the `fsL` matrix and the lower-triangular part of +#' `fsT`, by columns. For example, for a two-factor model, +#' `fsL` and `fsT` are both 2 x 2 matrices, and the +#' error covariance between the two factor scores (i.e., +#' the `[2, 1]` element in `fsT`) has an index of 6. +#' @param ... Currently not used. +#' @return A corrected covariance matrix in the same dimension as +#' `vcov(tspa_fit)`. +#' @export +vcov_corrected <- function(tspa_fit, vfsLT, which_free = NULL, ...) { + if (is.null(attr(tspa_fit, "fsT"))) { + stop("corrected vcov requires a tspa model with ", + "vc and cross-loadings specified.") + } + ngrp <- lavInspect(tspa_fit, what = "ngroups") + val_fsL <- attr(tspa_fit, "fsL") + val_fsT <- attr(tspa_fit, "fsT") + if (ngrp == 1) { + if (!is.list(val_fsL)) { + val_fsL <- list(val_fsL) + } + if (!is.list(val_fsT)) { + val_fsT <- list(val_fsT) + } + } + val_fsT_lower <- lapply(val_fsT, function(x) { + x[lower.tri(x, diag = TRUE)] + }) + val_fsLT <- c(unlist(val_fsL), unlist(val_fsT_lower)) + if (is.null(which_free)) { + which_free <- seq_along(val_fsLT) + } + jac <- lavaan::lav_func_jacobian_complex( + function(x) { + par <- val_fsLT + par[which_free] <- x + counter <- 0 + num_ld <- length(val_fsL[[1]]) + for (g in seq_len(ngrp)) { + val_fsL[[g]][] <- par[(1:num_ld) + counter] + counter <- counter + num_ld + } + num_ev <- sum(lower.tri(val_fsT[[1]], diag = TRUE)) + for (g in seq_len(ngrp)) { + val_fsT[[g]][] <- lavaan::lav_matrix_lower2full( + par[(1:num_ev) + counter] + ) + counter <- counter + num_ev + } + if (ngrp == 1) { + val_fsL <- val_fsL[[1]] + val_fsT <- val_fsT[[1]] + } + lavaan::coef(update_tspa(tspa_fit, fsL = val_fsL, fsT = val_fsT)) + }, + x = val_fsLT[which_free], + ) + vcov(tspa_fit) + jac %*% vfsLT %*% t(jac) +} + +update_tspa <- function(tspa_fit, fsL, fsT) { + call <- attr(tspa_fit, which = "tspa_call") + if (!missing(fsL)) { + call$fsL <- eval(call$fsL) + call$fsL <- fsL + } + if (!missing(fsT)) { + call$fsT <- eval(call$fsT) + call$fsT <- fsT + } + eval(call) +} \ No newline at end of file diff --git a/R/tspa_mx.R b/R/tspa_mx.R new file mode 100644 index 0000000..6e8f585 --- /dev/null +++ b/R/tspa_mx.R @@ -0,0 +1,168 @@ +#' Two-Stage Path Analysis with Definition Variables Using OpenMx +#' +#' Fit a two-stage path analysis (2S-PA) model. +#' +#' @param mx_model A model object of class [OpenMx::MxRAMModel-class], created +#' by [OpenMx::mxModel()] or the `umx` package. This +#' is the structural model part. +#' @param data A data frame containing factor scores. +#' @param mat_ld A \eqn{p \times p} matrix indicating the loadings of the +#' factor scores on the latent variables. The *i*th row indicate +#' loadings of the *i*th factor score variable on the latent +#' variables. This could be one of the following: +#' * A matrix created by [OpenMx::mxMatrix()] with loading +#' values. +#' * A named numeric matrix, with rownames and column names +#' matching the factor score and latent variables. +#' * A named character matrix, with rownames and column names +#' matching the factor score and latent variables, and the +#' character values indicating the variable names in `data` for +#' the corresponding loadings. +#' @param mat_vc Similar to `mat_ld` but for the error variance-covariance +#' matrix of the factor scores. +#' @param ... Additional arguments passed to [OpenMx::mxModel()]. +#' @return An object of class [OpenMx::MxModel-class]. Note that the +#' model has not been run. +#' +#' @importFrom OpenMx mxData mxMatrix mxAlgebraFromString mxExpectationNormal +#' mxModel mxFitFunctionML +#' +#' @export +#' +#' @examples +#' library(mirt) +#' library(umx) +#' library(OpenMx) +#' # Simulate data with mirt +#' set.seed(1324) +#' num_obs <- 100 +#' # Simulate theta +#' eta <- MASS::mvrnorm(num_obs, mu = c(0, 0), Sigma = diag(c(1, 1 - 0.5^2)), +#' empirical = TRUE) +#' th1 <- eta[, 1] +#' th2 <- -1 + 0.5 * th1 + eta[, 2] +#' # items and response data +#' a1 <- matrix(1, 10) +#' d1 <- matrix(rnorm(10)) +#' a2 <- matrix(runif(10, min = 0.5, max = 1.5)) +#' d2 <- matrix(rnorm(10)) +#' dat1 <- mirt::simdata(a = a1, d = d1, +#' N = num_obs, itemtype = "2PL", Theta = th1) +#' dat2 <- mirt::simdata(a = a2, d = d2, N = num_obs, +#' itemtype = "2PL", Theta = th2) +#' # Factor scores +#' mod1 <- mirt(dat1, model = 1, itemtype = "Rasch", verbose = FALSE) +#' mod2 <- mirt(dat2, model = 1, itemtype = "2PL", verbose = FALSE) +#' fs1 <- fscores(mod1, full.scores.SE = TRUE) +#' fs2 <- fscores(mod2, full.scores.SE = TRUE) +#' # Combine factor scores and standard errors into data set +#' fs_dat <- as.data.frame(cbind(fs1, fs2)) +#' names(fs_dat) <- c("fs1", "se_fs1", "fs2", "se_fs2") +#' # Compute reliability and error variances +#' fs_dat <- within(fs_dat, expr = { +#' rel_fs1 <- 1 - se_fs1^2 +#' rel_fs2 <- 1 - se_fs2^2 +#' ev_fs1 <- se_fs1^2 * (1 - se_fs1^2) +#' ev_fs2 <- se_fs2^2 * (1 - se_fs2^2) +#' }) +#' # OpenMx model (from umx so that lavaan syntax can be used) +#' fsreg_umx <- umxLav2RAM( +#' " +#' fs2 ~ fs1 +#' fs2 + fs1 ~ 1 +#' ", +#' printTab = FALSE) +#' # Prepare loading and error covariance matrices +#' cross_load <- matrix(c("rel_fs2", NA, NA, "rel_fs1"), nrow = 2) |> +#' `dimnames<-`(rep(list(c("fs2", "fs1")), 2)) +#' err_cov <- matrix(c("ev_fs2", NA, NA, "ev_fs1"), nrow = 2) |> +#' `dimnames<-`(rep(list(c("fs2", "fs1")), 2)) +#' # Create 2S-PA model (with definition variables) +#' tspa_mx <- +#' tspa_mx_model(fsreg_umx, +#' data = fs_dat, +#' mat_ld = cross_load, +#' mat_vc = err_cov +#' ) +#' # Run OpenMx +#' tspa_mx_fit <- mxRun(tspa_mx) +#' # Summarize the results +#' summary(tspa_mx_fit) + +tspa_mx_model <- function(mx_model, data, mat_ld, mat_vc, ...) { + str_name <- mx_model$name + p <- ncol(mat_vc) + if (!isTRUE(attr(class(mat_ld), which = "package") == "OpenMx")) { + fs_name <- mx_model$manifestVars + ld_ind <- match(fs_name, table = rownames(mat_ld)) + if (any(is.na(ld_ind))) { + stop("`rownames(mat_ld)` must match the names of the", + "factor score variables.") + } + mat_ld <- make_mx_ld(mat_ld[ld_ind, ld_ind]) + } + if (!isTRUE(attr(class(mat_vc), which = "package") == "OpenMx")) { + fs_name <- mx_model$manifestVars + vc_ind <- match(fs_name, table = rownames(mat_vc)) + if (any(is.na(vc_ind))) { + stop("`rownames(mat_vc)` must match the names of the", + "factor score variables.") + } + mat_vc <- make_mx_vc(mat_vc[vc_ind, vc_ind]) + } + mxModel( + "2SPAD", + mxData(observed = data, type = "raw"), + mx_model, mat_ld, mat_vc, + mxMatrix(type = "Iden", nrow = p, ncol = p, name = "I"), + mxAlgebraFromString(paste0("(L %*% solve(I - ", str_name, ".A)) %&% ", + str_name, ".S + E"), name = "expCov"), + mxAlgebraFromString(paste0(str_name, ".M %*% t(L %*% solve(I - ", + str_name, ".A))"), name = "expMean"), + # mxAlgebraFromString(paste0(str_name, ".M"), name = "expMean"), + mxExpectationNormal( + covariance = "expCov", means = "expMean", + dimnames = mx_model$manifestVars), + mxFitFunctionML() + ) +} + +make_mx_ld <- function(ld_mat) { + if (is.numeric(ld_mat)) { + val <- ld_mat + lab <- NA + } else if (is.character(ld_mat)) { + lab <- ld_mat + lab[!is.na(lab)] <- paste0("data.", lab[!is.na(lab)]) + val <- NA + } else { + stop("`ld_mat` must be either a numeric matrix or a character matrix.") + } + mxMatrix( + type = "Full", nrow = nrow(ld_mat), ncol = nrow(ld_mat), + free = FALSE, + values = val, + labels = lab, + name = "L" + ) +} + +make_mx_vc <- function(vc_mat) { + if (is.numeric(vc_mat)) { + val <- vc_mat + lab <- NA + } else if (is.character(vc_mat)) { + lab <- vc_mat + lab[!is.na(lab)] <- paste0("data.", lab[!is.na(lab)]) + val <- NA + } else { + stop("`vc_mat` must be either a numeric matrix or a character matrix.") + } + mxMatrix( + type = "Symm", nrow = nrow(vc_mat), ncol = nrow(vc_mat), + free = FALSE, + values = val, + labels = lab, + name = "E" + ) +} \ No newline at end of file diff --git a/R/tspa_plot.R b/R/tspa_plot.R new file mode 100644 index 0000000..07bddc2 --- /dev/null +++ b/R/tspa_plot.R @@ -0,0 +1,232 @@ +#' Diagnostic plots of fitted 2S-PA model +#' +#' @param tspa_fit An object of class [lavaan][lavaan-class], +#' representing the output generated from the [tspa()] +#' function. +#' @param title Character. Set the name of scatter plot. The default value is +#' "Scatterplot". +#' @param label_x Character. Set the name of the x-axis. The default value is +#' "fs_" followed by variable names. +#' @param label_y Character. Set the name of the y-axis. The default value is +#' "fs_" followed by variable names. +#' @param abbreviation Logic input. If `FALSE` is indicated, the group name +#' will be shown in full. The default setting is `TRUE`. +#' @param ask Logic input. If `TRUE` is indicated, the user will be asked before +#' before each plot is generated. The default setting is 'False'. +#' @param fscores_type Character. Set the type of factor score for input. +#' The default setting is using factor score from observed +#' data (i.e., output from [get_fs()]). If +#' `fscore_type = "est"`, then it will use output from +#' [lavaan::lavPredict()]. +#' @param ... Additional arguments passed to \code{\link[graphics]{plot}}. See +#' \code{\link[graphics]{plot}} for a list. +#' +#' @return A scatterplot between factor scores, and a residual plot. +#' +#' @importFrom grDevices devAskNewPage +#' +#' @export +#' +#' @examples +#' library(lavaan) +#' model <- " +#' # latent variable definitions +#' ind60 =~ x1 + x2 + x3 +#' dem60 =~ y1 + a*y2 + b*y3 + c*y4 +#' # regressions +#' dem60 ~ ind60 +#' " +#' fs_dat_ind60 <- get_fs( +#' data = PoliticalDemocracy, +#' model = "ind60 =~ x1 + x2 + x3" +#' ) +#' fs_dat_dem60 <- get_fs( +#' data = PoliticalDemocracy, +#' model = "dem60 =~ y1 + y2 + y3 + y4" +#' ) +#' fs_dat <- cbind(fs_dat_ind60, fs_dat_dem60) +#' +#' tspa_fit <- tspa( +#' model = "dem60 ~ ind60", +#' data = fs_dat, +#' se_fs = list(ind60 = 0.1213615, dem60 = 0.6756472) +#' ) +#' tspa_plot(tspa_fit) +tspa_plot <- function(tspa_fit, + title = NULL, + label_x = NULL, + label_y = NULL, + abbreviation = TRUE, + fscores_type = c("original", "lavaan"), + ask = FALSE, + ...) { + fit_pars <- lavaan::parameterestimates(tspa_fit) # parameter estimates + # HL Note: this may not be the most reliable in extracting exo and + # endo variables. + regression_fit <- fit_pars[which(fit_pars$op == "~"), ] # path coefficients + # latent_dv <- unique(c(unname(t(regression_fit["lhs"])))) # Extract DV + # latent_iv <- unique(c(unname(t(regression_fit["rhs"])))) # Extract IV + fscores_type <- match.arg(fscores_type) # Specify the type of factor scores + + # reg_pairs <- names(coef(tspa_fit)[names(coef(tspa_fit)) %in% + # apply(expand.grid(latent_dv, latent_iv), 1, paste, collapse = "~")]) + # HL: Perhaps an easier way + reg_pairs <- paste(regression_fit[["lhs"]], regression_fit[["rhs"]], + sep = "~") + + # Type of scatterplot + if (fscores_type == "original") { + # factor scores from `get_fs` + fscores <- lavaan::lavInspect(tspa_fit, what = "data") + } else { + # factor scores from estimation using `lavPredict` + fscores <- lavaan::lavPredict(tspa_fit) + } + + # Determine multi-group sample + # HL: To simplify the code, just convert fscores to a list for single groups, + # and the for loop can apply equally. + if (!is.list(fscores)) { + fscores <- list(fscores) + g_nums <- list(NULL) + } else { + g_nums <- as.list(seq_along(fscores)) + } + # Ask for abbreviation + g_names <- names(fscores) + if (!is.null(g_names) && abbreviation) { + g_names <- abbreviate(g_names) + } + + # Prepare for abline + fscores <- lapply(fscores, as.data.frame) + # slope <- coef(tspa_fit)[grepl(apply(expand.grid(latent_dv, latent_iv), 1, paste, collapse="~"), + # names(coef(tspa_fit)))] + # fs_means <- matrix(unlist(lapply(df_latent_scores, apply, 2, mean, na.rm = T)), + # nrow = length(slope), + # byrow = T) + # intercept <- fs_means[,2] - fs_means[,1]*slope + + # Ask for next plot + if (ask) { + oask <- devAskNewPage(TRUE) + on.exit(devAskNewPage(oask)) + } + + for (g in seq_along(g_nums)) { + for (i in seq_along(reg_pairs)) { + + # Scatterplot + plot_scatter( + # Note: Use `[[` for list + fscores_df = fscores[[g]], + iv = unlist(strsplit(reg_pairs[i], split = "~"))[2], + dv = unlist(strsplit(reg_pairs[i], split = "~"))[1], + # ab_slope = slope[g], + # ab_intercept = intercept[g], + g_num = g_nums[[g]], + g_name = g_names[g], + title = title[i], + label_x = label_x[i], + label_y = label_y[i], + ... + ) + + # Residual Plot + plot_residual( + fscores_df = fscores[[g]], + iv = unlist(strsplit(reg_pairs[i], split = "~"))[2], + dv = unlist(strsplit(reg_pairs[i], split = "~"))[1], + tspa_fit = tspa_fit, + g_num = g_nums[[g]], + g_name = g_names[g], + ... + ) + } + } +} + +# Helper function for the scatter plot +plot_scatter <- function(fscores_df, iv, dv, + # ab_slope, ab_intercept, + g_num, g_name, + title, label_x, label_y, + ...) { + # Group name for multiple-group case + # HL: The function exists to handle single-group. Make input + # consistent to avoid extra conditional statements. + iv_data <- fscores_df[, paste0("fs_", iv)] + dv_data <- fscores_df[, paste0("fs_", dv)] + + # ylab + ylab <- c() + if (is.null(label_y)) { + ylab <- paste0("fs_", dv) + } else { + ylab <- label_y + } + + # ylab + xlab <- c() + if (is.null(label_x)) { + xlab <- paste0("fs_", iv) + } else { + xlab <- label_x + } + + # title + # HL: Simplify to two conditionals + if (is.null(title)) { + title <- paste0("Scatterplot") + } + if (!is.null(g_name)) { + title <- paste0(title, " (Group ", g_num, ": ", g_name, ")") + } + + plot(iv_data, + dv_data, + ylab = ylab, + xlab = xlab, + main = title, + pch = 16, + ... + ) + # abline(a = ab_intercept, b = ab_slope, lwd = 2) +} + +# Helper function for the residual plot +# HL: Incorrect to say `g_num = g_num`, as one cannot assume there +# `g_num` exists in the parent environment. +plot_residual <- function(fscores_df, iv, dv, + tspa_fit, + g_num, g_name, + ...) { + # HL: The function exists to handle single-group. Make input + # consistent to avoid extra conditional statements. + iv_data <- fscores_df[, paste0("fs_", iv)] + dv_data <- fscores_df[, paste0("fs_", dv)] + + predicted_y <- lavaan::lavPredictY(tspa_fit, + ynames = paste0("fs_", dv), xnames = paste0("fs_", iv), + assemble = FALSE + ) + + if (!is.null(g_name)) { + predicted_y <- predicted_y[[g_num]] + # title + title <- paste0("Residual Plot", " (Group ", g_num, ": ", g_name, ")") + } else { + title <- paste0("Residual Plot") + } + + resid <- dv_data - predicted_y + + plot(iv_data, + resid, + ylab = paste0("Residuals: ", dv), + xlab = paste0("Fitted values: ", iv), + main = title, + pch = 18, + ... + ) +} diff --git a/R2spa.Rproj b/R2spa.Rproj old mode 100755 new mode 100644 diff --git a/README.Rmd b/README.Rmd index de87cee..d9e5003 100644 --- a/README.Rmd +++ b/README.Rmd @@ -63,7 +63,7 @@ head(fs_dat) tspa_fit <- tspa( model = "dem60 ~ ind60", data = fs_dat, - se = list(ind60 = 0.1213615, dem60 = 0.6756472) + se_fs = list(ind60 = 0.1213615, dem60 = 0.6756472) ) parameterestimates(tspa_fit) ``` diff --git a/README.md b/README.md old mode 100755 new mode 100644 index 4dfe88e..5956e95 --- a/README.md +++ b/README.md @@ -55,20 +55,20 @@ fs_dat <- cbind(fs_dat_ind60, fs_dat_dem60) # get_fs() gives a dataframe with factor scores and standard errors head(fs_dat) -#> fs_ind60 fs_ind60_se ind60_by_fs_ind60 evfs_ind60_ind60 fs_dem60 -#> 1 -0.5261683 0.1213615 0.9657673 0.01472862 -2.7487224 -#> 2 0.1436527 0.1213615 0.9657673 0.01472862 -3.0360803 -#> 3 0.7143559 0.1213615 0.9657673 0.01472862 2.6718589 -#> 4 1.2399257 0.1213615 0.9657673 0.01472862 2.9936997 -#> 5 0.8319080 0.1213615 0.9657673 0.01472862 1.9242932 -#> 6 0.2123845 0.1213615 0.9657673 0.01472862 0.9922798 -#> fs_dem60_se dem60_by_fs_dem60 evfs_dem60_dem60 -#> 1 0.6756472 0.8868049 0.4564991 -#> 2 0.6756472 0.8868049 0.4564991 -#> 3 0.6756472 0.8868049 0.4564991 -#> 4 0.6756472 0.8868049 0.4564991 -#> 5 0.6756472 0.8868049 0.4564991 -#> 6 0.6756472 0.8868049 0.4564991 +#> fs_ind60 fs_ind60_se ind60_by_fs_ind60 ev_fs_ind60 fs_dem60 fs_dem60_se +#> 1 -0.5261683 0.1213615 0.9657673 0.01472862 -2.7487224 0.6756472 +#> 2 0.1436527 0.1213615 0.9657673 0.01472862 -3.0360803 0.6756472 +#> 3 0.7143559 0.1213615 0.9657673 0.01472862 2.6718589 0.6756472 +#> 4 1.2399257 0.1213615 0.9657673 0.01472862 2.9936997 0.6756472 +#> 5 0.8319080 0.1213615 0.9657673 0.01472862 1.9242932 0.6756472 +#> 6 0.2123845 0.1213615 0.9657673 0.01472862 0.9922798 0.6756472 +#> dem60_by_fs_dem60 ev_fs_dem60 +#> 1 0.8868049 0.4564991 +#> 2 0.8868049 0.4564991 +#> 3 0.8868049 0.4564991 +#> 4 0.8868049 0.4564991 +#> 5 0.8868049 0.4564991 +#> 6 0.8868049 0.4564991 ``` ``` r @@ -76,7 +76,7 @@ head(fs_dat) tspa_fit <- tspa( model = "dem60 ~ ind60", data = fs_dat, - se = list(ind60 = 0.1213615, dem60 = 0.6756472) + se_fs = list(ind60 = 0.1213615, dem60 = 0.6756472) ) parameterestimates(tspa_fit) #> lhs op rhs est se z pvalue ci.lower ci.upper diff --git a/man/block_diag.Rd b/man/block_diag.Rd new file mode 100644 index 0000000..42ec1fb --- /dev/null +++ b/man/block_diag.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper.R +\name{block_diag} +\alias{block_diag} +\title{Create block diagonal matrix} +\usage{ +block_diag(...) +} +\arguments{ +\item{...}{Either multiple matrices or a list of matrices.} +} +\description{ +Create block diagonal matrix +} diff --git a/man/compute_fscore.Rd b/man/compute_fscore.Rd index 4b34841..7e83622 100644 --- a/man/compute_fscore.Rd +++ b/man/compute_fscore.Rd @@ -8,10 +8,11 @@ compute_fscore( y, lambda, theta, - psi, + psi = NULL, nu = NULL, alpha = NULL, method = c("regression", "Bartlett"), + center_y = TRUE, acov = FALSE, fs_matrices = FALSE ) @@ -33,12 +34,15 @@ is only one observation, it should be a matrix of one row.} \item{method}{A character string indicating the method for computing factor scores. Currently, only "regression" is supported.} +\item{center_y}{Logical indicating whether \code{y} should be mean-centered. +Default to \code{TRUE}.} + \item{acov}{Logical indicating whether the asymptotic covariance matrix of factor scores should be returned as an attribute.} \item{fs_matrices}{Logical indicating whether covariances of the error -portion of factor scores (\code{av_efs}), factor score -loading matrix (\eqn{A}; \code{fsA}) and intercept vector +portion of factor scores (\code{fsT}), factor score +loading matrix (\eqn{L}; \code{fsL}) and intercept vector (\eqn{b}; \code{fsb}) should be returned. The loading and intercept matrices are the implied loadings and intercepts by the model when using the diff --git a/man/get_fs.Rd b/man/get_fs.Rd index 0b2bcd9..5b31095 100644 --- a/man/get_fs.Rd +++ b/man/get_fs.Rd @@ -9,6 +9,8 @@ get_fs( model = NULL, group = NULL, method = c("regression", "Bartlett"), + corrected_fsT = FALSE, + vfsLT = FALSE, ... ) } @@ -28,12 +30,28 @@ analysis, which is passed to \code{\link[lavaan]{cfa}}.} \code{\link[lavaan]{lavPredict}}, but the Bartlett scores have more desirable properties and may be preferred for 2S-PA.} +\item{corrected_fsT}{Logical. Whether to correct for the the sampling +error in the factor score weights when computing +the error variance estimates of factor scores.} + +\item{vfsLT}{Logical. Whether to return the covariance matrix of \code{fsT} +and \code{fsL}, which can be used as input for \code{\link[=vcov_corrected]{vcov_corrected()}} +to obtain corrected covariances and standard errors for +\code{\link[=tspa]{tspa()}} results. This is currently ignored.} + \item{...}{additional arguments passed to \code{\link[lavaan]{cfa}}. See \code{\link[lavaan]{lavOptions}} for a complete list.} } \value{ -A data frame containing the factor scores (with prefix "fs_") and -the standard errors (with suffix "_se"). +A data frame containing the factor scores (with prefix \code{"fs_"}), +the standard errors (with suffix \code{"_se"}), the implied loadings +of factor \code{"_by_"} factor scores, and the error variance-covariance +of the factor scores (with prefix \code{"evfs_"}). The following are +also returned as attributes: +* \code{fsT}: error covariance of factor scores +* \code{fsL}: loading matrix of factor scores +* \code{fsb}: intercepts of factor scores +* \code{scoring_matrix}: weights for computing factor scores from items } \description{ Get Factor Scores and the Corresponding Standard Error of Measurement diff --git a/man/get_fs_lavaan.Rd b/man/get_fs_lavaan.Rd new file mode 100644 index 0000000..067aacd --- /dev/null +++ b/man/get_fs_lavaan.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_fscore.R +\name{get_fs_lavaan} +\alias{get_fs_lavaan} +\title{Get Factor Scores and the Corresponding Standard Error of Measurement} +\usage{ +get_fs_lavaan( + lavobj, + method = c("regression", "Bartlett"), + corrected_fsT = FALSE, + vfsLT = FALSE +) +} +\arguments{ +\item{lavobj}{A lavaan model object when using \code{\link[=get_fs_lavaan]{get_fs_lavaan()}}.} + +\item{method}{Character. Method for computing factor scores (options are +"regression" or "Bartlett"). Currently, the default is +"regression" to be consistent with +\code{\link[lavaan]{lavPredict}}, but the Bartlett scores have +more desirable properties and may be preferred for 2S-PA.} + +\item{corrected_fsT}{Logical. Whether to correct for the the sampling +error in the factor score weights when computing +the error variance estimates of factor scores.} + +\item{vfsLT}{Logical. Whether to return the covariance matrix of \code{fsT} +and \code{fsL}, which can be used as input for \code{\link[=vcov_corrected]{vcov_corrected()}} +to obtain corrected covariances and standard errors for +\code{\link[=tspa]{tspa()}} results. This is currently ignored.} +} +\value{ +A data frame containing the factor scores (with prefix \code{"fs_"}), +the standard errors (with suffix \code{"_se"}), the implied loadings +of factor \code{"_by_"} factor scores, and the error variance-covariance +of the factor scores (with prefix \code{"evfs_"}). The following are +also returned as attributes: +* \code{fsT}: error covariance of factor scores +* \code{fsL}: loading matrix of factor scores +* \code{fsb}: intercepts of factor scores +* \code{scoring_matrix}: weights for computing factor scores from items +} +\description{ +Get Factor Scores and the Corresponding Standard Error of Measurement +} +\examples{ +library(lavaan) +get_fs(PoliticalDemocracy[c("x1", "x2", "x3")]) + +# Multiple factors +get_fs(PoliticalDemocracy[c("x1", "x2", "x3", "y1", "y2", "y3", "y4")], + model = " ind60 =~ x1 + x2 + x3 + dem60 =~ y1 + y2 + y3 + y4 ") + +# Multiple-group +hs_model <- ' visual =~ x1 + x2 + x3 ' +fit <- cfa(hs_model, + data = HolzingerSwineford1939, + group = "school") +get_fs(HolzingerSwineford1939, hs_model, group = "school") +# Or without the model +get_fs(HolzingerSwineford1939[c("school", "x4", "x5", "x6")], + group = "school") +} diff --git a/man/grandStandardizedSolution.Rd b/man/grand_standardized_solution.Rd similarity index 86% rename from man/grandStandardizedSolution.Rd rename to man/grand_standardized_solution.Rd index d939951..b852254 100644 --- a/man/grandStandardizedSolution.Rd +++ b/man/grand_standardized_solution.Rd @@ -1,9 +1,19 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/grandStandardizedSolution.R -\name{grandStandardizedSolution} +\name{grand_standardized_solution} +\alias{grand_standardized_solution} \alias{grandStandardizedSolution} \title{Grand Standardized Solution} \usage{ +grand_standardized_solution( + object, + model_list = NULL, + se = TRUE, + acov_par = NULL, + free_list = NULL, + level = 0.95 +) + grandStandardizedSolution( object, model_list = NULL, @@ -49,7 +59,7 @@ mod1 <- ' ' fit1 <- sem(model = mod1, data = PoliticalDemocracy) -grandStandardizedSolution(fit1) +grand_standardized_solution(fit1) ## A single-group, three-factor example mod2 <- ' @@ -63,7 +73,7 @@ mod2 <- ' ' fit2 <- sem(model = mod2, data = PoliticalDemocracy) -grandStandardizedSolution(fit2) +grand_standardized_solution(fit2) ## A multigroup, two-factor example mod3 <- ' @@ -76,7 +86,7 @@ mod3 <- ' fit3 <- sem(mod3, data = HolzingerSwineford1939, group = "school", group.equal = c("loadings", "intercepts")) -grandStandardizedSolution(fit3) +grand_standardized_solution(fit3) ## A multigroup, three-factor example mod4 <- ' @@ -91,5 +101,5 @@ mod4 <- ' fit4 <- sem(mod4, data = HolzingerSwineford1939, group = "school", group.equal = c("loadings", "intercepts")) -grandStandardizedSolution(fit4) +grand_standardized_solution(fit4) } diff --git a/man/tspa.Rd b/man/tspa.Rd index 8d32917..b88f62b 100644 --- a/man/tspa.Rd +++ b/man/tspa.Rd @@ -8,9 +8,11 @@ tspa( model, data, reliability = NULL, - se = NULL, - vc = NULL, - cross_loadings = NULL, + se = "standard", + se_fs = NULL, + fsT = NULL, + fsL = NULL, + fsb = NULL, ... ) } @@ -24,23 +26,29 @@ in \code{lavaan} syntax.} of each latent factor. Currently \code{tspa()} does not support the reliability argument. Please use \code{se}.} -\item{se}{A numeric vector representing the standard errors of each factor -score variable for single-group 2S-PA. A list or data frame -storing the standard errors of each group in each latent factor -for multigroup 2S-PA.} +\item{se}{Deprecated to avoid conflict with the argument of the same name +in \code{\link[lavaan:lavaan]{lavaan::lavaan()}}.} -\item{vc}{An error variance-covariance matrix of the factor scores, which +\item{se_fs}{A numeric vector representing the standard errors of each +factor score variable for single-group 2S-PA. A list or data +frame storing the standard errors of each group in each latent +factor for multigroup 2S-PA.} + +\item{fsT}{An error variance-covariance matrix of the factor scores, which can be obtained from the output of \code{get_fs()} using -\code{attr()} with the argument \code{which = "av_efs"}.} +\code{attr()} with the argument \code{which = "fsT"}.} -\item{cross_loadings}{A matrix of loadings and cross-loadings from the +\item{fsL}{A matrix of loadings and cross-loadings from the latent variables to the factor scores \code{fs}, which -can be obtained from the output of \code{get_fs()} -using \code{attr()} with the argument -\code{which = "fsA"}. +can be obtained from the output of \code{get_fs()} using +\code{attr()} with the argument \code{which = "fsL"}. For details see the multiple-factors vignette: \code{vignette("multiple-factors", package = "R2spa")}.} +\item{fsb}{A vector of intercepts for the factor scores \code{fs}, which can +be obtained from the output of \code{get_fs()} using \code{attr()} +with the argument \code{which = "fsb"}.} + \item{...}{Additional arguments passed to \code{\link[lavaan]{sem}}. See \code{\link[lavaan]{lavOptions}} for a complete list.} } @@ -63,8 +71,8 @@ fs_dat_dem60 <- get_fs(data = PoliticalDemocracy, fs_dat <- cbind(fs_dat_ind60, fs_dat_dem60) # tspa model tspa(model = "dem60 ~ ind60", data = fs_dat, - se = c(ind60 = fs_dat_ind60[1, "fs_ind60_se"], - dem60 = fs_dat_dem60[1, "fs_dem60_se"])) + se_fs = c(ind60 = fs_dat_ind60[1, "fs_ind60_se"], + dem60 = fs_dat_dem60[1, "fs_dem60_se"])) # single-group, three-factor example mod2 <- " @@ -77,8 +85,8 @@ fs_dat2 <- get_fs(PoliticalDemocracy, model = mod2, std.lv = TRUE) tspa(model = "dem60 ~ ind60 dem65 ~ ind60 + dem60", data = fs_dat2, - vc = attr(fs_dat2, "av_efs"), - cross_loadings = attr(fs_dat2, "fsA")) + fsT = attr(fs_dat2, "fsT"), + fsL = attr(fs_dat2, "fsL")) # multigroup, two-factor example mod3 <- " @@ -90,8 +98,8 @@ fs_dat3 <- get_fs(HolzingerSwineford1939, model = mod3, std.lv = TRUE, group = "school") tspa(model = "visual ~ speed", data = fs_dat3, - vc = attr(fs_dat3, "av_efs"), - cross_loadings = attr(fs_dat3, "fsA"), + fsT = attr(fs_dat3, "fsT"), + fsL = attr(fs_dat3, "fsL"), group = "school") # multigroup, three-factor example @@ -100,15 +108,14 @@ mod4 <- " visual =~ x1 + x2 + x3 textual =~ x4 + x5 + x6 speed =~ x7 + x8 + x9 - " fs_dat4 <- get_fs(HolzingerSwineford1939, model = mod4, std.lv = TRUE, group = "school") tspa(model = "visual ~ speed textual ~ visual + speed", data = fs_dat4, - vc = attr(fs_dat4, "av_efs"), - cross_loadings = attr(fs_dat4, "fsA"), + fsT = attr(fs_dat4, "fsT"), + fsL = attr(fs_dat4, "fsL"), group = "school") # get factor scores @@ -124,15 +131,15 @@ fs_hs <- cbind(do.call(rbind, fs_dat_visual), # tspa model tspa(model = "visual ~ speed", data = fs_hs, - se = data.frame(visual = c(0.3391326, 0.311828), - speed = c(0.2786875, 0.2740507)), + se_fs = data.frame(visual = c(0.3391326, 0.311828), + speed = c(0.2786875, 0.2740507)), group = "school", group.equal = "regressions") # manually adding equality constraints on the regression coefficients tspa(model = "visual ~ c(b1, b1) * speed", data = fs_hs, - se = list(visual = c(0.3391326, 0.311828), - speed = c(0.2786875, 0.2740507)), + se_fs = list(visual = c(0.3391326, 0.311828), + speed = c(0.2786875, 0.2740507)), group = "school") } diff --git a/man/tspa_mx_model.Rd b/man/tspa_mx_model.Rd new file mode 100644 index 0000000..3d14a98 --- /dev/null +++ b/man/tspa_mx_model.Rd @@ -0,0 +1,102 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tspa_mx.R +\name{tspa_mx_model} +\alias{tspa_mx_model} +\title{Two-Stage Path Analysis with Definition Variables Using OpenMx} +\usage{ +tspa_mx_model(mx_model, data, mat_ld, mat_vc, ...) +} +\arguments{ +\item{mx_model}{A model object of class \link[OpenMx:MxRAMModel-class]{OpenMx::MxRAMModel}, created +by \code{\link[OpenMx:mxModel]{OpenMx::mxModel()}} or the \code{umx} package. This +is the structural model part.} + +\item{data}{A data frame containing factor scores.} + +\item{mat_ld}{A \eqn{p \times p} matrix indicating the loadings of the +factor scores on the latent variables. The \emph{i}th row indicate +loadings of the \emph{i}th factor score variable on the latent +variables. This could be one of the following: +\itemize{ +\item A matrix created by \code{\link[OpenMx:mxMatrix]{OpenMx::mxMatrix()}} with loading +values. +\item A named numeric matrix, with rownames and column names +matching the factor score and latent variables. +\item A named character matrix, with rownames and column names +matching the factor score and latent variables, and the +character values indicating the variable names in \code{data} for +the corresponding loadings. +}} + +\item{mat_vc}{Similar to \code{mat_ld} but for the error variance-covariance +matrix of the factor scores.} + +\item{...}{Additional arguments passed to \code{\link[OpenMx:mxModel]{OpenMx::mxModel()}}.} +} +\value{ +An object of class \link[OpenMx:MxModel-class]{OpenMx::MxModel}. Note that the +model has not been run. +} +\description{ +Fit a two-stage path analysis (2S-PA) model. +} +\examples{ +library(mirt) +library(umx) +library(OpenMx) +# Simulate data with mirt +set.seed(1324) +num_obs <- 100 +# Simulate theta +eta <- MASS::mvrnorm(num_obs, mu = c(0, 0), Sigma = diag(c(1, 1 - 0.5^2)), + empirical = TRUE) +th1 <- eta[, 1] +th2 <- -1 + 0.5 * th1 + eta[, 2] +# items and response data +a1 <- matrix(1, 10) +d1 <- matrix(rnorm(10)) +a2 <- matrix(runif(10, min = 0.5, max = 1.5)) +d2 <- matrix(rnorm(10)) +dat1 <- mirt::simdata(a = a1, d = d1, + N = num_obs, itemtype = "2PL", Theta = th1) +dat2 <- mirt::simdata(a = a2, d = d2, N = num_obs, + itemtype = "2PL", Theta = th2) +# Factor scores +mod1 <- mirt(dat1, model = 1, itemtype = "Rasch", verbose = FALSE) +mod2 <- mirt(dat2, model = 1, itemtype = "2PL", verbose = FALSE) +fs1 <- fscores(mod1, full.scores.SE = TRUE) +fs2 <- fscores(mod2, full.scores.SE = TRUE) +# Combine factor scores and standard errors into data set +fs_dat <- as.data.frame(cbind(fs1, fs2)) +names(fs_dat) <- c("fs1", "se_fs1", "fs2", "se_fs2") +# Compute reliability and error variances +fs_dat <- within(fs_dat, expr = { + rel_fs1 <- 1 - se_fs1^2 + rel_fs2 <- 1 - se_fs2^2 + ev_fs1 <- se_fs1^2 * (1 - se_fs1^2) + ev_fs2 <- se_fs2^2 * (1 - se_fs2^2) +}) +# OpenMx model (from umx so that lavaan syntax can be used) +fsreg_umx <- umxLav2RAM( + " + fs2 ~ fs1 + fs2 + fs1 ~ 1 + ", + printTab = FALSE) +# Prepare loading and error covariance matrices +cross_load <- matrix(c("rel_fs2", NA, NA, "rel_fs1"), nrow = 2) |> + `dimnames<-`(rep(list(c("fs2", "fs1")), 2)) +err_cov <- matrix(c("ev_fs2", NA, NA, "ev_fs1"), nrow = 2) |> + `dimnames<-`(rep(list(c("fs2", "fs1")), 2)) +# Create 2S-PA model (with definition variables) +tspa_mx <- + tspa_mx_model(fsreg_umx, + data = fs_dat, + mat_ld = cross_load, + mat_vc = err_cov + ) +# Run OpenMx +tspa_mx_fit <- mxRun(tspa_mx) +# Summarize the results +summary(tspa_mx_fit) +} diff --git a/man/tspa_plot.Rd b/man/tspa_plot.Rd new file mode 100644 index 0000000..3fc96c2 --- /dev/null +++ b/man/tspa_plot.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tspa_plot.R +\name{tspa_plot} +\alias{tspa_plot} +\title{Diagnostic plots of fitted 2S-PA model} +\usage{ +tspa_plot( + tspa_fit, + title = NULL, + label_x = NULL, + label_y = NULL, + abbreviation = TRUE, + fscores_type = c("original", "lavaan"), + ask = FALSE, + ... +) +} +\arguments{ +\item{tspa_fit}{An object of class \link[=lavaan-class]{lavaan}, +representing the output generated from the \code{\link[=tspa]{tspa()}} +function.} + +\item{title}{Character. Set the name of scatter plot. The default value is +"Scatterplot".} + +\item{label_x}{Character. Set the name of the x-axis. The default value is +"fs_" followed by variable names.} + +\item{label_y}{Character. Set the name of the y-axis. The default value is +"fs_" followed by variable names.} + +\item{abbreviation}{Logic input. If \code{FALSE} is indicated, the group name +will be shown in full. The default setting is \code{TRUE}.} + +\item{fscores_type}{Character. Set the type of factor score for input. +The default setting is using factor score from observed +data (i.e., output from \code{\link[=get_fs]{get_fs()}}). If +\code{fscore_type = "est"}, then it will use output from +\code{\link[lavaan:lavPredict]{lavaan::lavPredict()}}.} + +\item{ask}{Logic input. If \code{TRUE} is indicated, the user will be asked before +before each plot is generated. The default setting is 'False'.} + +\item{...}{Additional arguments passed to \code{\link[graphics]{plot}}. See +\code{\link[graphics]{plot}} for a list.} +} +\value{ +A scatterplot between factor scores, and a residual plot. +} +\description{ +Diagnostic plots of fitted 2S-PA model +} +\examples{ +library(lavaan) +model <- " +# latent variable definitions +ind60 =~ x1 + x2 + x3 +dem60 =~ y1 + a*y2 + b*y3 + c*y4 +# regressions +dem60 ~ ind60 +" +fs_dat_ind60 <- get_fs( + data = PoliticalDemocracy, + model = "ind60 =~ x1 + x2 + x3" +) +fs_dat_dem60 <- get_fs( + data = PoliticalDemocracy, + model = "dem60 =~ y1 + y2 + y3 + y4" +) +fs_dat <- cbind(fs_dat_ind60, fs_dat_dem60) + +tspa_fit <- tspa( + model = "dem60 ~ ind60", + data = fs_dat, + se_fs = list(ind60 = 0.1213615, dem60 = 0.6756472) +) +tspa_plot(tspa_fit) +} diff --git a/man/vcov_corrected.Rd b/man/vcov_corrected.Rd new file mode 100644 index 0000000..9a97a1a --- /dev/null +++ b/man/vcov_corrected.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tspa_corrected_se.R +\name{vcov_corrected} +\alias{vcov_corrected} +\title{First-order correction of sampling covariance for 2S-PA estimates} +\usage{ +vcov_corrected(tspa_fit, vfsLT, which_free = NULL, ...) +} +\arguments{ +\item{tspa_fit}{A fitted model from \code{\link[=tspa]{tspa()}}.} + +\item{vfsLT}{The sampling covariance matrix of \code{fsL} and \code{fsT}, which can be +obtained with \code{\link[=get_fs]{get_fs()}} with the argument \code{vfsLT = TRUE}.} + +\item{which_free}{An optional numeric vector indicating which parameters +in \code{fsL} and \code{fsT} are free. The parameters are ordered +by the \code{fsL} matrix and the lower-triangular part of +\code{fsT}, by columns. For example, for a two-factor model, +\code{fsL} and \code{fsT} are both 2 x 2 matrices, and the +error covariance between the two factor scores (i.e., +the \verb{[2, 1]} element in \code{fsT}) has an index of 6.} + +\item{...}{Currently not used.} +} +\value{ +A corrected covariance matrix in the same dimension as +\code{vcov(tspa_fit)}. +} +\description{ +First-order correction of sampling covariance for 2S-PA estimates +} diff --git a/tests/testthat.R b/tests/testthat.R old mode 100755 new mode 100644 diff --git a/tests/testthat/test-compute_fscore.R b/tests/testthat/test-compute_fscore.R index a2dcb81..096d0e5 100644 --- a/tests/testthat/test-compute_fscore.R +++ b/tests/testthat/test-compute_fscore.R @@ -54,10 +54,12 @@ test_that("ACOV = Var(e.fs) for Bartlett scores", { method = "Bartlett", fs_matrices = TRUE) expect_equal(fs_lavaan2, fs_hand2, ignore_attr = TRUE) - expect_equal(attr(fs_lavaan2, "acov")[[1]], - attr(fs_hand2, "acov")) - expect_equal(attr(fs_lavaan2, "acov")[[1]], - attr(fs_hand2, "av_efs")) + expect_equal(attr(fs_hand2, "acov"), attr(fs_lavaan2, "acov")[[1]]) + expect_equal(attr(fs_hand2, "fsT"), attr(fs_lavaan2, "acov")[[1]], + ignore_attr = TRUE) + # From Issue 56 + expect_equal(rownames(attr(fs_hand2, "fsT")), + paste0("fs_", rownames(attr(fs_lavaan2, "acov")[[1]]))) }) test_that("Same factor scores with same priors", { @@ -140,13 +142,17 @@ test_that("Correct scoring matrix for regression scores", { fs_matrices = TRUE) a_mat1 <- est[[1]]$psi %*% crossprod(est[[1]]$lambda, MASS::ginv(fit@implied$cov[[1]])) - fsA1 <- attr(fs1_hand, "fsA") - expect_equal(a_mat1 %*% est[[1]]$lambda, fsA1) + fsL1 <- attr(fs1_hand, "fsL") + expect_equal(a_mat1 %*% est[[1]]$lambda, fsL1, ignore_attr = TRUE) + # Issue 56 + expect_equal(colnames(fsL1), rownames(a_mat1)) + expect_equal(rownames(fsL1), paste0("fs_", colnames(fsL1))) expect_equal(a_mat1 %*% cov(y[[1]]) %*% t(a_mat1), expected = cov(fs1_hand)) implied_covfs1 <- a_mat1 %*% fit@implied$cov[[1]] %*% t(a_mat1) - expect_equal(attr(fs1_hand, "av_efs"), - expected = implied_covfs1 - fsA1 %*% est[[1]]$psi %*% t(fsA1)) + expect_equal(attr(fs1_hand, "fsT"), + expected = implied_covfs1 - fsL1 %*% est[[1]]$psi %*% t(fsL1), + ignore_attr = TRUE) fs2_hand <- compute_fscore(y[[2]], lambda = est[[2]]$lambda, theta = est[[2]]$theta, @@ -158,13 +164,14 @@ test_that("Correct scoring matrix for regression scores", { est[[2]]$theta a_mat2 <- est[[1]]$psi %*% crossprod(est[[2]]$lambda, MASS::ginv(implied_cov2)) - fsA2 <- attr(fs2_hand, "fsA") - expect_equal(a_mat2 %*% est[[2]]$lambda, fsA2) + fsL2 <- attr(fs2_hand, "fsL") + expect_equal(a_mat2 %*% est[[2]]$lambda, fsL2, ignore_attr = TRUE) expect_equal(a_mat2 %*% cov(y[[2]]) %*% t(a_mat2), expected = cov(fs2_hand)) implied_covfs2 <- a_mat2 %*% fit@implied$cov[[2]] %*% t(a_mat2) - expect_equal(attr(fs2_hand, "av_efs"), - expected = implied_covfs2 - fsA2 %*% est[[2]]$psi %*% t(fsA2)) + expect_equal(attr(fs2_hand, "fsT"), + expected = implied_covfs2 - fsL2 %*% est[[2]]$psi %*% t(fsL2), + ignore_attr = TRUE) }) test_that("Correct scoring matrix for Bartlett scores", { @@ -179,13 +186,15 @@ test_that("Correct scoring matrix for Bartlett scores", { tlam_invth1 <- crossprod(est[[1]]$lambda, MASS::ginv(est[[1]]$theta)) a_mat1 <- solve(tlam_invth1 %*% est[[1]]$lambda, tlam_invth1) - fsA1 <- attr(fs1_hand, "fsA") - expect_equal(fsA1, diag(3), ignore_attr = TRUE) + fsL1 <- attr(fs1_hand, "fsL") + expect_equal(fsL1, diag(3), ignore_attr = TRUE) expect_equal(a_mat1 %*% cov(y[[1]]) %*% t(a_mat1), - expected = cov(fs1_hand)) + expected = cov(fs1_hand), + ignore_attr = TRUE) implied_covfs1 <- a_mat1 %*% fit@implied$cov[[1]] %*% t(a_mat1) - expect_equal(attr(fs1_hand, "av_efs"), - expected = implied_covfs1 - fsA1 %*% est[[1]]$psi %*% t(fsA1)) + expect_equal(attr(fs1_hand, "fsT"), + expected = implied_covfs1 - fsL1 %*% est[[1]]$psi %*% t(fsL1), + ignore_attr = TRUE) fs2_hand <- compute_fscore(y[[2]], lambda = est[[2]]$lambda, theta = est[[2]]$theta, @@ -199,11 +208,29 @@ test_that("Correct scoring matrix for Bartlett scores", { tlam_invth2 <- crossprod(est[[2]]$lambda, MASS::ginv(est[[2]]$theta)) a_mat2 <- solve(tlam_invth2 %*% est[[2]]$lambda, tlam_invth2) - fsA2 <- attr(fs2_hand, "fsA") - expect_equal(fsA2, diag(3), ignore_attr = TRUE) + fsL2 <- attr(fs2_hand, "fsL") + expect_equal(fsL2, diag(3), ignore_attr = TRUE) expect_equal(a_mat2 %*% cov(y[[2]]) %*% t(a_mat2), - expected = cov(fs2_hand)) + expected = cov(fs2_hand), ignore_attr = TRUE) implied_covfs2 <- a_mat2 %*% fit@implied$cov[[2]] %*% t(a_mat2) - expect_equal(attr(fs2_hand, "av_efs"), - expected = implied_covfs2 - fsA2 %*% est[[2]]$psi %*% t(fsA2)) + expect_equal(attr(fs2_hand, "fsT"), + expected = implied_covfs2 - fsL2 %*% est[[2]]$psi %*% t(fsL2), + ignore_attr = TRUE) +}) + +test_that("Correction factor shrinks to zero in large sample", { + cov1 <- lavInspect(fit, what = "implied")[[1]]$cov + fit_small <- cfa(" visual =~ x1 + x2 + x3 ", + sample.cov = cov1, sample.nobs = 50) + c1 <- correct_evfs(fit_small, method = "regression")[[1]] + fit_medium <- cfa(" visual =~ x1 + x2 + x3 ", + sample.cov = cov1, sample.nobs = 60) + c2 <- correct_evfs(fit_medium, method = "regression")[[1]] + fit_large <- cfa(" visual =~ x1 + x2 + x3 ", + sample.cov = cov1, sample.nobs = 1e6) + c3 <- correct_evfs(fit_large, method = "regression")[[1]] + expect_lt(c2, c1) + expect_lt(c3, 1e-4) + c1b <- correct_evfs(fit_small, method = "Bartlett")[[1]] + expect_gt(c1b, c1) }) diff --git a/tests/testthat/test-get_fscore.R b/tests/testthat/test-get_fscore.R old mode 100755 new mode 100644 index d54a782..268007a --- a/tests/testthat/test-get_fscore.R +++ b/tests/testthat/test-get_fscore.R @@ -1,5 +1,3 @@ - -####################################### Test get_fs() function ###################################### # Loading packages and functions library(lavaan) @@ -29,7 +27,7 @@ test_that("test the data input", { # Class of output -test_that("Test that the function gives an output of data frame", { +test_that("Gives an output of data frame", { expect_s3_class(test_object_fs, "data.frame") }) @@ -46,33 +44,39 @@ test_that("Test the number of factors is equal", { expect_equal(length(test_object_fs), expected_cols) }) -test_that("Test that the number of rows is the same as the original data", { +test_that("Number of rows is the same as the original data", { expect_identical(nrow(test_object_fs), nrow(PoliticalDemocracy)) }) # Test standard error -test_that("Test that standard errors for each observation are the same", { +test_that("Standard errors for each observation are the same", { fs_names <- colnames(test_object_fs) names_se <- grep("_se", fs_names, value = TRUE) for (j in names_se) { - expect_identical(var(test_object_fs[[j]]), 0) + expect_identical(var(test_object_fs[[j]]), 0) } }) -test_that("Test that standard errors for each observation are positive numbers and within 1", { - fs_names <- colnames(test_object_fs) - names_se <- grep("_se", fs_names, value = TRUE) - for (j in names_se) { - expect_gt(min(test_object_fs[[j]]), 0) - expect_lt(max(test_object_fs[[j]]), 1) +test_that( + "Standard errors for each observation are positive numbers and within 1", + { + fs_names <- colnames(test_object_fs) + names_se <- grep("_se", fs_names, value = TRUE) + for (j in names_se) { + expect_gt(min(test_object_fs[[j]]), 0) + expect_lt(max(test_object_fs[[j]]), 1) + } } -}) +) # Bartlett scores -test_object_fs_bar <- get_fs(PoliticalDemocracy, single_model, method = "Bartlett") +test_object_fs_bar <- get_fs( + PoliticalDemocracy, single_model, + method = "Bartlett" +) -test_that("Test that standard errors for each observation are the same", { +test_that("Standard errors for each observation are the same", { fs_names <- colnames(test_object_fs_bar) names_se <- grep("_se", fs_names, value = TRUE) for (j in names_se) { @@ -80,14 +84,17 @@ test_that("Test that standard errors for each observation are the same", { } }) -test_that("Test that standard errors for each observation are positive numbers and within 1", { - fs_names <- colnames(test_object_fs) - names_se <- grep("_se", fs_names, value = TRUE) - for (j in names_se) { - expect_gt(min(test_object_fs_bar[[j]]), 0) - expect_lt(max(test_object_fs_bar[[j]]), 1) +test_that( + "Standard errors for each observation are positive numbers and within 1", + { + fs_names <- colnames(test_object_fs) + names_se <- grep("_se", fs_names, value = TRUE) + for (j in names_se) { + expect_gt(min(test_object_fs_bar[[j]]), 0) + expect_lt(max(test_object_fs_bar[[j]]), 1) + } } -}) +) ########## multi-group examples ########## @@ -100,15 +107,17 @@ hs_model <- 'visual =~ x1 + x2 + x3' # data = HolzingerSwineford1939, # group = "school") #get_fs(HolzingerSwineford1939, hs_model, group = "school") -test_object_fs_multi <- get_fs(HolzingerSwineford1939[c("school", "x1", "x2", "x3")], - hs_model, - group = "school") +test_object_fs_multi <- get_fs( + HolzingerSwineford1939[c("school", "x1", "x2", "x3")], + hs_model, + group = "school" +) ########## Testing section ############ # Class of output -test_that("Test the number of factors is equal", { +test_that("Number of factors is equal", { expected_cols <- ncol_cfa( cfa(model = hs_model, data = PoliticalDemocracy) ) @@ -116,7 +125,7 @@ test_that("Test the number of factors is equal", { expect_equal(length(test_object_fs_multi[[1]]), expected_cols + 1) }) -test_that("Test that the number of rows is the same as the original data", { +test_that("Number of rows is the same as the original data", { expect_identical(nrow(test_object_fs_multi[[1]]) + nrow(test_object_fs_multi[[2]]), nrow(HolzingerSwineford1939)) @@ -124,43 +133,59 @@ test_that("Test that the number of rows is the same as the original data", { # Test standard errors -test_that("Test that standard errors for each observation are the same within groups", { - test_se <- vapply(test_object_fs_multi, - FUN = function(x) var(x$fs_visual_se), - FUN.VALUE = numeric(1)) - for (i in length(test_se)) { - expect_identical(unname(test_se[i]), 0) +test_that( + "Standard errors for each observation are the same within groups", + { + test_se <- vapply(test_object_fs_multi, + FUN = function(x) var(x$fs_visual_se), + FUN.VALUE = numeric(1) + ) + for (i in length(test_se)) { + expect_identical(unname(test_se[i]), 0) + } } -}) - -test_that("Test that standard errors for each observation are positive numbers and within 1", { - expect_gt(min(test_object_fs_multi[[1]]$fs_visual_se), 0) - expect_lt(max(test_object_fs_multi[[1]]$fs_visual_se), 1) - expect_gt(min(test_object_fs_multi[[2]]$fs_visual_se), 0) - expect_lt(max(test_object_fs_multi[[2]]$fs_visual_se), 1) -}) +) + +test_that( + "Standard errors for each observation are positive numbers and within 1", + { + expect_gt(min(test_object_fs_multi[[1]]$fs_visual_se), 0) + expect_lt(max(test_object_fs_multi[[1]]$fs_visual_se), 1) + expect_gt(min(test_object_fs_multi[[2]]$fs_visual_se), 0) + expect_lt(max(test_object_fs_multi[[2]]$fs_visual_se), 1) + } +) # Bartlett scores -test_object_fs_multi_bar <- get_fs(HolzingerSwineford1939[c("school", "x1", "x2", "x3")], - hs_model, - group = "school", - method = "Bartlett") - -test_that("Test that standard errors for each observation are the same within groups", { - test_se <- vapply(test_object_fs_multi_bar, - FUN = function(x) var(x$fs_visual_se), - FUN.VALUE = numeric(1)) - for (i in length(test_se)) { - expect_identical(unname(test_se[i]), 0) +test_object_fs_multi_bar <- get_fs( + HolzingerSwineford1939[c("school", "x1", "x2", "x3")], + hs_model, + group = "school", + method = "Bartlett" +) + +test_that( + "Standard errors for each observation are the same within groups", + { + test_se <- vapply(test_object_fs_multi_bar, + FUN = function(x) var(x$fs_visual_se), + FUN.VALUE = numeric(1) + ) + for (i in length(test_se)) { + expect_identical(unname(test_se[i]), 0) + } } -}) - -test_that("Test that standard errors for each observation are positive numbers and within 1", { - expect_gt(min(test_object_fs_multi_bar[[1]]$fs_visual_se), 0) - expect_lt(max(test_object_fs_multi_bar[[1]]$fs_visual_se), 1) - expect_gt(min(test_object_fs_multi_bar[[2]]$fs_visual_se), 0) - expect_lt(max(test_object_fs_multi_bar[[2]]$fs_visual_se), 1) -}) +) + +test_that( + "Standard errors for each observation are positive numbers and within 1", + { + expect_gt(min(test_object_fs_multi_bar[[1]]$fs_visual_se), 0) + expect_lt(max(test_object_fs_multi_bar[[1]]$fs_visual_se), 1) + expect_gt(min(test_object_fs_multi_bar[[2]]$fs_visual_se), 0) + expect_lt(max(test_object_fs_multi_bar[[2]]$fs_visual_se), 1) + } +) ###### Multiple factors example ##### @@ -176,14 +201,14 @@ test_object_fs_multi_2 <- get_fs(HolzingerSwineford1939, # Class of output -test_that("Test the number of factors is equal", { +test_that("Number of factors is equal", { expected_cols <- ncol_cfa( cfa(model = hs_model_2, data = HolzingerSwineford1939) ) expect_equal(length(test_object_fs_multi_2[[1]]), expected_cols + 1) }) -test_that("Test that the number of rows is the same as the original data", { +test_that("Number of rows is the same as the original data", { expect_identical(nrow(test_object_fs_multi_2[[1]]) + nrow(test_object_fs_multi_2[[2]]), nrow(HolzingerSwineford1939)) @@ -191,50 +216,94 @@ test_that("Test that the number of rows is the same as the original data", { # Test standard errors -test_that("Test that standard errors for each observation are the same within groups", { - fs_names <- colnames(test_object_fs_multi_2[[1]]) - names_se <- grep("_se", fs_names, value = TRUE) - for (i in names_se) { - test_se <- vapply(test_object_fs_multi_2, - FUN = function(x) var(x[[i]]), - FUN.VALUE = numeric(1)) - expect_identical(max(test_se), 0) +test_that( + "Standard errors for each observation are the same within groups", + { + fs_names <- colnames(test_object_fs_multi_2[[1]]) + names_se <- grep("_se", fs_names, value = TRUE) + for (i in names_se) { + test_se <- vapply(test_object_fs_multi_2, + FUN = function(x) var(x[[i]]), + FUN.VALUE = numeric(1) + ) + expect_identical(max(test_se), 0) + } } -}) - -test_that("Test that standard errors for each observation are positive numbers and within 1", { - rb_test_object <- do.call(rbind, test_object_fs_multi_2) - fs_names <- colnames(rb_test_object) - names_se <- grep("_se", fs_names, value = TRUE) - for (i in names_se) { - expect_gt(min(rb_test_object[,i]), 0) - expect_lt(max(rb_test_object[,i]), 1) +) + +test_that( + "Standard errors for each observation are positive numbers and within 1", + { + rb_test_object <- do.call(rbind, test_object_fs_multi_2) + fs_names <- colnames(rb_test_object) + names_se <- grep("_se", fs_names, value = TRUE) + for (i in names_se) { + expect_gt(min(rb_test_object[, i]), 0) + expect_lt(max(rb_test_object[, i]), 1) + } } -}) +) # Bartlett scores test_object_fs_multi_2_bar <- get_fs(HolzingerSwineford1939, hs_model_2, group = "school") -test_that("Test that standard errors for each observation are the same within groups", { - rb_test_object <- do.call(rbind, test_object_fs_multi_2_bar) - fs_names <- colnames(rb_test_object) - names_se <- grep("_se", fs_names, value = TRUE) - for (i in names_se) { - test_se <- tapply(rb_test_object[[i]], - rb_test_object$school, var) - expect_identical(max(test_se), 0) +test_that( + "Standard errors for each observation are the same within groups", + { + rb_test_object <- do.call(rbind, test_object_fs_multi_2_bar) + fs_names <- colnames(rb_test_object) + names_se <- grep("_se", fs_names, value = TRUE) + for (i in names_se) { + test_se <- tapply( + rb_test_object[[i]], + rb_test_object$school, var + ) + expect_identical(max(test_se), 0) + } } -}) - -test_that("Test that standard errors for each observation are positive numbers and within 1", { - rb_test_object <- do.call(rbind, test_object_fs_multi_2_bar) - fs_names <- colnames(rb_test_object) - names_se <- grep("_se", fs_names, value = TRUE) - for (i in names_se) { +) + +test_that( + "Standard errors for each observation are positive numbers and within 1", + { + rb_test_object <- do.call(rbind, test_object_fs_multi_2_bar) + fs_names <- colnames(rb_test_object) + names_se <- grep("_se", fs_names, value = TRUE) + for (i in names_se) { expect_gt(min(rb_test_object[, i]), 0) expect_lt(max(rb_test_object[, i]), 1) + } } +) + +fs_config <- get_fs(HolzingerSwineford1939, + hs_model_2, + group = "school", + corrected_fsT = TRUE +) + +fs_metric <- get_fs(HolzingerSwineford1939, + hs_model_2, + group = "school", + group.equal = "loadings", + corrected_fsT = TRUE +) + +fs_single <- get_fs( + HolzingerSwineford1939 |> + subset(school == "Grant-White"), + hs_model_2, + corrected_fsT = TRUE +) + +test_that("Correction factor is similar with single or multiple groups", { + fst1 <- attr(fs_config, "fsT") + fst2 <- attr(fs_metric, "fsT") + fst3 <- attr(fs_single, "fsT") + expect_equal(fst1[[2]], fst3, tolerance = 1e-5) + d1 <- fst1[[1]] - fst1[[2]] + d2 <- fst2[[1]] - fst2[[2]] + expect_lt(mean(abs(d2)), mean(abs(d1))) }) - diff --git a/tests/testthat/test-grandStandardizedSolution.R b/tests/testthat/test-grandStandardizedSolution.R index 7c08c4a..efe54c3 100644 --- a/tests/testthat/test-grandStandardizedSolution.R +++ b/tests/testthat/test-grandStandardizedSolution.R @@ -50,7 +50,7 @@ test_that("Standardized beta in a model with single group, three factors", { expect_equal(s3_std_beta_lav$est.std, s3_std_beta$est.std) }) test_that("SE of standardized beta in a model with single group, three factors", { - expect_equal(s3_std_beta_lav$se, s3_std_beta$se) + expect_equal(s3_std_beta_lav$se, s3_std_beta$se, tolerance = err / 100) }) # Multigroup, two-factor ------------------------------------------------------- diff --git a/tests/testthat/test-tspa.R b/tests/testthat/test-tspa.R old mode 100755 new mode 100644 index f77e46a..f77e661 --- a/tests/testthat/test-tspa.R +++ b/tests/testthat/test-tspa.R @@ -2,6 +2,8 @@ # Loading packages and functions library(lavaan) +library(OpenMx) +library(umx) ########## Single-group example ########## @@ -10,14 +12,14 @@ library(lavaan) # Example 1: Single-group with two variables # CFA model -cfa_single1 <- ' - # latent variables - ind60 =~ x1 + x2 + x3 - ' -cfa_single2 <- ' - # latent variables - dem60 =~ y1 + y2 + y3 + y4 - ' +cfa_single1 <- " +# latent variables +ind60 =~ x1 + x2 + x3 +" +cfa_single2 <- " +# latent variables +dem60 =~ y1 + y2 + y3 + y4 +" # get factor scores fs_single1 <- get_fs(PoliticalDemocracy, cfa_single1) @@ -45,23 +47,25 @@ tspa_single <- tspa( model = "dem60 ~ ind60", data = fs_dat_single, - se = c(ind60 = 0.1213615, dem60 = 0.6756472) + se_fs = c(ind60 = 0.1213615, dem60 = 0.6756472) ) ########## Testing section ############ # Class of input var_len <- 2 -se = c(ind60 = 0.1213615, dem60 = 0.6756472) +se <- c(ind60 = 0.1213615, dem60 = 0.6756472) # The tspa data should be composed of two parts: variable, and se -test_that("test the number of columns in tspa data are multiples of the variable length", - { - expect_gt(ncol(fs_dat_single), 1) - expect_equal(ncol(fs_dat_single) %% var_len, 0) - }) +test_that( + "Number of columns in tspa data are multiples of the variable length", + { + expect_gt(ncol(fs_dat_single), 1) + expect_equal(ncol(fs_dat_single) %% var_len, 0) + } +) -test_that ("Test the data variable names should contain prefix (fs_)", { +test_that("Test the data variable names should contain prefix (fs_)", { fs_names <- colnames(fs_dat_single) expect_true(all(grepl("fs_", fs_names))) }) @@ -70,22 +74,32 @@ test_that ("Test the data variable names should contain prefix (fs_)", { # Parameter estimates -test_that("test if the regression coefficients of factors are the same for two methods", - { - expect_equal(coef(cfa_single)["dem60~ind60"], - coef(tspa_single)["dem60~ind60"]) - }) - -test_that("test if the se of regression coefficients are the same for two methods", - { - expect_equal( - vcov(cfa_single)[c("dem60~ind60", "v1", "v2"), - c("dem60~ind60", "v1", "v2")], - vcov(tspa_single)[c("dem60~ind60", "ind60~~ind60", "dem60~~dem60"), - c("dem60~ind60", "ind60~~ind60", "dem60~~dem60")], - ignore_attr = TRUE - ) - }) +test_that( + "Regression coefficients of factors are the same for two methods", + { + expect_equal( + coef(cfa_single)["dem60~ind60"], + coef(tspa_single)["dem60~ind60"] + ) + } +) + +test_that( + "se of regression coefficients are the same for two methods", + { + expect_equal( + vcov(cfa_single)[ + c("dem60~ind60", "v1", "v2"), + c("dem60~ind60", "v1", "v2") + ], + vcov(tspa_single)[ + c("dem60~ind60", "ind60~~ind60", "dem60~~dem60"), + c("dem60~ind60", "ind60~~ind60", "dem60~~dem60") + ], + ignore_attr = TRUE + ) + } +) # Fit measures @@ -133,13 +147,87 @@ tspa_3var <- tspa( model = "dem60 ~ ind60 dem65 ~ ind60 + dem60", data = fs_dat_3var, - se = c( + se_fs = c( ind60 = 0.1213615, dem60 = 0.6756472, dem65 = 0.5724405 ) ) +# Compare to Mx +model_umx <- umxLav2RAM(" + fs_dem60 ~ fs_ind60 + fs_dem65 ~ fs_ind60 + fs_dem60 + fs_dem65 + fs_dem60 + fs_ind60 ~ 1 + ", printTab = FALSE) +# Loading +matL <- mxMatrix( + type = "Iden", nrow = 3, + free = FALSE, + name = "L" +) +# Error +matE <- mxMatrix( + type = "Diag", nrow = 3, ncol = 3, + free = FALSE, + values = c(0.6756472, 0.5724405, 0.1213615)^2, + name = "E" +) +tspa_mx <- tspa_mx_model(model_umx, data = fs_dat_3var, + mat_ld = matL, mat_vc = matE) +tspa_mx_fit <- mxRun(tspa_mx) +# Check same coefficients and standard errors +test_that("test same regression coefficients with Mx", { + expect_equal( + coef(tspa_mx_fit)[c(2, 3, 1, 6, 4, 5)], + expected = coef(tspa_3var), + tolerance = 1e-5, + ignore_attr = TRUE + ) +}) +test_that("test same standard errors with Mx", { + vc_mx <- diag(vcov(tspa_mx_fit)) + vc_lavaan <- diag(vcov(tspa_3var)) + expect_equal( + vc_mx[c(2, 3, 1, 6, 4, 5)], + expected = vc_lavaan, + tolerance = 1e-4, + ignore_attr = TRUE + ) +}) +# Use numeric matrices +tspa_mx2 <- tspa_mx_model( + model_umx, + data = fs_dat_3var, + mat_ld = diag(3) |> + `dimnames<-`(list( + c("fs_ind60", "fs_dem60", "fs_dem65"), + c("ind60", "dem60", "dem65") + )), + mat_vc = diag(c(0.1213615, 0.6756472, 0.5724405)^2) |> + `dimnames<-`(rep(list(c("fs_ind60", "fs_dem60", "fs_dem65")), 2)) +) +tspa_mx_fit2 <- mxRun(tspa_mx2) +# Use column names for VC +err_cov <- matrix(c("ev_fs_ind60", NA, NA, + NA, "ev_fs_dem60", NA, + NA, NA, "ev_fs_dem65"), nrow = 3) |> + `dimnames<-`(rep(list(c("fs_ind60", "fs_dem60", "fs_dem65")), 2)) +tspa_mx3 <- tspa_mx_model(model_umx, data = fs_dat_3var, + mat_ld = matL, mat_vc = err_cov) +tspa_mx_fit3 <- mxRun(tspa_mx3) +test_that("Same results with different Mx matrices input", { + expect_equal( + coef(tspa_mx_fit2), + expected = coef(tspa_mx_fit) + ) + expect_equal( + coef(tspa_mx_fit3), + expected = coef(tspa_mx_fit), + tolerance = 1e-5 + ) +}) + ########## Testing section ############# # Standardized parameter estimates @@ -148,21 +236,25 @@ sem_path_3var <- subset(standardizedSolution(sem_3var), tspa_path_3var <- subset(standardizedSolution(tspa_3var), subset = op == "~") -test_that("test if the regression coefficients of factors are similar for two methods", - { - expect_lt( - max(abs(sem_path_3var$est.std - tspa_path_3var$est.std)), - expected = .05 - ) - }) - -test_that("test if the se of regression coefficients are similar for two methods", - { - expect_lt( - max(abs(sem_path_3var$se - tspa_path_3var$se)), - expected = .01 - ) - }) +test_that( + "Regression coefficients of factors are similar for two methods", + { + expect_lt( + max(abs(sem_path_3var$est.std - tspa_path_3var$est.std)), + expected = .05 + ) + } +) + +test_that( + "se of regression coefficients are similar for two methods", + { + expect_lt( + max(abs(sem_path_3var$se - tspa_path_3var$se)), + expected = .01 + ) + } +) # Variance of factors sem_var_3var <- subset(standardizedSolution(sem_3var), @@ -202,18 +294,18 @@ fs_dat_multi <- cbind( # SEM model sem_model_multi <- ' - # latent variables (indicated by factor scores) - visual =~ c(1, 1) * fs_visual - speed =~ c(1, 1) * fs_speed - # constrain the errors - fs_visual ~~ c(0.11501092038276, 0.097236701584) * fs_visual - fs_speed ~~ c(0.07766672265625, 0.07510378617049) * fs_speed - # latent variances - visual ~~ c(v11, v12) * visual - speed ~~ c(v21, v22) * speed - # regressions - visual ~ speed - ' + # latent variables (indicated by factor scores) + visual =~ c(1, 1) * fs_visual + speed =~ c(1, 1) * fs_speed + # constrain the errors + fs_visual ~~ c(0.11501092038276, 0.097236701584) * fs_visual + fs_speed ~~ c(0.07766672265625, 0.07510378617049) * fs_speed + # latent variances + visual ~~ c(v11, v12) * visual + speed ~~ c(v21, v22) * speed + # regressions + visual ~ speed +' sem_multi <- sem(model = sem_model_multi, @@ -224,7 +316,7 @@ sem_multi <- tspa_multi <- tspa( model = "visual ~ speed", data = fs_dat_multi, - se = data.frame( + se_fs = data.frame( visual = c(0.3391326, 0.3118280), speed = c(0.2786875, 0.2740507) ), @@ -240,21 +332,22 @@ sem_path_multi <- subset(standardizedSolution(sem_multi), tspa_path_multi <- subset(standardizedSolution(tspa_multi), subset = op == "~") -test_that("test if the regression coefficients of factors are similar for two methods", - { - expect_equal( - sem_path_multi$est.std, - tspa_path_multi$est.std - ) - }) - -test_that("test if the se of regression coefficients are similar for two methods", - { - expect_equal( - sem_path_multi$se, - tspa_path_multi$se - ) - }) +test_that( + "Regression coefficients of factors are similar for two methods", + { + expect_equal( + sem_path_multi$est.std, + tspa_path_multi$est.std + ) + } +) + +test_that("se of regression coefficients are similar for two methods", { + expect_equal( + sem_path_multi$se, + tspa_path_multi$se + ) +}) # Variance of factors @@ -279,41 +372,7 @@ test_that("test if the se of variance is similar for two methods", { ) }) -# Test tspaSingleGroupMF() -cfa_3fac <- ' - # latent variables - ind60 =~ x1 + x2 + x3 - dem60 =~ y1 + y2 + y3 + y4 - dem65 =~ y5 + y6 + y7 + y8 -' -fs_dat_3fac <- get_fs(PoliticalDemocracy, model = cfa_3fac, std.lv = TRUE) -path_mod <- ' -dem60 ~ ind60 -dem65 ~ ind60 + dem60 -' -tspa_mod_s <- tspaSingleGroupMF( - model = path_mod, - data = fs_dat_3fac, - vc = attr(fs_dat_3fac, "av_efs"), - cross_loadings = attr(fs_dat_3fac, "fsA") -) - -factors_order_s <- subset(lavaan::lavaanify(tspa_mod_s), op == "~") -loadings_order_s <- subset(lavaan::lavaanify(tspa_mod_s), op == "=~") - -test_that("The order of factors in the model from tspaSingleGroupMF()", { - expect_equal(c("dem60", "dem65", "dem65"), factors_order_s$lhs) - expect_equal(c("ind60", "ind60", "dem60"), factors_order_s$rhs) -}) -test_that("The order of loadings in the model from tspaSingleGroupMF()", { - expect_equal(rep(c("ind60", "dem60", "dem65"), each = 3), - loadings_order_s$lhs) - expect_equal(rep(c("fs_ind60", "fs_dem60", "fs_dem65"), 3), - loadings_order_s$rhs) -}) - - -# Test tspaMultipleGroupMF() +# Test tspa_mf() mod4 <- " # latent variables visual =~ x1 + x2 + x3 @@ -323,12 +382,13 @@ mod4 <- " " fs_dat4 <- get_fs(HolzingerSwineford1939, model = mod4, std.lv = TRUE, group = "school") -tspa_mod_m <- tspaMultipleGroupMF( +tspa_mod_m <- tspa_mf( model = "visual ~ speed textual ~ visual + speed", data = fs_dat4, - vc = attr(fs_dat4, "av_efs"), - cross_loadings = attr(fs_dat4, "fsA") + fsT = attr(fs_dat4, "fsT"), + fsL = attr(fs_dat4, "fsL"), + fsb = NULL ) factors_order_m <- subset(lavaan::lavaanify(tspa_mod_m, ngroup = 2), @@ -336,13 +396,13 @@ factors_order_m <- subset(lavaan::lavaanify(tspa_mod_m, ngroup = 2), loadings_order_m <- subset(lavaan::lavaanify(tspa_mod_m, ngroup = 2), op == "=~") -test_that("The order of factors in the model from tspaSingleGroupMF()", { +test_that("The order of factors in the model from tspa_mf()", { expect_equal(rep(c("visual", "textual", "textual"), 2), factors_order_m$lhs) expect_equal(rep(c("speed", "visual", "speed"), 2), factors_order_m$rhs) }) -test_that("The order of loadings in the model from tspaSingleGroupMF()", { +test_that("The order of loadings in the model from tspa_mf()", { expect_equal(rep(c("visual", "textual", "speed"), each = 3) |> rep(2), loadings_order_m$lhs) expect_equal(rep(c("fs_visual", "fs_textual", "fs_speed"), 6), @@ -355,8 +415,8 @@ tspa_fit_m <- tspa( textual ~ visual + speed", data = fs_dat4, group = "school", - vc = attr(fs_dat4, "av_efs"), - cross_loadings = attr(fs_dat4, "fsA") + fsT = attr(fs_dat4, "fsT"), + fsL = attr(fs_dat4, "fsL") ) fs_dat4b <- get_fs(HolzingerSwineford1939, model = mod4, group = "school", method = "Bartlett") @@ -381,3 +441,200 @@ test_that("Multiple-group multiple-factor example", code = { expect_equal(sct$se[sct$op == "~"], expected = scs$se[scs$op == "~"], tolerance = 0.0001) }) + +# An example from Chapter 14 of Grimm et al. (2016) +# https://quantdev.ssri.psu.edu/tutorials/growth-modeling-chapter-14-modeling-change-latent-variables-measured-continuous + +mean_vec <- c(248.83, 270.6, 278.84, 486.26, 448.44, 459.6, + 422.91, 415.63, 374.35) +cov_mat <- matrix(c( + 232.71, 207.92, 188.09, 319.68, 285.26, 277.85, 260.75, 249.28, 217.96, + 207.92, 254.88, 212.14, 331.88, 313.8, 314.91, 274.99, 281.29, 243.6, + 188.09, 212.14, 270.46, 325.97, 308.84, 346.36, 284.9, 291.28, 281.55, + 319.68, 331.88, 325.97, 797.86, 617.02, 581.17, 511.8, 470.36, 420.6, + 285.26, 313.8, 308.84, 617.02, 662.41, 555.9, 448.81, 449.25, 394.63, + 277.85, 314.91, 346.36, 581.17, 555.9, 736.45, 440.78, 439.33, 443.67, + 260.75, 274.99, 284.9, 511.8, 448.81, 440.78, 618.23, 528.01, 437.92, + 249.28, 281.29, 291.28, 470.36, 449.25, 439.33, 528.01, 583.24, 448.64, + 217.96, 243.6, 281.55, 420.6, 394.63, 443.67, 437.92, 448.64, 480.57 +), nrow = 9, ncol = 9, byrow = TRUE) +set.seed(123) +sim_dat <- MASS::mvrnorm(n = 2000, mu = mean_vec, Sigma = cov_mat) |> + `colnames<-`(c("s_g3", "s_g5", "s_g8", "r_g3", "r_g5", "r_g8", + "m_g3", "m_g5", "m_g8")) + +strict_mod <- " +# factor loadings +eta1 =~ 15.1749088 * s_g3 + l2 * r_g3 + l3 * m_g3 +eta2 =~ 15.1749088 * s_g5 + l2 * r_g5 + L3 * m_g5 +eta3 =~ 15.1749088 * s_g8 + l2 * r_g8 + L3 * m_g8 + +# factor variances/covariances +eta1 ~~ 1 * eta1 + eta2 + eta3 +eta2 ~~ eta2 + eta3 +eta3 ~~ eta3 + +# unique variances/covariances +s_g3 ~~ u1 * s_g3 + s_g5 + s_g8 +s_g5 ~~ u1 * s_g5 + s_g8 +s_g8 ~~ u1 * s_g8 +r_g3 ~~ u2 * r_g3 + r_g5 + r_g8 +r_g5 ~~ u2 * r_g5 + r_g8 +r_g8 ~~ u2 * r_g8 +m_g3 ~~ u3 * m_g3 + m_g5 + m_g8 +m_g5 ~~ u3 * m_g5 + m_g8 +m_g8 ~~ u3 * m_g8 + +# latent variable intercepts +eta1 ~ 0 * 1 +eta2 ~ 1 +eta3 ~ 1 + +# observed variable intercepts +s_g3 ~ i1 * 1 +s_g5 ~ i1 * 1 +s_g8 ~ i1 * 1 +r_g3 ~ i2 * 1 +r_g5 ~ i2 * 1 +r_g8 ~ i2 * 1 +m_g3 ~ i3 * 1 +m_g5 ~ i3 * 1 +m_g8 ~ i3 * 1 +" +fs_growth_dat <- get_fs(sim_dat, model = strict_mod, std.lv = TRUE) + +growth_mod <- " +i =~ 1 * eta1 + 1 * eta2 + 1 * eta3 +s =~ 0 * eta1 + start(.5) * eta2 + 1 * eta3 + +# factor variances +eta1 ~~ psi * eta1 +eta2 ~~ psi * eta2 +eta3 ~~ psi * eta3 + +i ~~ start(.8) * i +s ~~ start(.5) * s +i ~~ start(0) * s + +i ~ 0 * 1 +s ~ 1 +" +growth_fit <- tspa(growth_mod, fs_growth_dat, + fsT = attr(fs_growth_dat, "fsT"), + fsL = attr(fs_growth_dat, "fsL"), + fsb = attr(fs_growth_dat, "fsb")) + +########## Error messages ########## + +test_that("Empty path model", { + expect_error( + tspa(model = 123, + data = fs_growth_dat, + fsT = attr(fs_growth_dat, "fsT")), + "The structural path model provided is not a string." + ) +}) + +test_that("Need to provide none or both fsT and fsL", { + expect_error( + tspa(model = growth_mod, + data = fs_growth_dat, + fsT = attr(fs_growth_dat, "fsT")), + "Please provide both or none of fsT and fsL" + ) + expect_error( + tspa(model = growth_mod, + data = fs_growth_dat, + fsL = attr(fs_growth_dat, "fsL")), + "Please provide both or none of fsT and fsL" + ) + expect_no_error( + tspa(growth_mod, + data = fs_growth_dat, + fsT = attr(fs_growth_dat, "fsT"), + fsL = attr(fs_growth_dat, "fsL"), + fsb = attr(fs_growth_dat, "fsb")) + ) +}) + +test_that( + "Names of factor score variables need to match those in the input data", + { + data("PoliticalDemocracy", package = "lavaan") + mod2 <- " + # latent variables + ind60 =~ x1 + x2 + x3 + dem60 =~ y1 + y2 + y3 + y4 + dem65 =~ y5 + y6 + y7 + y8 + " + fs_dat2 <- get_fs(PoliticalDemocracy, model = mod2, std.lv = TRUE) + ecov_fs <- attr(fs_dat2, "fsT") + dimnames(ecov_fs) <- lapply(dimnames(ecov_fs), + FUN = \(x) paste0("bs_", x) + ) + expect_error( + tspa( + model = "dem60 ~ ind60 + dem65 ~ ind60 + dem60", + data = fs_dat2, + fsT = ecov_fs, + fsL = attr(fs_dat2, "fsL") + ), + "Names of factor score variables do not match those in the input data." + ) + expect_no_error( + tspa( + model = "dem60 ~ ind60 + dem65 ~ ind60 + dem60", + data = fs_dat2, + fsT = attr(fs_dat2, "fsT"), + fsL = attr(fs_dat2, "fsL") + ) + ) + } +) + +test_that("Test indicator names not starting with 'fs_'", { + data("PoliticalDemocracy", package = "lavaan") + mod2 <- " + # latent variables + ind60 =~ x1 + x2 + x3 + dem60 =~ y1 + y2 + y3 + y4 + dem65 =~ y5 + y6 + y7 + y8 + " + fs_dat2 <- get_fs(PoliticalDemocracy, model = mod2, std.lv = TRUE) + names(fs_dat2) <- gsub("fs_", "bs_", names(fs_dat2)) + ecov_fs <- attr(fs_dat2, "fsT") + dimnames(ecov_fs) <- lapply(dimnames(ecov_fs), + FUN = \(x) gsub("fs_", "bs_", x)) + mat_ld <- attr(fs_dat2, "fsL") + rownames(mat_ld) <- gsub("fs_", "bs_", rownames(mat_ld)) + expect_no_error( + bs_fit <- tspa(model = "dem60 ~ ind60 + dem65 ~ ind60 + dem60", + data = fs_dat2, + fsT = ecov_fs, + fsL = mat_ld) + ) + fs_fit <- tspa(model = "dem60 ~ ind60 + dem65 ~ ind60 + dem60", + data = get_fs(PoliticalDemocracy, model = mod2, std.lv = TRUE), + fsT = attr(fs_dat2, "fsT"), + fsL = attr(fs_dat2, "fsL")) + expect_identical( + parameterestimates(bs_fit)["est"], parameterestimates(fs_fit)["est"] + ) +}) + +test_that("Missing group argument for a multigroup model", { + expect_error( + tspa( + model = "visual ~ speed + textual ~ visual + speed", + data = fs_dat4, + fsT = attr(fs_dat4, "fsT"), + fsL = attr(fs_dat4, "fsL") + ), + "Please specify 'group = ' to fit a multigroup model in lavaan" + ) +}) diff --git a/vignettes/.gitignore b/vignettes/.gitignore old mode 100755 new mode 100644 diff --git a/vignettes/tspa-vignette.Rmd b/vignettes/R2spa.Rmd similarity index 95% rename from vignettes/tspa-vignette.Rmd rename to vignettes/R2spa.Rmd index 71b2302..4fc8bfc 100644 --- a/vignettes/tspa-vignette.Rmd +++ b/vignettes/R2spa.Rmd @@ -53,7 +53,7 @@ To build a Two-Stage Path Analysis model, simply call the `tspa()` function with ```{r} tspa_fit <- tspa(model = "dem60 ~ ind60", data = fs_dat, - se = list(ind60 = 0.1213615, dem60 = 0.6756472)) + se_fs = list(ind60 = 0.1213615, dem60 = 0.6756472)) ``` To view the Two-Stage Path Analysis model, use `attributes([model name])$tspaModel`. Function `cat()` can help tidy up the model output. In the output, the values of error constraints are computed by squaring standard errors from the previous section. @@ -102,8 +102,8 @@ To build a Two-Stage Path Analysis model, simply call the `tspa()` function with tspa_3var_fit <- tspa(model = "dem60 ~ ind60 dem65 ~ ind60 + dem60", data = fs_dat, - se = list(ind60 = 0.1213615, dem60 = 0.6756472, - dem65 = 0.5724405)) + se_fs = list(ind60 = 0.1213615, dem60 = 0.6756472, + dem65 = 0.5724405)) cat(attributes(tspa_3var_fit)$tspaModel) ``` @@ -147,8 +147,8 @@ Function `standardizedsolution()` enables user to view a table of standard error # tspa model tspa_fit <- tspa(model = "visual ~ speed", data = fs_hs, - se = data.frame(visual = c(0.3391326, 0.311828), - speed = c(0.2786875, 0.2740507)), + se_fs = data.frame(visual = c(0.3391326, 0.311828), + speed = c(0.2786875, 0.2740507)), group = "school" # group.equal = "regressions" ) diff --git a/vignettes/boo_joint.RDS b/vignettes/boo_joint.RDS new file mode 100644 index 0000000..1b17413 Binary files /dev/null and b/vignettes/boo_joint.RDS differ diff --git a/vignettes/boo_separate.RDS b/vignettes/boo_separate.RDS new file mode 100644 index 0000000..a1a7a72 Binary files /dev/null and b/vignettes/boo_separate.RDS differ diff --git a/vignettes/categorical-interaction.Rmd b/vignettes/categorical-interaction.Rmd new file mode 100644 index 0000000..c82cbab --- /dev/null +++ b/vignettes/categorical-interaction.Rmd @@ -0,0 +1,245 @@ +--- +title: "Using Two-Stage Path Analysis with Definition Variables for Latent Interactions with Categorical Indicators" +author: "Mark Lai" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{categorical-interaction} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r} +library(mirt) # for IRT +library(OpenMx) +library(umx) +library(R2spa) +set.seed(591028) +``` + +## Simulate Data + +```{r} +#' Population parameters +num_obs <- 5000 # relatively large sample +cov_xm <- .45 +betas <- c(.2, .3, -.4) +# Results seem more accurate with more items +lambdax <- 1.7 * c(.6, .7, .8, .5, .5, .3, .9, .7, .6) # on normal ogive metric +lambdam <- 1.7 * c(.8, .7, .7, .6, .5) # on normal ogive metric +lambday <- c(.5, .7, .9) +thresx <- matrix(c(1, 1.5, 1.3, 0.5, 2, -0.5, -1, 0.5, 0.8), nrow = 1) # binary +thresm <- matrix(c(-2, -2, -0.4, -0.5, 0, + -1.5, -0.5, 0, 0, 0.5, + -1, 1, 1.5, 0.8, 1), nrow = 3, + byrow = TRUE) # ordinal with 4 categories +inty <- 1.5 # intercept of y +# error variance of y +r2y <- crossprod( + betas, + matrix(c(1, cov_xm, 0, cov_xm, 1, 0, 0, 0, 1 + cov_xm^2), nrow = 3) + ) %*% betas +evar <- as.numeric(1 - r2y) + +#' Simulate latent variables X and M with variances of 1, and the disturbance of +#' Y +cov_xm_ey <- matrix(c(1, cov_xm, 0, + cov_xm, 1, 0, + 0, 0, evar), nrow = 3) +eta <- MASS::mvrnorm(num_obs, mu = rep(0, 3), Sigma = cov_xm_ey, + empirical = TRUE) +# Add product term +eta <- cbind(eta, eta[, 1] * eta[, 2]) + +#' Compute latent y (standardized) +etay <- inty + eta[, -3] %*% betas + eta[, 3] +# Verify the structural coefficients with eta known +lm(etay ~ eta[, -3]) + +#' Simulate y (continuous) +y <- t( + lambday %*% t(etay) + + rnorm(num_obs * length(lambday), sd = sqrt(1 - lambday^2)) +) + +#' Simulate latent continuous responses for X and M +xstar <- eta[, 1] %*% t(lambdax) + rnorm(num_obs * length(lambdax)) +mstar <- eta[, 2] %*% t(lambdam) + rnorm(num_obs * length(lambdam)) +#' Obtain categorical items +x <- vapply( + seq_along(lambdax), + FUN = \(i) { + findInterval(xstar[, i], thresx[, i]) + }, + FUN.VALUE = numeric(num_obs)) +m <- vapply( + seq_along(lambdam), + FUN = \(i) { + findInterval(mstar[, i], thresm[, i]) + }, + FUN.VALUE = numeric(num_obs)) +``` + +## Compute Factor Scores + +### Y + +```{r} +fsy <- get_fs(data = y, std.lv = TRUE) +``` + +### X + +In this example, we use EAP scores, which are conceptually similar to the regression scores for continuous data. + +```{r} +irtx <- mirt(data.frame(x), itemtype = "2PL", + verbose = FALSE) # IRT (2-PL) for x +marginal_rxx(irtx) # marginal reliability +fsx <- fscores(irtx, full.scores.SE = TRUE) +empirical_rxx(fsx) # empirical reliability +``` + +### M + +```{r} +irtm <- mirt(data.frame(m), itemtype = "graded", + verbose = FALSE) # IRT (GRM) for m +marginal_rxx(irtm) # marginal reliability +fsm <- fscores(irtm, full.scores.SE = TRUE) +empirical_rxx(fsm) # empirical reliability +``` + +## Loading and Error Variances for Product of Factor scores + +Let $\tilde \eta_X$ and $\tilde \eta_M$ be factor scores of the predictor and the moderator. Then + +$$\tilde \eta_X \tilde \eta_M = (\lambda_X \eta_X + e_{\tilde X}) (\lambda_M \eta_M + e_{\tilde M}) = (\lambda_X \lambda_M \eta_X \eta_M) + \lambda_X \eta_X e_M + \lambda_M \eta_M e_X + e_X e_M$$ + +So the loading is $\lambda_X \lambda_M$, and the error variance for the product indicator is + +$$\lambda_X^2 V(\eta_X) V(e_M) + \lambda_M^2 V(\eta_M) V(e_X) + V(e_X) V(e_M)$$ + +## Directly Using Factor Scores + +```{r} +# Assemble data +fs_dat <- data.frame(fsy[c(1, 3, 4)], fsx, fsm) |> + setNames(c( + "fsy", "rel_fsy", "ev_fsy", + "fsx", "se_fsx", "fsm", "se_fsm" + )) |> + # Compute reliability; only needed for 2S-PA + within(expr = { + rel_fsx <- 1 - se_fsx^2 + rel_fsm <- 1 - se_fsm^2 + ev_fsx <- se_fsx^2 * (1 - se_fsx^2) + ev_fsm <- se_fsm^2 * (1 - se_fsm^2) + }) |> + # Add interaction + within(expr = { + fsxm <- fsx * fsm + ld_fsxm <- rel_fsx * rel_fsm + ev_fsxm <- rel_fsx^2 * ev_fsm + rel_fsm^2 * ev_fsx + ev_fsm * ev_fsx + }) +``` + +```{r} +lm(fsy ~ fsx * fsm, data = fs_dat) # some bias +``` + +## Two-Stage Path Analysis + +1. Create OpenMx model without latent variables + +```{r} +fsreg_mx <- mxModel("str", + type = "RAM", + manifestVars = c("fsy", "fsx", "fsm", "fsxm"), + # Path coefficients + mxPath( + from = c("fsx", "fsm", "fsxm"), to = "fsy", + values = 0 + ), + # Variances + mxPath( + from = c("fsy", "fsx", "fsm", "fsxm"), + to = c("fsy", "fsx", "fsm", "fsxm"), + arrows = 2, + values = c(0.6, 1, 1, 1) + ), + # Covariances + mxPath( + from = c("fsx", "fsm", "fsxm"), + to = c("fsx", "fsm", "fsxm"), + arrows = 2, connect = "unique.pairs", + values = 0 + ), + mxPath( + from = "one", to = c("fsy", "fsx", "fsm", "fsxm"), + values = 0 + ) +) +fsreg_umx <- umxLav2RAM( + " + fsy ~ b1 * fsx + b2 * fsm + b3 * fsxm + fsy + fsx + fsm + fsxm ~ 1 + fsx ~~ vx * fsx + fsm + fsxm + fsm ~~ vm * fsm + fsxm + ", + printTab = FALSE) +``` + +```{r} +DiagrammeR::grViz(omxGraphviz(fsreg_mx)) +``` + + +2. Create loading and error covariance matrices + +```{r} +# Loading +matL <- mxMatrix( + type = "Diag", nrow = 4, ncol = 4, + free = FALSE, + labels = c("data.rel_fsy", "data.rel_fsx", "data.rel_fsm", "data.ld_fsxm"), + name = "L" +) +# Error +matE <- mxMatrix( + type = "Diag", nrow = 4, ncol = 4, + free = FALSE, + labels = c("data.ev_fsy", "data.ev_fsx", "data.ev_fsm", "data.ev_fsxm"), + name = "E" +) +``` + +3. Combine (1) and (2), and run 2S-PA + +```{r} +tspa_mx <- + tspa_mx_model( + mxModel(fsreg_umx, + mxAlgebra(b1 * sqrt(vx), name = "stdx_b1"), + mxAlgebra(b2 * sqrt(vm), name = "stdx_b2"), + mxAlgebra(b3 * sqrt(vx) * sqrt(vm), name = "stdx_b3"), + mxCI(c("stdx_b1", "stdx_b2", "stdx_b3"))), + data = fs_dat, + mat_ld = matL, mat_vc = matE) +# Run OpenMx +tspa_mx_fit <- mxRun(tspa_mx, intervals = TRUE) +# Summarize the results (takes a minute or so) +# Also include the X-Standardized coefficients +summary(tspa_mx_fit) +# Standard errors for X-Standardized coefficients +mxSE(m1.stdx_b1, tspa_mx_fit) +mxSE(m1.stdx_b2, tspa_mx_fit) +mxSE(m1.stdx_b3, tspa_mx_fit) +``` diff --git a/vignettes/corrected-se.Rmd b/vignettes/corrected-se.Rmd new file mode 100644 index 0000000..ab44db1 --- /dev/null +++ b/vignettes/corrected-se.Rmd @@ -0,0 +1,326 @@ +--- +title: "Corrected Standard Errors" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{corrected-se} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +header-includes: + - \DeclareMathOperator{\Var}{\mathrm{Var}} + - \DeclareMathOperator{\E}{\mathrm{E}} + - \newcommand{\bv}[1]{\boldsymbol{\mathbf{#1}}} %APA-consistent bold +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +This vignette shows how to obtain Taylor-series corrected standard errors in 2S-PA, accounting for the uncertainty in the weights for obtaining the factor scores. It is described in [this paper](https://doi.org/10.1007/s00181-020-01942-z). + +The second-stage estimation treats as known the loading matrix and the error covariance matrix of the factor scores as indicators of the true latent latent variables. However, those are estimates based on the first-stage measurement model. When the sample size is large and/or the reliability of the factor scores are high, the uncertainty in the estimated loading and error covariance matrix is generally negligible. Otherwise, a first-order correction on the standard errors of the second-stage estimation is possible, as illustrated below. + +## First-order correction for SE + +$$\hat V_{\gamma, c} = \hat V_{\gamma} + \boldsymbol{J}_\boldsymbol{\gamma}(\hat{\boldsymbol{\theta}}) \hat V_{\theta} \boldsymbol{J}_\boldsymbol{\gamma}(\hat{\boldsymbol{\theta}})^\top,$$ + +where $\boldsymbol{J}_\boldsymbol{\gamma}$ is the Jacobian matrix of $\hat{\boldsymbol{\gamma}}$ with respect to $\boldsymbol{\theta}$, or + +$$\hat V_{\gamma, c} = \hat V_{\gamma} + (\boldsymbol{H}_\gamma)^{-1} \left(\frac{\partial^2 \ell}{\partial \theta \partial \gamma^\top}\right) \hat V_{\theta} \left(\frac{\partial^2 \ell}{\partial \theta \partial \gamma^\top}\right)^\top (\boldsymbol{H}_\gamma)^{-1},$$ + +where $V_{\gamma}$ is the naive covariance matrix of the structural parameter estimates $\hat{\boldsymbol{\gamma}}$ assuming the measurement error variance parameter, $\boldsymbol{\theta}$, is known, $\boldsymbol{H}_\gamma$ is the Hessian matrix of the log-likelihood $\ell$ with respect to $\hat{\boldsymbol{\gamma}}$, and $V_{\theta}$ can be obtained in the first-stage measurement model analysis. + +```{r load-pkg, message = FALSE} +library(lavaan) +library(R2spa) +library(numDeriv) +library(boot) +``` + +This example is based on the Political Democracy data used in and as an example in Bollen (1989)'s book. + +## Separate Measurement Models + +```{r} +model <- ' + # latent variable definitions + ind60 =~ x1 + x2 + x3 + dem60 =~ y1 + y2 + y3 + y4 + + # regressions + dem60 ~ ind60 +' +``` + +```{r} +cfa_ind60 <- cfa("ind60 =~ x1 + x2 + x3", data = PoliticalDemocracy) +# Regression factor scores +fs1 <- get_fs_lavaan(cfa_ind60, vfsLT = TRUE) +# Delta method variance of (loading, intercept) +vldev1 <- attr(fs1, which = "vfsLT") +cfa_dem60 <- cfa("dem60 =~ y1 + y2 + y3 + y4", + data = PoliticalDemocracy) +# Regression factor scores +fs2 <- get_fs_lavaan(cfa_dem60, vfsLT = TRUE) +# Delta method variance of (loading, intercept) +vldev2 <- attr(fs2, which = "vfsLT") +fs_dat <- data.frame( + fs_ind60 = fs1$fs_ind60, + fs_dem60 = fs2$fs_dem60 +) +# Combine sampling variance of loading and error variance +# Note: loadings first, then error variance +vldev <- block_diag(vldev1, vldev2)[c(1, 3, 2, 4), c(1, 3, 2, 4)] +``` + +```{r} +# 2S-PA +# Assemble loadings +ld <- block_diag(attr(fs1, which = "fsL"), + attr(fs2, which = "fsL")) +ev <- block_diag(attr(fs1, which = "fsT"), + attr(fs2, which = "fsT")) +tspa_fit <- tspa(model = "dem60 ~ ind60", + data = fs_dat, + fsL = ld, + fsT = ev) +# Unadjusted covariance +vcov(tspa_fit) +# Adjusted covariance matrix +(vc_cor <- vcov_corrected(tspa_fit, vfsLT = vldev, which_free = c(1, 4, 5, 7))) +# Corrected standard errors +sqrt(diag(vc_cor)) +``` + +Standardized solution + +```{r} +tspa_est_std <- function(ld_ev) { + ld1 <- ld + ev1 <- ev + ld1[c(1, 4)] <- ld_ev[1:2] + ev1[c(1, 4)] <- ld_ev[3:4] + tfit <- tspa(model = "dem60 ~ ind60", + data = fs_dat, + fsL = ld1, + fsT = ev1) + standardizedSolution(tfit)$est.std[8] +} +tspa_est_std(c(ld[c(1, 4)], ev[c(1, 4)])) +# Jacobian +jac_std <- numDeriv::jacobian(tspa_est_std, + x = c(ld[c(1, 4)], ev[c(1, 4)])) +# Corrected standard error +sqrt( + standardizedSolution(tspa_fit)[8, "se"]^2 + + jac_std %*% vldev %*% t(jac_std) +) +``` + +Compare to joint structural and measurement model + +```{r} +sem_fit <- sem(model, data = PoliticalDemocracy) +# Larger standard error +(vc_j <- + vcov(sem_fit)[c("dem60~ind60", "ind60~~ind60", "dem60~~dem60"), + c("dem60~ind60", "ind60~~ind60", "dem60~~dem60")]) +sqrt(diag(vc_j)) +``` + +Bootstrap Standard Errors + +```{r, eval = FALSE, echo = TRUE} +run_tspa <- function(df, inds) { + fs_ind60 <- get_fs(data = df[inds, ], model = "ind60 =~ x1 + x2 + x3", + se = "none", test = "none") + fs_dem60 <- get_fs(data = df[inds, ], model = "dem60 =~ y1 + y2 + y3 + y4", + se = "none", test = "none") + fs_dat <- cbind(fs_ind60, fs_dem60) + # Assemble loadings + ld <- block_diag(attr(fs_ind60, which = "fsL"), + attr(fs_dem60, which = "fsL")) + ev <- block_diag(attr(fs_ind60, which = "fsT"), + attr(fs_dem60, which = "fsT")) + tspa_fit <- tspa(model = "dem60 ~ ind60", + data = fs_dat, + fsL = ld, + fsT = ev, + test = "none") + coef(tspa_fit) +} +boo <- boot(PoliticalDemocracy, statistic = run_tspa, R = 1999) +``` + +```{r, eval = FALSE, include = FALSE} +saveRDS(boo, file = "boo_separate.RDS") +``` + +```{r, include = FALSE} +boo <- readRDS("boo_separate.RDS") +``` + +```{r} +# Use MAD to downweigh outlying replications +boo$t |> + apply(MARGIN = 2, FUN = mad) |> + setNames(c("dem60~ind60", "ind60~~ind60", "dem60~~dem60")) +``` + +The standard errors seem to diverge slightly among methods. SE(`dem60~ind60`) with bootstrap is the lowest. SE(`ind60~~ind60`) with the joint model is particularly higher than the other two. SE(`dem60~~dem60`) is the lowest with the corrected 2S-PA. It should be pointed out that the joint model and 2S-PA are different estimators. + +```{r, eval = FALSE} +run_sem <- function(df, inds) { + sem_fit <- sem(model, data = df[inds, ], se = "none", test = "none") + coef(sem_fit) +} +boo <- boot(PoliticalDemocracy, statistic = run_sem, R = 999) +``` + +## Joint Measurement Model + +```{r} +cfa_joint <- cfa("ind60 =~ x1 + x2 + x3 + dem60 =~ y1 + y2 + y3 + y4", + data = PoliticalDemocracy) +# Factor score +fs_joint <- get_fs_lavaan(cfa_joint, vfsLT = TRUE) +# Delta method variance +vldev_joint <- attr(fs_joint, which = "vfsLT") +``` + +```{r} +tspa_fit2 <- tspa(model = "dem60 ~ ind60", + data = fs_joint, + fsT = attr(fs_joint, "fsT"), + fsL = attr(fs_joint, "fsL")) +# Unadjusted covariance +vcov(tspa_fit2) +# Adjusted covariance +(vc2_cor <- vcov_corrected(tspa_fit2, vfsLT = vldev_joint)) +# Corrected standard errors +sqrt(diag(vc2_cor)) +``` + +Bootstrap Standard Errors + +```{r, eval = FALSE} +run_tspa2 <- function(df, inds) { + fs_joint <- get_fs(df[inds, ], + model = "ind60 =~ x1 + x2 + x3 + dem60 =~ y1 + y2 + y3 + y4", + se = "none", test = "none") + tspa_fit2 <- tspa(model = "dem60 ~ ind60", + data = fs_joint, + fsT = attr(fs_joint, "fsT"), + fsL = attr(fs_joint, "fsL"), + test = "none") + coef(tspa_fit2) +} +boo2 <- boot(PoliticalDemocracy, statistic = run_tspa2, R = 1999) +# run_tspa2b <- function(df, inds) { +# fsb_joint <- get_fs(df[inds, ], +# model = "ind60 =~ x1 + x2 + x3 +# dem60 =~ y1 + y2 + y3 + y4", +# se = "none", test = "none", method = "Bartlett") +# tspa_fit2b <- tspa(model = "dem60 ~ ind60", +# data = fsb_joint, +# fsT = attr(fsb_joint, "fsT"), +# fsL = diag(2), +# test = "none") +# coef(tspa_fit2b) +# } +# boo2b <- boot(PoliticalDemocracy, statistic = run_tspa2b, R = 4999) +# run_sem2 <- function(df, inds) { +# sem_fit <- sem(model, data = df[inds, ]) +# coef(sem_fit)[c(6, 14, 15)] +# } +# boo2j <- boot(PoliticalDemocracy, statistic = run_sem2, R = 4999) +``` + +```{r, eval = FALSE, include = FALSE} +saveRDS(boo2, file = "boo_joint.RDS") +``` + +```{r, include = FALSE} +boo2 <- readRDS("boo_joint.RDS") +``` + +```{r} +# Use MAD to downweigh outlying replications +boo2$t |> + apply(MARGIN = 2, FUN = mad) |> + setNames(c("dem60~ind60", "ind60~~ind60", "dem60~~dem60")) +``` + +### With Bartlett's Method + +```{r} +# Factor score +fsb_joint <- get_fs(PoliticalDemocracy, + model = "ind60 =~ x1 + x2 + x3 + dem60 =~ y1 + y2 + y3 + y4", + method = "Bartlett", + vfsLT = TRUE) +vldevb_joint <- attr(fsb_joint, which = "vfsLT") +``` + +```{r} +tspa_fit2b <- tspa(model = "dem60 ~ ind60", + data = fsb_joint, + fsT = attr(fsb_joint, "fsT"), + fsL = diag(2) |> + `dimnames<-`(list(c("fs_ind60", "fs_dem60"), + c("ind60", "dem60")))) +# Unadjusted covariance matrix +vcov(tspa_fit2b) +# Adjusted covariance matrix +(vc2b_cor <- vcov_corrected( + tspa_fit2b, + # Exclude fixed loadings and error variance + vfsLT = vldevb_joint[c(5, 7), c(5, 7)], + # Specify which elements are free (error variances only) + which_free = c(5, 7))) +# Corrected standard errors +sqrt(diag(vc2b_cor)) +``` + +## Multiple Groups + +```{r} +# Multigroup, three-factor example +mod <- " + # latent variables + visual =~ x1 + x2 + x3 + textual =~ x4 + x5 + x6 + speed =~ x7 + x8 + x9 +" +# Factor scores based on partial invariance +fs_dat <- get_fs(HolzingerSwineford1939, model = mod, std.lv = TRUE, + group = "school", + group.equal = c("loadings", "intercepts"), + group.partial = c("visual=~x2", "x7~1")) +fit <- cfa(mod, + data = HolzingerSwineford1939, + std.lv = TRUE, + group = "school", + group.equal = c("loadings", "intercepts"), + group.partial = c("visual=~x2", "x7~1")) +fs_dat <- get_fs_lavaan(fit) +vldev <- R2spa:::vcov_ld_evfs(fit) +``` + +```{r} +tspa_fit <- tspa(model = "visual ~~ textual + speed + textual ~~ speed", + data = fs_dat, + group = "school", + fsL = attr(fs_dat, which = "fsL"), + fsT = attr(fs_dat, which = "fsT")) +# Unadjusted covariance +vcov(tspa_fit)[c(1:6, 10:15), c(1:6, 10:15)] +# Adjusted covariance +vcov_corrected(tspa_fit, vfsLT = vldev)[c(1:6, 10:15), c(1:6, 10:15)] +``` diff --git a/vignettes/correction-error.Rmd b/vignettes/correction-error.Rmd new file mode 100644 index 0000000..eb35767 --- /dev/null +++ b/vignettes/correction-error.Rmd @@ -0,0 +1,256 @@ +--- +title: "Correction to Measurement Error" +author: "Mark Lai" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{correction-error} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +When computing the reliability or the standard errors of factor scores, we typically condition on the estimates of the model parameters, aka assuming that they are known. However, when sample size is small, ignoring those can be problematic as the model parameters are imprecise. This vignette provides details on how to obtain more realistic estimates of standard errors of factor scores. + +# Unidimensional Case + +Consider the conditional variance of the factor scores, $Var(\tilde \eta \mid \eta)$, with $\tilde \eta = \boldsymbol{a}^\top \boldsymbol{y}$. From the factor model, we have + +$$Var(\tilde \eta \mid \eta) = Var(\boldsymbol{a}^\top \boldsymbol{\lambda} \eta + \boldsymbol{a}^\top \boldsymbol{\varepsilon} \mid \eta) = \eta^2 Var(\boldsymbol{a}^\top \boldsymbol{\lambda}) + Var(\boldsymbol{a}^\top \boldsymbol{\varepsilon}).$$ + +The second term is the error component. Here is a small simulation: + +```{r} +#' Load package and set seed +library(lavaan) +library(R2spa) +set.seed(191254) + +#' Fixed conditions +num_obs <- 100 +lambda <- c(.3, .5, .7, .9) +theta <- c(.5, .4, .5, .7) + +#' Simulate true score, and standardized +eta <- rnorm(num_obs) +eta <- (eta - mean(eta)) +eta <- eta / sqrt(mean(eta^2)) + +#' Function for simulating y +simy <- function(eta, lambda) { + t(tcrossprod(lambda, eta) + + rnorm(length(lambda) * length(eta), sd = sqrt(theta))) +} +``` + +```{r, eval = FALSE} +#' Simulation +nsim <- 2500 +tfsy_sim <- fsy_sim <- matrix(NA, nrow = num_obs, ncol = nsim) +# Also save the scoring matrix +a_sim <- matrix(NA, nrow = length(lambda), ncol = nsim) +for (i in seq_len(nsim)) { + y <- simy(eta = eta, lambda = lambda) + tfsy_sim[, i] <- R2spa::compute_fscore( + y, lambda = lambda, theta = diag(theta), psi = matrix(1) + ) + fsy <- R2spa::get_fs(y, std.lv = TRUE) + fsy_sim[, i] <- fsy[, 1] + a_sim[, i] <- attr(fsy, which = "scoring_matrix") +} +``` + +```{r echo = FALSE, eval = FALSE} +saveRDS(list(tfsy_sim, fsy_sim, a_sim), file = "sim_correction-error.RDS") +``` + +```{r echo = FALSE, include = FALSE} +tfsy_sim <- readRDS("sim_correction-error.RDS")[[1]] +fsy_sim <- readRDS("sim_correction-error.RDS")[[2]] +a_sim <- readRDS("sim_correction-error.RDS")[[3]] +``` + +```{r} +#' Average conditional variance +# a known +apply(tfsy_sim, 1, var) |> mean() +# compare to theoretical value +true_a <- R2spa:::compute_a_reg(lambda, psi = matrix(1), theta = diag(theta)) +true_a %*% diag(theta) %*% t(true_a) +# a estimated +apply(fsy_sim, 1, var) |> mean() +``` + +The standard errors are larger when using sample estimates of weights. + +When $\boldsymbol{a}$ is known, the error variance is simply $\boldsymbol{a}^\top \boldsymbol{\Theta} \boldsymbol{a}$. However, when $\boldsymbol{a}$ is unknown and estimated as $\tilde{\boldsymbol{a}}$ from the data, + +$$Var(\tilde{\boldsymbol{a}}^\top \boldsymbol{\varepsilon}) = E[Var(\tilde{\boldsymbol{a}}^\top \boldsymbol{\varepsilon} \mid \tilde{\boldsymbol{a}})] + Var[E(\tilde{\boldsymbol{a}}^\top \boldsymbol{\varepsilon} \mid \tilde{\boldsymbol{a}})].$$ + +Under the assumption of $E(\boldsymbol{\varepsilon})$ = $\boldsymbol{0}$, the second term is 0. The first term is an expectation of a quadratic form, + +$$E(\tilde{\boldsymbol{a}}^\top \boldsymbol{\Theta} \tilde{\boldsymbol{a}}) = E(\tilde{\boldsymbol{a}}^\top) \boldsymbol{\Theta} E(\tilde{\boldsymbol{a}}) + \mathrm{tr}(\boldsymbol{\Theta} \boldsymbol{V}_{\tilde{\boldsymbol{a}}})$$ + +where $\boldsymbol{V}_{\tilde{\boldsymbol{a}}}$ is the covariance matrix of $\tilde{\boldsymbol{a}}$. We can see this using the simulated data + +```{r} +correct_fac <- sum(diag(diag(theta) %*% cov(t(a_sim)))) +true_a %*% diag(theta) %*% t(true_a) + correct_fac +``` + +## Correction Factor + +We can approximate $\boldsymbol{V}_{\tilde{\boldsymbol{a}}}$ using the [delta method](https://en.wikipedia.org/wiki/Delta_method), and the other terms in the above using sample estimates + +$$\tilde{\boldsymbol{a}}^\top \hat{\boldsymbol{\Theta}} \tilde{\boldsymbol{a}} + \mathrm{tr}(\hat{\boldsymbol{\Theta}} \hat{\boldsymbol{V}}_{\tilde{\boldsymbol{a}}}).$$ + +The delta method estimate, $\hat{\boldsymbol{V}}_{\tilde{\boldsymbol{a}}}$, is + +$$\boldsymbol{J}_{\tilde{\boldsymbol{a}}}(\boldsymbol{\theta})^\top \hat{\boldsymbol{V}}_{\tilde{\boldsymbol{\theta}}} \boldsymbol{J}_{\tilde{\boldsymbol{a}}}(\boldsymbol{\theta}),$$ + +where $\boldsymbol{J}_{\tilde{\boldsymbol{a}}}(\boldsymbol{\theta})$ is the Jacobian matrix, and $\hat{\boldsymbol{V}}_{\tilde{\boldsymbol{\theta}}}$ is the sampling variance of the sample estimator of $\boldsymbol{\theta}$. The Jacobian can be approximated using finite difference with `lavaan::lav_func_jacobian_complex()`. + +```{r} +y <- simy(eta = eta, lambda = lambda) +cfa_fit <- cfa("f =~ y1 + y2 + y3 + y4", + data = setNames(data.frame(y), paste0("y", 1:4)), + std.lv = TRUE) +# Jacobian +R2spa:::compute_a(coef(cfa_fit), lavobj = cfa_fit) +jac_a <- lavaan::lav_func_jacobian_complex( + \(x) R2spa:::compute_a(x, lavobj = cfa_fit)[[1]], + coef(cfa_fit) +) +sum(diag(lavInspect(cfa_fit, what = "est")$theta %*% + jac_a %*% vcov(cfa_fit) %*% t(jac_a))) +fsy <- R2spa::get_fs(y, std.lv = TRUE) +attr(fsy, which = "fsT") +# Using `get_fs(..., corrected_fsT = TRUE) +fsy2 <- R2spa::get_fs(y, std.lv = TRUE, corrected_fsT = TRUE) +attr(fsy2, which = "fsT") +``` + +# Multidimensional Case + +```{r} +#' Set seed +set.seed(251329) + +#' Fixed conditions +num_obs <- 100 +lambda <- Matrix::bdiag(list(c(.3, .5, .7, .9), c(.7, .6, .7))) |> + as.matrix() +theta <- c(.5, .4, .5, .7, .7, .8, .5) +psi <- matrix(c(1, -.4, -.4, 1), nrow = 2) + +#' Simulate true score (exact means and covariances) +eta <- MASS::mvrnorm(num_obs, mu = rep(0, 2), Sigma = psi, empirical = TRUE) + +#' Function for simulating y +simy <- function(eta, lambda) { + tcrossprod(eta, lambda) + + MASS::mvrnorm(num_obs, mu = rep(0, length(theta)), Sigma = diag(theta)) +} +``` + +```{r, eval = FALSE} +#' Simulation +nsim <- 2500 +tfsy_sim <- fsy_sim <- array(NA, dim = c(num_obs, ncol(lambda), nsim)) +# Also save the scoring matrix +a_sim <- array(NA, dim = c(ncol(lambda), nrow(lambda), nsim)) +for (i in seq_len(nsim)) { + y <- simy(eta = eta, lambda = lambda) + tfsy_sim[, , i] <- R2spa::compute_fscore( + y, lambda = lambda, theta = diag(theta), psi = psi + ) + fsy <- R2spa::get_fs( + data.frame(y) |> setNames(paste0("y", 1:7)), + model = "f1 =~ y1 + y2 + y3 + y4\nf2 =~ y5 + y6 + y7", + std.lv = TRUE) + fsy_sim[, , i] <- as.matrix(fsy[, 1:2]) + a_sim[, , i] <- attr(fsy, which = "scoring_matrix") +} +``` + +```{r echo = FALSE, eval = FALSE} +saveRDS(list(tfsy_sim, fsy_sim, a_sim), file = "sim_correction-error-multi.RDS") +``` + +```{r echo = FALSE, include = FALSE} +tfsy_sim <- readRDS("sim_correction-error-multi.RDS")[[1]] +fsy_sim <- readRDS("sim_correction-error-multi.RDS")[[2]] +a_sim <- readRDS("sim_correction-error-multi.RDS")[[3]] +``` + +```{r} +#' Average conditional variance +# a known +apply(tfsy_sim, MARGIN = 1, FUN = \(x) cov(t(x))) |> + rowMeans() |> matrix(ncol = 2, nrow = 2) +# compare to theoretical value +true_a <- R2spa:::compute_a_reg(lambda, psi = psi, theta = diag(theta)) +true_a %*% diag(theta) %*% t(true_a) +# a estimated +apply(fsy_sim, MARGIN = 1, FUN = \(x) cov(t(x))) |> + rowMeans() |> matrix(ncol = 2, nrow = 2) +``` + +```{r} +correct_fac11 <- sum(diag(diag(theta) %*% cov(t(a_sim[1, , ])))) +correct_fac22 <- sum(diag(diag(theta) %*% cov(t(a_sim[2, , ])))) +correct_fac21 <- sum(diag(diag(theta) %*% + cov(t(a_sim[2, , ]), t(a_sim[1, , ])))) +correct_fac <- matrix(c(correct_fac11, correct_fac21, + correct_fac21, correct_fac22), nrow = 2) +true_a %*% diag(theta) %*% t(true_a) + correct_fac +``` + +The $i$, $j$ element of the correction matrix can be approximated by $\mathrm{tr}(\hat{\boldsymbol{\Theta}} \hat{\boldsymbol{V}}_{{\tilde{\boldsymbol{a}}_i}{\tilde{\boldsymbol{a}}_j}})$, with $\hat{\boldsymbol{V}}_{{\tilde{\boldsymbol{a}}_i}{\tilde{\boldsymbol{a}}_j}})$ obtained by + +$$\boldsymbol{J}_{\tilde{\boldsymbol{a}}_i}(\boldsymbol{\theta})^\top \hat{\boldsymbol{V}}_{\tilde{\boldsymbol{\theta}}} \boldsymbol{J}_{\tilde{\boldsymbol{a}}_j}(\boldsymbol{\theta}),$$ + +```{r} +y <- simy(eta = eta, lambda = lambda) +cfa_fit <- cfa("f1 =~ y1 + y2 + y3 + y4\nf2 =~ y5 + y6 + y7", + data = setNames(data.frame(y), paste0("y", 1:7)), + std.lv = TRUE) +# Jacobian +R2spa:::compute_a(coef(cfa_fit), lavobj = cfa_fit) +jac_a1 <- lavaan::lav_func_jacobian_complex( + \(x) R2spa:::compute_a(x, lavobj = cfa_fit)[[1]][1, ], + coef(cfa_fit) +) +jac_a2 <- lavaan::lav_func_jacobian_complex( + \(x) R2spa:::compute_a(x, lavobj = cfa_fit)[[1]][2, ], + coef(cfa_fit) +) +# Correction[1, 1] +sum(diag(lavInspect(cfa_fit, what = "est")$theta %*% + jac_a1 %*% vcov(cfa_fit) %*% t(jac_a1))) +# Correction[2, 2] +sum(diag(lavInspect(cfa_fit, what = "est")$theta %*% + jac_a2 %*% vcov(cfa_fit) %*% t(jac_a2))) +# Correction[2, 1] +sum(diag(lavInspect(cfa_fit, what = "est")$theta %*% + jac_a2 %*% vcov(cfa_fit) %*% t(jac_a1))) +fsy <- R2spa::get_fs( + data.frame(y) |> setNames(paste0("y", 1:7)), + model = "f1 =~ y1 + y2 + y3 + y4\nf2 =~ y5 + y6 + y7", + std.lv = TRUE) +attr(fsy, which = "fsT") +# Using `get_fs(..., corrected_fsT = TRUE) +fsy2 <- R2spa::get_fs( + data.frame(y) |> setNames(paste0("y", 1:7)), + model = "f1 =~ y1 + y2 + y3 + y4\nf2 =~ y5 + y6 + y7", + std.lv = TRUE, + corrected_fsT = TRUE) +attr(fsy2, which = "fsT") +``` + diff --git a/vignettes/efa-score.Rmd b/vignettes/efa-score.Rmd new file mode 100644 index 0000000..531616e --- /dev/null +++ b/vignettes/efa-score.Rmd @@ -0,0 +1,128 @@ +--- +title: "EFA Scores" +author: "Mark Lai" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{efa-scores} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +This vignette demonstrates how to use the `compute_fscore()` function to calculate factor scores based on exploratory factor analysis, and compare the results to those calculated by the `psych::factor.scores()` function. + +```{r} +library(psych) +data(bfi, package = "psych") +library(lavaan) +``` + +## Example: Big Five Inventory + +```{r} +# Covariance (with FIML) +corr_bfi <- lavCor(bfi[1:25], missing = "fiml") +# EFA (Target rotation) +target_mat_bfi <- matrix(0, nrow = 25, ncol = 5) +target_mat_bfi[1:5, 1] <- NA +target_mat_bfi[6:10, 2] <- NA +target_mat_bfi[11:15, 3] <- NA +target_mat_bfi[16:20, 4] <- NA +target_mat_bfi[21:25, 5] <- NA +fa_target_bfi <- psych::fa( + corr_bfi, n.obs = 2436, + nfactors = 5, + rotate = "targetQ", Target = target_mat_bfi, + scores = "Bartlett") +# Factor correlations +fa_target_bfi$Phi |> + (`[`)(paste0("MR", 1:5), paste0("MR", 1:5)) |> + knitr::kable(digits = 2, caption = "EFA Factor Correlation") +``` + +```{r} +# Correlation with sum scores +bfi |> + transform(A = (7 - A1) + A2 + A3 + A4 + A5, + C = C1 + C2 + C3 + (7 - C4) + (7 - C5), + E = (7 - E1) + (7 - E2) + E3 + E4 + E5, + N = N1 + N2 + N3 + N4 + N5, + O = O1 + (7 - O2) + O3 + O4 + (7 - O5)) |> + subset(select = A:O) |> + cor(use = "complete") |> + knitr::kable(digits = 2, caption = "Sum scores") +``` + +Hand calculate the Bartlett scores using weights + +```{r} +# Bartlett score for first person +bscores <- + psych::factor.scores(bfi[, 1:25], f = fa_target_bfi, + method = "Bartlett") +fa_target_bfi$weights +# Calculation by hand +y1 <- scale(bfi[, 1:25])[1, ] # z-score +crossprod(fa_target_bfi$weights, as.matrix(y1)) +# Compare to results from psych::fa() +bscores$scores[1, ] +``` + +```{r} +# Covariance of Bartlett scores +cov(bscores$scores, use = "complete") |> + (`[`)(paste0("MR", 1:5), paste0("MR", 1:5)) |> + knitr::kable(digits = 2, caption = "With Bartlett scores") +``` + +### Using `compute_fscore()` and perform a two-stage analysis + +Use `R2spa::compute_fscore()` + +```{r} +# Obtain error covariances +yc <- scale(bfi[, 1:25]) +yc <- yc[complete.cases(yc), ] +lam <- fa_target_bfi$loadings +colnames(lam) <- c("N", "E", "C", "A", "O") +phi <- fa_target_bfi$Phi +th <- diag(fa_target_bfi$uniquenesses) +# # scoring weights +# a <- solve(crossprod(lam, solve(th, lam)), t(solve(th, lam))) +# ecov_fs <- a %*% th %*% t(a) +# dimnames(ecov_fs) <- rep(list(c("N", "E", "C", "A", "O")), 2) +# Two-stage analysis +library(R2spa) +bfi_fs <- compute_fscore(yc, lambda = lam, theta = th, + method = "Bartlett", center_y = FALSE, + fs_matrices = TRUE) +head(bfi_fs) +# Scoring matrix +attr(bfi_fs, which = "scoring_matrix") +# Error covariance +attr(bfi_fs, which = "fsT") +``` + +Recover factor covariances with 2S-PA + +```{r} +ts_fit <- tspa("", + data = data.frame(bscores$scores) |> + setNames(c("fs_N", "fs_E", "fs_C", "fs_A", "fs_O")), + fsT = attr(bfi_fs, which = "fsT"), + fsL = diag(5) |> + `dimnames<-`(list(c("fs_N", "fs_E", "fs_C", "fs_A", "fs_O"), + c("N", "E", "C", "A", "O")))) +lavInspect(ts_fit, what = "cor.lv") |> + (`[`)(c("A", "C", "E", "N", "O"), c("A", "C", "E", "N", "O")) |> + knitr::kable(digits = 2, caption = "With Bartlett scores and 2S-PA") +``` + + diff --git a/vignettes/multiple-factors.Rmd b/vignettes/multiple-factors.Rmd index e299e0f..1711829 100644 --- a/vignettes/multiple-factors.Rmd +++ b/vignettes/multiple-factors.Rmd @@ -73,8 +73,8 @@ Therefore, we need to specify cross-loadings when using 2S-PA. ```{r} tspa_fit <- tspa(model = "dem60 ~ ind60", data = fs_dat, - vc = attr(fs_dat, "av_efs"), - cross_loadings = attr(fs_dat, "fsA")) + fsT = attr(fs_dat, "fsT"), + fsL = attr(fs_dat, "fsL")) cat(attr(tspa_fit, "tspaModel")) parameterestimates(tspa_fit) ``` @@ -141,8 +141,8 @@ Therefore, we need to specify cross-loadings when using 2S-PA. tspa_fit_3fac <- tspa(model = "dem60 ~ ind60 dem65 ~ ind60 + dem60", data = fs_dat_3fac, - vc = attr(fs_dat_3fac, "av_efs"), - cross_loadings = attr(fs_dat_3fac, "fsA")) + fsT = attr(fs_dat_3fac, "fsT"), + fsL = attr(fs_dat_3fac, "fsL")) cat(attr(tspa_fit_3fac, "tspaModel")) parameterestimates(tspa_fit_3fac) ``` @@ -193,4 +193,4 @@ sem_3fac <- sem(" data = PoliticalDemocracy ) standardizedSolution(sem_3fac) -``` \ No newline at end of file +``` diff --git a/vignettes/sim_correction-error-multi.RDS b/vignettes/sim_correction-error-multi.RDS new file mode 100644 index 0000000..2c89aa9 Binary files /dev/null and b/vignettes/sim_correction-error-multi.RDS differ diff --git a/vignettes/sim_correction-error.RDS b/vignettes/sim_correction-error.RDS new file mode 100644 index 0000000..1c1551a Binary files /dev/null and b/vignettes/sim_correction-error.RDS differ diff --git a/vignettes/tspa-growth-vignette.Rmd b/vignettes/tspa-growth-vignette.Rmd new file mode 100644 index 0000000..d0ab5dc --- /dev/null +++ b/vignettes/tspa-growth-vignette.Rmd @@ -0,0 +1,176 @@ +--- +title: "Linear Growth Modeling with Two-Stage Path Analysis" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{tspa-growth-vignette} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r, message = FALSE} +library(lavaan) +library(R2spa) +``` + +We use the example from [this tutorial](https://quantdev.ssri.psu.edu/tutorials/growth-modeling-chapter-14-modeling-change-latent-variables-measured-continuous) from Chapter 14 of the book *Growth Modeling* (Grimm, Ram & Estabrook, 2017) to demonstrate how to perform linear growth modeling with two-stage path analysis (2S-PA) + +```{r} +# Load data +eclsk <- read.csv( + "https://quantdev.ssri.psu.edu/sites/qdev/files/ECLS_Science.csv", + header = TRUE +) +``` + +## Joint structural equation model: Latent growth with strict invariance model + +In the [tutorial](https://quantdev.ssri.psu.edu/tutorials/growth-modeling-chapter-14-modeling-change-latent-variables-measured-continuous), the authors first performed longitudinal invariance testing and found support for strict invariance. They moved on to fit a latent growth model based on the strict invariance model, as shown below. + +```{r} +jsem_growth_mod <- " +# factor loadings (with constraints) +eta1 =~ 15.1749088 * s_g3 + l2 * r_g3 + l3 * m_g3 +eta2 =~ 15.1749088 * s_g5 + l2 * r_g5 + l3 * m_g5 +eta3 =~ 15.1749088 * s_g8 + l2 * r_g8 + l3 * m_g8 + +# factor variances +eta1 ~~ psi * eta1 +eta2 ~~ psi * eta2 +eta3 ~~ psi * eta3 + +# unique variances/covariances +s_g3 ~~ u1 * s_g3 + s_g5 + s_g8 +s_g5 ~~ u1 * s_g5 + s_g8 +s_g8 ~~ u1 * s_g8 +r_g3 ~~ u2 * r_g3 + r_g5 + r_g8 +r_g5 ~~ u2 * r_g5 + r_g8 +r_g8 ~~ u2 * r_g8 +m_g3 ~~ u3 * m_g3 + m_g5 + m_g8 +m_g5 ~~ u3 * m_g5 + m_g8 +m_g8 ~~ u3 * m_g8 + +# observed variable intercepts +s_g3 ~ i1 * 1 +s_g5 ~ i1 * 1 +s_g8 ~ i1 * 1 +r_g3 ~ i2 * 1 +r_g5 ~ i2 * 1 +r_g8 ~ i2 * 1 +m_g3 ~ i3 * 1 +m_g5 ~ i3 * 1 +m_g8 ~ i3 * 1 + +# latent basis model +i =~ 1 * eta1 + 1 * eta2 + 1 * eta3 +s =~ 0 * eta1 + start(.5) * eta2 + 1 * eta3 + +i ~~ start(.8) * i +s ~~ start(.5) * s +i ~~ start(0) * s + +i ~ 0 * 1 +s ~ 1 +" +jsem_growth_fit <- lavaan(jsem_growth_mod, + data = eclsk, + meanstructure = TRUE, + estimator = "ML", + fixed.x = FALSE) +``` + +## 2S-PA: Latent growth with strict invariance model + +With 2S-PA, we first specify the strict invariance model, directly adopted from the [tutorial](https://quantdev.ssri.psu.edu/tutorials/growth-modeling-chapter-14-modeling-change-latent-variables-measured-continuous). Based on the strict invariance model, we then obtain the factor scores. + +```{r} +strict_mod <- " +# factor loadings +eta1 =~ 15.1749088 * s_g3 + l2 * r_g3 + l3 * m_g3 +eta2 =~ 15.1749088 * s_g5 + l2 * r_g5 + L3 * m_g5 +eta3 =~ 15.1749088 * s_g8 + l2 * r_g8 + L3 * m_g8 + +# factor variances/covariances +eta1 ~~ 1 * eta1 + eta2 + eta3 +eta2 ~~ eta2 + eta3 +eta3 ~~ eta3 + +# unique variances/covariances +s_g3 ~~ u1 * s_g3 + s_g5 + s_g8 +s_g5 ~~ u1 * s_g5 + s_g8 +s_g8 ~~ u1 * s_g8 +r_g3 ~~ u2 * r_g3 + r_g5 + r_g8 +r_g5 ~~ u2 * r_g5 + r_g8 +r_g8 ~~ u2 * r_g8 +m_g3 ~~ u3 * m_g3 + m_g5 + m_g8 +m_g5 ~~ u3 * m_g5 + m_g8 +m_g8 ~~ u3 * m_g8 + +# latent variable intercepts +eta1 ~ 0 * 1 +eta2 ~ 1 +eta3 ~ 1 + +# observed variable intercepts +s_g3 ~ i1 * 1 +s_g5 ~ i1 * 1 +s_g8 ~ i1 * 1 +r_g3 ~ i2 * 1 +r_g5 ~ i2 * 1 +r_g8 ~ i2 * 1 +m_g3 ~ i3 * 1 +m_g5 ~ i3 * 1 +m_g8 ~ i3 * 1 +" +``` + +```{r} +# Get factor scores +fs_dat <- get_fs(eclsk, model = strict_mod) +``` + +In the second stage, we use the factor scores obtained from the strict invariance model to model the growth trajectory across time points. The growth model is the same as the "latent basis model" in the joint structural equation model. + +```{r} +# Growth model +tspa_growth_mod <- " +i =~ 1 * eta1 + 1 * eta2 + 1 * eta3 +s =~ 0 * eta1 + start(.5) * eta2 + 1 * eta3 + +# factor variances +eta1 ~~ psi * eta1 +eta2 ~~ psi * eta2 +eta3 ~~ psi * eta3 + +i ~~ start(.8) * i +s ~~ start(.5) * s +i ~~ start(0) * s + +i ~ 0 * 1 +s ~ 1 +" +# Fit the growth model +tspa_growth_fit <- tspa(tspa_growth_mod, fs_dat, + fsT = attr(fs_dat, "fsT"), + fsL = attr(fs_dat, "fsL"), + fsb = attr(fs_dat, "fsb"), + estimator = "ML") +``` + +## Comparison + +The joint structural equation model and 2S-PA yielded comparable estimates of the mean and variances of the intercept and slope factors. + +```{r} +parameterestimates(tspa_growth_fit) |> + subset(subset = lhs %in% c("i", "s") & op %in% c("~1", "~~")) +parameterestimates(jsem_growth_fit) |> + subset(subset = lhs %in% c("i", "s") & op %in% c("~1", "~~")) +``` + diff --git a/vignettes/tspa-plot-vignette.Rmd b/vignettes/tspa-plot-vignette.Rmd new file mode 100644 index 0000000..620701d --- /dev/null +++ b/vignettes/tspa-plot-vignette.Rmd @@ -0,0 +1,213 @@ +--- +title: "Diagnostic plots for `tspa` models" +author: "Jimmy Zhang" +date: "`r Sys.Date()`" +output: + rmarkdown::html_vignette: + toc: true +vignette: > + %\VignetteIndexEntry{tspa-diagnostic-plot-vignette} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +The example is from . + +Load packages + +```{r load-pkg, message = FALSE} +library(lavaan) +library(R2spa) +``` + +## Single group, single predictor + +```{r} +model <- ' + # latent variable definitions + ind60 =~ x1 + x2 + x3 + dem60 =~ y1 + a*y2 + b*y3 + c*y4 + + # regressions + dem60 ~ ind60 +' + +fs_dat_ind60 <- get_fs(data = PoliticalDemocracy, + model = "ind60 =~ x1 + x2 + x3") +fs_dat_dem60 <- get_fs(data = PoliticalDemocracy, + model = "dem60 =~ y1 + y2 + y3 + y4") +fs_dat <- cbind(fs_dat_ind60, fs_dat_dem60) + +tspa_fit_1 <- tspa(model = "dem60 ~ ind60", + data = fs_dat, + se_fs = list(ind60 = 0.1213615, dem60 = 0.6756472), + meanstructure = T) +``` + +We recommend researchers to examine the diagnostic plot of factor scores in the model. They can use the function `tspa_plot()` to obtain two plots: (a) a scatter plot between factors, (b) a residual plot between factors. + +Features of `tspa_plot()`: + +- The function is able to pass arguments to the base R `plot()` function. +- Users can define the title of the scatter plot and label names of axis. +- Abbreviation argument allows users to choose whether using abbreviated group names. +- Users can choose whether generating the plot one by one by hitting the on the keyboard. + +```{r example_single_group_single_factor} +# par(mar = c(2,2,3,2)) +tspa_plot(tspa_fit_1, + col = "blue", + cex.lab = 1.2, + cex.axis = 1, + fscores_type = "original", + ask = TRUE) +``` + +### Using estimates from `tspa()` function + +```{r include=FALSE} +latent_estimates <- data.frame(lavPredict(tspa_fit_1)) +plot(latent_estimates$ind60, + latent_estimates$dem60, + main = "Scatterplot", + xlab = "ind60", + ylab = "dem60", + pch = 16, + ps = 1.25, + cex.lab = 1.2, + cex.axis = 1.2) +slope <- coef(tspa_fit_1)["dem60~ind60"] +abline(a = unname(coef(tspa_fit_1)["fs_ind60~1"] - slope*coef(tspa_fit_1)["fs_dem60~1"]), + b = slope, + lwd = 2) +``` + +## Single group, multiple predictors + +```{r} +model <- ' + # latent variable definitions + ind60 =~ x1 + x2 + x3 + dem60 =~ y1 + y2 + y3 + y4 + dem65 =~ y5 + y6 + y7 + y8 + + # regressions + dem60 ~ ind60 + dem65 ~ ind60 + dem60 + + # # residual correlations + # y1 ~~ y5 + # y2 ~~ y4 + y6 + # y3 ~~ y7 + # y4 ~~ y8 + # y6 ~~ y8 +' + +fs_dat_ind60 <- get_fs(data = PoliticalDemocracy, + model = "ind60 =~ x1 + x2 + x3") +fs_dat_dem60 <- get_fs(data = PoliticalDemocracy, + model = "dem60 =~ y1 + y2 + y3 + y4") +fs_dat_dem65 <- get_fs(data = PoliticalDemocracy, + model = "dem65 =~ y5 + y6 + y7 + y8") +fs_dat <- cbind(fs_dat_ind60, fs_dat_dem60, fs_dat_dem65) + +tspa_fit_2 <- tspa(model = "dem60 ~ ind60 + dem65 ~ ind60 + dem60", + data = fs_dat, + se_fs = list(ind60 = 0.1213615, dem60 = 0.6756472, + dem65 = 0.5724405)) +``` + +```{r example_single_group_multiple_factors} +# Title, xlab, and ylab each with same names. +tspa_plot(tspa_fit_2, + ps = 10, + col = "blue", + cex.lab = 1.2, + cex.axis = 1) + +# Title, xlab, and ylab each with separate names. +tspa_plot(tspa_fit_2, + ps = 10, + col = "darkgray", + cex.lab = 1.2, + cex.axis = 1, + title = c("Scatterplot_I", "Scatterplot_II", "Scatterplot_III"), + label_x = c("factor_1", "factor_2", "factor_3"), + label_y = c("factor_a", "factor_b", "factor_c")) +``` + +### Using estimates from `tspa()` function + +```{r include=FALSE} +# dem60 and ind60 +latent_estimates <- data.frame(lavPredict(tspa_fit_2)) +lavInspect(tspa_fit_2, what = "est") +# update(tspa_fit_2, data = lavInspect(tspa_fit_2, what = "data")) +# class(tspa_fit_2) + +plot(latent_estimates$ind60, + latent_estimates$dem60, + main = "Scatterplot", + xlab = "ind60", + ylab = "dem60", + pch = 16, + ps = 1.25, + cex.lab = 1.2, + cex.axis = 1.2) + +slope <- coef(tspa_fit_2)["dem60~ind60"] +abline(a = 0, b = slope, lwd = 2) +``` + +## Multigroup, single predictor + +```{r} +model <- ' + # latent variable definitions + visual =~ x1 + x2 + x3 + speed =~ x7 + x8 + x9 + + # regressions + visual ~ speed +' + +# get factor scores +fs_dat_visual <- get_fs(model = "visual =~ x1 + x2 + x3", + data = HolzingerSwineford1939, + group = "school") +fs_dat_speed <- get_fs(model = "speed =~ x7 + x8 + x9", + data = HolzingerSwineford1939, + group = "school") +fs_hs <- cbind(do.call(rbind, fs_dat_visual), + do.call(rbind, fs_dat_speed)) + +tspa_fit_3 <- tspa(model = "visual ~ speed", + data = fs_hs, + se_fs = data.frame(visual = c(0.3391326, 0.311828), + speed = c(0.2786875, 0.2740507)), + group = "school" + # group.equal = "regressions" + ) +``` + +```{r example_multiple_group_single_factor} +tspa_plot(tspa_fit_3, + ps = 10, + col = "darkgray", + cex.lab = 1.2, + ask = FALSE, + cex.axis = 1) + +tspa_plot(tspa_fit_3, + ps = 10, + col = "darkgray", + cex.lab = 1.2, + cex.axis = 1, + ask = TRUE, + abbreviation = FALSE) +``` diff --git a/vignettes/tspa-vignette-mx.Rmd b/vignettes/tspa-vignette-mx.Rmd new file mode 100644 index 0000000..939bc96 --- /dev/null +++ b/vignettes/tspa-vignette-mx.Rmd @@ -0,0 +1,296 @@ +--- +title: "Multi-Factor Measurement Model (OpenMx)" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{tspa-vignette-mx} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r, message = FALSE} +library(lavaan) +library(R2spa) +library(OpenMx) +library(umx) +library(mirt) +``` + +The example is from https://lavaan.ugent.be/tutorial/sem.html. + +```{r} +# CFA +my_cfa <- " + # latent variables + ind60 =~ x1 + x2 + x3 + dem60 =~ y1 + y2 + y3 + y4 +" +(fs_dat <- get_fs(PoliticalDemocracy, model = my_cfa, std.lv = TRUE)) |> head() +``` + +```{r} +tspa_fit <- tspa(model = "dem60 ~ ind60", data = fs_dat, + fsT = attr(fs_dat, "fsT"), + fsL = attr(fs_dat, "fsL")) +parameterestimates(tspa_fit) +``` + +Using OpenMx + +1. Create OpenMx model without latent variables + +```{r} +fsreg_umx <- umxLav2RAM( + " + fs_dem60 ~ fs_ind60 + fs_dem60 + fs_ind60 ~ 1 + ", + printTab = FALSE) +``` + +- Create loading and error covariance matrices (need reordering) + +```{r} +# Loading +matL <- mxMatrix( + type = "Full", nrow = 2, ncol = 2, + free = FALSE, + values = attr(fs_dat, "fsL")[2:1, 2:1], + name = "L" +) +# Error +matE <- mxMatrix( + type = "Symm", nrow = 2, ncol = 2, + free = FALSE, + values = attr(fs_dat, "fsT")[2:1, 2:1], + name = "E" +) +``` + +- Run 2S-PA + +```{r} +tspa_mx <- tspa_mx_model(fsreg_umx, data = fs_dat, + mat_ld = matL, mat_vc = matE) +# Run OpenMx +tspa_mx_fit <- mxRun(tspa_mx) +# Summarize the results (takes 20 seconds or so) +summary(tspa_mx_fit) +# Standardize coefficients +mxStandardizeRAMpaths(tspa_mx_fit, SE = TRUE)$m1[1, ] +``` + +```{r, eval = FALSE} +# Alternative specification +tspa_mx <- tspa_mx_model(fsreg_umx, data = fs_dat, + mat_ld = attr(fs_dat, "fsL"), + mat_vc = attr(fs_dat, "fsT")) +``` + +- Compare to joint model with OpenMx + +```{r} +jreg_umx_fit <- umxRAM( + " + # latent variables + ind60 =~ x1 + x2 + x3 + dem60 =~ y1 + y2 + y3 + y4 + # latent regression + dem60 ~ ind60 + ", + data = PoliticalDemocracy) +mxStandardizeRAMpaths(jreg_umx_fit, SE = TRUE)[4, ] +``` + +# Combined with IRT + +Example from Lai & Hsiao (2021, Psychological Methods) + +```{r} +# Simulate data with mirt +set.seed(1234) +num_obs <- 1000 +# Simulate theta +eta <- MASS::mvrnorm(num_obs, mu = c(0, 0), Sigma = diag(c(1, 1 - 0.5^2)), + empirical = TRUE) +th1 <- eta[, 1] +th2 <- -1 + 0.5 * th1 + eta[, 2] +# items and response data +a1 <- matrix(1, 10) +d1 <- matrix(rnorm(10)) +a2 <- matrix(runif(10, min = 0.5, max = 1.5)) +d2 <- matrix(rnorm(10)) +dat1 <- simdata(a = a1, d = d1, N = num_obs, itemtype = "2PL", Theta = th1) +dat2 <- simdata(a = a2, d = d2, N = num_obs, itemtype = "2PL", Theta = th2) +# Factor scores +mod1 <- mirt(dat1, model = 1, itemtype = "Rasch", verbose = FALSE) +mod2 <- mirt(dat2, model = 1, itemtype = "2PL", verbose = FALSE) +fs1 <- fscores(mod1, full.scores.SE = TRUE) +fs2 <- fscores(mod2, full.scores.SE = TRUE) +lm(fs2[, 1] ~ fs1[, 1]) # attenuated coefficient +``` + +Joint model + +- With WLS + +```{r} +dat <- cbind(dat1, dat2) +colnames(dat) <- paste0("i", 1:20) +wls_fit <- sem(" +f1 =~ i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + i10 +f2 =~ i11 + i12 + i13 + i14 + i15 + i16 + i17 + i18 + i19 + i20 +f2 ~ f1 +", data = dat, ordered = TRUE, std.lv = TRUE) +coef(wls_fit)["f2~f1"] +``` + +- With Maximum Likelihood + +Note: This is extremely computational intensive + +Using `tspa_mx_model()` + +```{r} +# Combine into data set +fs_dat <- cbind(fs1, fs2) |> + as.data.frame() |> + setNames(c("fs1", "se_fs1", "fs2", "se_fs2")) |> + # Compute reliability and error variances + within(expr = { + rel_fs1 <- 1 - se_fs1^2 + rel_fs2 <- 1 - se_fs2^2 + ev_fs1 <- se_fs1^2 * (1 - se_fs1^2) + ev_fs2 <- se_fs2^2 * (1 - se_fs2^2) + }) +``` + +```{r} +# Loading +matL <- mxMatrix( + type = "Diag", nrow = 2, ncol = 2, + free = FALSE, + labels = c("data.rel_fs2", "data.rel_fs1"), + name = "L" +) +# Error +matE <- mxMatrix( + type = "Diag", nrow = 2, ncol = 2, + free = FALSE, + labels = c("data.ev_fs2", "data.ev_fs1"), + name = "E" +) +``` + +```{r} +fsreg_umx <- umxLav2RAM( + " + fs2 ~ fs1 + fs2 + fs1 ~ 1 + ", + printTab = FALSE) +``` + +```{r} +tspa_mx <- tspa_mx_model(fsreg_umx, data = fs_dat, + mat_ld = matL, mat_vc = matE) +# Run OpenMx +tspa_mx_fit <- mxRun(tspa_mx) +# Summarize the results +summary(tspa_mx_fit) +``` + +Alternatively, one can use named matrices (`mat_ld` and `mat_vc`) to specify the names of the columns for the loading and error covariance matrices. + +```{r} +cross_load <- matrix(c("rel_fs2", NA, NA, "rel_fs1"), nrow = 2) |> + `dimnames<-`(rep(list(c("fs2", "fs1")), 2)) +err_cov <- matrix(c("ev_fs2", NA, NA, "ev_fs1"), nrow = 2) |> + `dimnames<-`(rep(list(c("fs2", "fs1")), 2)) +tspa_mx <- tspa_mx_model(fsreg_umx, data = fs_dat, + mat_ld = cross_load, mat_vc = err_cov) +# Run OpenMx +tspa_mx_fit <- mxRun(tspa_mx) +# Summarize the results +summary(tspa_mx_fit) +``` + +- Compare to joint model with OpenMx + +Note: FIML couldn't finish within 12 hours + +```{r} +jreg_umx <- umxRAM( + " + f1 =~ i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + i10 + f2 =~ i11 + i12 + i13 + i14 + i15 + i16 + i17 + i18 + i19 + i20 + f2 ~ f1 + i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + i10 ~ 0 + i11 + i12 + i13 + i14 + i15 + i16 + i17 + i18 + i19 + i20 ~ 0 + i1 ~~ 1 * i1 + i2 ~~ 1 * i2 + i3 ~~ 1 * i3 + i4 ~~ 1 * i4 + i5 ~~ 1 * i5 + i6 ~~ 1 * i6 + i7 ~~ 1 * i7 + i8 ~~ 1 * i8 + i9 ~~ 1 * i9 + i10 ~~ 1 * i10 + i11 ~~ 1 * i11 + i12 ~~ 1 * i12 + i13 ~~ 1 * i13 + i14 ~~ 1 * i14 + i15 ~~ 1 * i15 + i16 ~~ 1 * i16 + i17 ~~ 1 * i17 + i18 ~~ 1 * i18 + i19 ~~ 1 * i19 + i20 ~~ 1 * i20 + ", + data = lapply(as.data.frame(dat), FUN = mxFactor, + levels = c(0, 1)) |> + as.data.frame(), + type = "DWLS") +mxStandardizeRAMpaths(jreg_umx_fit, SE = TRUE)[4, ] +``` + +