Skip to content

Commit

Permalink
Merge pull request #234 from jr-leary7/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
jr-leary7 authored Oct 3, 2024
2 parents 67e3dd6 + fffc00a commit d4b7228
Show file tree
Hide file tree
Showing 15 changed files with 60 additions and 190 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

on:
push:
branches: main
branches:
- dev
- main

name: test-coverage

Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: scLANE
Type: Package
Title: Model Gene Expression Dynamics with Spline-Based NB GLMs, GEEs, & GLMMs
Version: 0.8.3
Authors@R: c(person(given = "Jack", family = "Leary", email = "j.leary@ufl.edu", role = c("aut", "cre"), comment = c(ORCID = "0009-0004-8821-3269")),
Version: 0.8.4
Authors@R: c(person(given = c("Jack", "R."), family = "Leary", email = "j.leary@ufl.edu", role = c("aut", "cre"), comment = c(ORCID = "0009-0004-8821-3269")),
person(given = "Rhonda", family = "Bacher", email = "rbacher@ufl.edu", role = c("ctb", "fnd"), comment = c(ORCID = "0000-0001-5787-476X")))
Description: Our scLANE model uses truncated power basis spline models to build flexible, interpretable models of single cell gene expression over pseudotime or latent time.
The modeling architectures currently supported are negative-binomial GLMs, GEEs, & GLMMs.
Expand Down
6 changes: 1 addition & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# Generated by roxygen2: do not edit by hand

S3method(pull,marge.sumy)
S3method(pull,null.sumy)
export(bootstrapRandomEffects)
export(chooseCandidateGenes)
export(clusterGenes)
Expand Down Expand Up @@ -38,8 +36,6 @@ importFrom(MASS,negative.binomial)
importFrom(MASS,theta.mm)
importFrom(Matrix,Matrix)
importFrom(Matrix,bdiag)
importFrom(Matrix,chol)
importFrom(Matrix,chol2inv)
importFrom(Matrix,colSums)
importFrom(Matrix,rowMeans)
importFrom(Matrix,t)
Expand Down Expand Up @@ -124,7 +120,6 @@ importFrom(purrr,reduce)
importFrom(scales,label_comma)
importFrom(scales,label_number)
importFrom(splines,bs)
importFrom(stats,.lm.fit)
importFrom(stats,as.dist)
importFrom(stats,as.formula)
importFrom(stats,coef)
Expand All @@ -137,6 +132,7 @@ importFrom(stats,fitted)
importFrom(stats,fitted.values)
importFrom(stats,hclust)
importFrom(stats,kmeans)
importFrom(stats,lm.fit)
importFrom(stats,logLik)
importFrom(stats,offset)
importFrom(stats,p.adjust)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Changes in v0.8.4

+ Minor bug fixes.
+ Improved matrix inversion through judicious usage of C++.

# Changes in version 0.8.3

+ Sped up GLMM mode.
Expand Down
10 changes: 5 additions & 5 deletions R/backward_sel_WIC.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#' @param id.vec A vector of observation IDs that is necessary for fitting a GEE model. Defaults to NULL.
#' @param cor.structure The specified working correlation structure of the GEE model. Must be one of "independence", "ar1", or "exchangeable". Defaults to NULL.
#' @param sandwich.var Should the sandwich variance estimator be used instead of the model-based estimator? Default to FALSE.
#' @param theta.hat An initial estimate of \eqn{\hat{\theta}} used to fit the negative-binomial model when GEE mode is being used.
#' @param theta.hat An initial estimate of \eqn{\hat{\theta}} used to fit the negative-binomial model when GEE mode is being used.
#' @return \code{backward_sel_WIC} returns the Wald statistic from the fitted model (the penalty is applied later on).
#' @references Stoklosa, J. Gibb, H. Warton, D.I. Fast forward selection for Generalized Estimating Equations With a Large Number of Predictor Variables. \emph{Biometrics}, \strong{70}, 110--120.
#' @references Stoklosa, J. and Warton, D.I. (2018). A generalized estimating equation approach to multivariate adaptive regression splines. \emph{Journal of Computational and Graphical Statistics}, \strong{27}, 245--253.
Expand All @@ -23,8 +23,8 @@ backward_sel_WIC <- function(Y = NULL,
B_new = NULL,
is.gee = FALSE,
id.vec = NULL,
cor.structure = NULL,
theta.hat = NULL,
cor.structure = NULL,
theta.hat = NULL,
sandwich.var = FALSE) {
# check inputs
if (is.null(Y) || is.null(B_new)) { stop("Some inputs are missing from backward_sel_WIC().") }
Expand All @@ -38,15 +38,15 @@ backward_sel_WIC <- function(Y = NULL,
corstr = cor.structure,
family = MASS::negative.binomial(theta.hat, link = "log"),
sandwich = sandwich.var)
wald_stat <- (unname(summary(fit)$wald.test[-1]))^2
wald_stat <- unname(summary(fit)$wald.test[-1])^2
} else {
fit <- gamlss::gamlss(Y ~ B_new - 1,
family = "NBI",
trace = FALSE)
withr::with_output_sink(tempfile(), {
fit_sum_mat <- as.matrix(summary(fit))
})
wald_stat <- ((fit_sum_mat[, 3])[-c(1, nrow(fit_sum_mat))])^2
wald_stat <- fit_sum_mat[, 3][-c(1, nrow(fit_sum_mat))]^2
}
return(wald_stat)
}
5 changes: 4 additions & 1 deletion R/biasCorrectGEE.R
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,10 @@ biasCorrectGEE <- function(fitted.model = NULL,
W <- as.matrix(Matrix::bdiag(cov_matrices))
X <- fitted.model$X
XWX <- t(X) %*% W %*% X
XWX_inv <- eigenMapMatrixInvert(XWX, n_cores = 1)
XWX_inv <- try({ eigenMapMatrixInvert(XWX, n_cores = 1) }, silent = TRUE)
if (inherits(XWX_inv, "try-error")) {
XWX_inv <- MASS::ginv(XWX)
}
H <- X %*% XWX_inv %*% t(X) %*% W
tr_H <- sum(diag(H))
if (verbose) {
Expand Down
11 changes: 3 additions & 8 deletions R/marge2.R
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,7 @@ marge2 <- function(X_pred = NULL,
} else {
next
}
} else if (meas_model0$GCVq1 >= -10 || round(score2, 4) > 0) {
} else {
score_term_temp <- c(score_term_temp, score2)
}

Expand Down Expand Up @@ -786,13 +786,8 @@ marge2 <- function(X_pred = NULL,
B_new_2 <- as.matrix(B_new[, -(variable.lowest_2 + 1)])
cnames_2 <- c(cnames_2, list(colnames(B_new_2)))
for (i in 2:(ncol_B - 1)) {
wic1_2 <- backward_sel_WIC(Y = Y,
B_new = B_new_2,
is.gee = is.gee,
id.vec = id.vec,
cor.structure = cor.structure,
sandwich.var = sandwich.var,
theta.hat = theta_hat)
wic1_2 <- backward_sel_WIC(Y = Y,
B_new = B_new_2)
if (i != (ncol_B - 1)) {
wic_mat_2[(i + 1), colnames(B_new_2)[-1]] <- wic1_2
WIC_2 <- sum(apply(wic_mat_2[seq(i + 1), ], 1, min, na.rm = TRUE)) + 2 * ncol(B_new_2)
Expand Down
140 changes: 0 additions & 140 deletions R/pull_sumy.R

This file was deleted.

15 changes: 8 additions & 7 deletions R/score_fun_gee.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
#' @author Jakub Stoklosa
#' @author David I. Warton
#' @author Jack Leary
#' @importFrom stats .lm.fit
#' @importFrom Matrix chol chol2inv
#' @importFrom stats lm.fit
#' @importFrom MASS ginv
#' @description Calculate the score statistic for a GEE model.
#' @param Y The response variable. Defaults to NULL.
#' @param N The number of clusters. Defaults to NULL.
Expand Down Expand Up @@ -41,10 +41,8 @@ score_fun_gee <- function(Y = NULL,
# check inputs
if (is.null(Y) || is.null(N) || is.null(n_vec) || is.null(VS.est_list) || is.null(AWA.est_list) || is.null(J2_list) || is.null(Sigma2_list) || is.null(J11.inv) || is.null(JSigma11) || is.null(mu.est) || is.null(V.est) || is.null(B1) || is.null(XA)) { stop("Some inputs to score_fun_gee() are missing.") }
# generate score statistic
reg <- try({ stats::.lm.fit(B1, Y) }, silent = TRUE) # This is not the model fit!! It just checks whether any issues occur for a simple linear regression model.
if (inherits(reg, "try-error")) {
score <- NA_real_
} else if (any(is.na(reg$coef))) {
reg <- try({ stats::lm.fit(B1, Y) }, silent = TRUE) # This is not the model fit!! It just checks whether any issues occur for a simple linear regression model.
if (inherits(reg, "try-error") || any(is.na(reg$coefficients))) {
score <- NA_real_
} else {
p <- ncol(XA)
Expand Down Expand Up @@ -100,7 +98,10 @@ score_fun_gee <- function(Y = NULL,
B = J21_transpose,
n_cores = 1)
Sigma <- Sigma22 - temp_prod_1 - temp_prod_2 + temp_prod_3
Sigma_inv <- eigenMapMatrixInvert(A = Sigma, n_cores = 1)
Sigma_inv <- try({ eigenMapMatrixInvert(Sigma, n_cores = 1) }, silent = TRUE)
if (inherits(Sigma_inv, "try-error")) {
Sigma_inv <- MASS::ginv(Sigma)
}
temp_prod <- eigenMapMatMult(A = t(B.est),
B = Sigma_inv,
n_cores = 1)
Expand Down
15 changes: 8 additions & 7 deletions R/score_fun_glm.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
#' @author Jakub Stoklosa
#' @author David I. Warton
#' @author Jack Leary
#' @importFrom stats .lm.fit
#' @importFrom Matrix chol chol2inv
#' @importFrom stats lm.fit
#' @importFrom MASS ginv
#' @description Calculate the score statistic for a GLM model.
#' @param Y The response variable. Defaults to NULL.
#' @param VS.est_list A product of matrices. Defaults to NULL.
Expand All @@ -30,10 +30,8 @@ score_fun_glm <- function(Y = NULL,
# check inputs
if (is.null(Y) || is.null(VS.est_list) || is.null(A_list) || is.null(B1_list) || is.null(mu.est) || is.null(V.est) || is.null(B1) || is.null(XA)) { stop("Some inputs to score_fun_glm() are missing.") }
# generate score statistic
reg <- try({ stats::.lm.fit(B1, Y) }, silent = TRUE) # This is not the model fit!! It just checks whether any issues occur for a simple linear regression model.
if (inherits(reg, "try-error")) {
score <- NA_real_
} else if (any(is.na(reg$coef))) {
reg <- try({ stats::lm.fit(B1, Y) }, silent = TRUE) # This is not the model fit!! It just checks whether any issues occur for a simple linear regression model.
if (inherits(reg, "try-error") || any(is.na(reg$coefficients))) {
score <- NA_real_
} else {
VS.est_i <- unlist(VS.est_list)
Expand All @@ -55,7 +53,10 @@ score_fun_glm <- function(Y = NULL,
B.est <- eigenMapMatMult(A = t(mu.est * VS.est_i),
B = XA,
n_cores = 1)
XVX_22 <- eigenMapMatrixInvert(A = inv.XVX_22, n_cores = 1)
XVX_22 <- try({ eigenMapMatrixInvert(inv.XVX_22, n_cores = 1) }, silent = TRUE)
if (inherits(XVX_22, "try-error")) {
XVX_22 <- MASS::ginv(inv.XVX_22)
}
temp_prod <- eigenMapMatMult(A = B.est,
B = XVX_22,
n_cores = 1)
Expand Down
6 changes: 3 additions & 3 deletions R/stat_out.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' @author Jakub Stoklosa
#' @author David I. Warton
#' @author Jack Leary
#' @importFrom stats .lm.fit fitted.values
#' @importFrom stats lm.fit fitted.values
#' @description Calculate the final RSS / GCV for a fitted model.
#' @param Y The response variable. Defaults to NULL.
#' @param B1 The model matrix of predictor variables. Defaults to NULL.
Expand All @@ -28,8 +28,8 @@ stat_out <- function(Y = NULL,
if (is.null(Y) || is.null(B1) || is.null(TSS) || is.null(GCV.null)) { stop("Some of the arguments to stat_out() are missing.") }
if (GCV.null == 0) { stop("GCV.null argument to stat_out() cannot be 0.") }
N <- length(Y)
reg <- stats::.lm.fit(B1, Y)
if (any(is.na(reg$coef))) {
reg <- try({ stats::lm.fit(B1, Y) }, silent = TRUE)
if (inherits(reg, "try-error") || any(is.na(reg$coefficients))) {
RSS1 <- RSSq1 <- GCV1 <- GCVq1 <- NA_real_
} else {
df1a <- ncol(B1) + pen * (ncol(B1) - 1) / 2 # This matches the earth() package, SAS and Friedman (1991) penalty.
Expand Down
Loading

0 comments on commit d4b7228

Please sign in to comment.