Merge pull request #11 from jolars/original-saga

Redesigned algorithm based on DeFazio 2014
jolars · Jul 10, 2018 · 5f22c99 · 5f22c99
2 parents 744f1ce + fb1a51e
commit 5f22c99
Show file tree

Hide file tree

Showing 26 changed files with 1,297 additions and 1,081 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
 .Rproj.user
 .Rhistory
 .RData
+inst/doc
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -28,8 +28,11 @@ Imports:
 Suggests: 
     covr,
     testthat,
-    glmnet
+    glmnet,
+    knitr,
+    rmarkdown,
+    latticeExtra
 LinkingTo: 
     Rcpp (>= 0.12.16),
-    RcppArmadillo (>= 0.8.500.0.0)
-
+    RcppEigen (>= 0.3.3.4.0)
+VignetteBuilder: knitr
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -1,84 +1,6 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
-#' Deviance
-#'
-#' Computes the deviance of the model given by `weights` and `intercept`.
-#'
-#' @param x a feature matrix (dense or sparse)
-#' @param y a response vector
-#' @param weights a vector of coefficients
-#' @param intercept an intercept vector
-#' @param is_sparse whether x is sparse
-#' @param n_samples the number of samples
-#' @param n_feature the number of features
-#' @param n_classes the number of classes
-#'
-#' @return Returns the deviance.
-#'
-#' @noRd
-#' @keywords internal
-NULL
-
-#' Rescale weights and intercept before returning these to user
-#'
-#' Currently no processing, and therefore no rescaling, is done
-#' when the intercept is fit. If x is sparse.
-#'
-#' @param weights weights
-#' @param weights_archive storage for weights
-#' @param intercept intercept
-#' @param intercept_archive storage for intercepts on a per-penalty basis
-#' @param x_center the offset (mean) used to possibly have centered x
-#' @param x_scale the scaling that was applied to x
-#' @param y_center the offset (mean) that y was offset with
-#' @param y_scale scaling for y
-#' @param n_features the number of features
-#' @param n_classes number of classes
-#' @param fit_intercept whether to fit the intercept
-#'
-#' @return `weights` and `intercept` are rescaled and stored in weights_archive
-#'   and intercept_archive.
-#'
-#' @noRd
-NULL
-
-#' Compute Regularization Path
-#'
-#' This function computes the regularization path as in glmnet so that
-#' the first solution is the null solution (if elasticnet_mix != 0).
-#'
-#' @param lambda lambda values in input -- empty by default
-#' @param n_lambda required number of penalties
-#' @param lambda_min_ratio smallest lambda_min_ratio
-#' @param elasticnet_mix ratio of l1 penalty to l2. Same as alpha in glmnet.
-#' @param x feature matrix
-#' @param y response vector
-#' @param n_samples number of samples
-#' @param alpha l2-penalty
-#' @param beta l1-penalty
-#' @param y_scale scale of y, used only to return lambda values to same
-#'   scale as in glmnet
-#'
-#' @return lambda, alpha and beta are updated.
-#'
-#' @noRd
-NULL
-
-#' Adapative transposing of feature matrix
-#'
-#' For sparse matrices, armadillo does not (yet?) have a inplace
-#' transpose method, so we overload for sparse and dense matrices,
-#' transposing inplace when x is dense.
-#'
-#' @param x a sparse or dense matrix
-#'
-#' @return x transposed.
-#'
-#' @keywords internal
-#' @noRd
-NULL
-
 #' Setup sgdnet Model Options
 #'
 #' Collect parameters from `control` and setup storage for coefficients,
@@ -113,12 +35,18 @@ NULL
 #'   * losses: the loss at each outer iteration per fit,
 #'   * npasses: the number of effective passes (epochs) accumulated over,
 #'     all lambda values, and
-#'   * return_codes: the convergence result. 0 mean that the algorithm
+#'   * return_codes: the convergence result. 0 means that the algorithm
 #'     converged, 1 means that `max_iter` was reached before the algorithm
 #'     converged.
-#'
+NULL
+
 #' @keywords internal
-SgdnetCpp <- function(x_in, y, control) {
-    .Call(`_sgdnet_SgdnetCpp`, x_in, y, control)
+#' @noRd
+SgdnetDense <- function(x, y, control) {
+    .Call(`_sgdnet_SgdnetDense`, x, y, control)
+}
+
+SgdnetSparse <- function(x, y, control) {
+    .Call(`_sgdnet_SgdnetSparse`, x, y, control)
 }
 
diff --git a/R/data.R b/R/data.R
@@ -68,3 +68,17 @@
 #'   UCI Machine Learning Repository <http://archive.ics.uci.edu/ml>. Irvine,
 #'   CA: University of California, School of Information and Computer Science.
 "mushrooms"
+
+#' Benchmark data for binomial response family
+#'
+#' @source <https://github.com/jolars/sgdnet/data-raw/
+#'
+#' @format A `data.frame` with 5 variables and 2000 observations.
+#' \describe{
+#'   \item{dataset}{dataset used}
+#'   \item{package}{R package}
+#'   \item{time}{run time in seconds}
+#'   \item{loss}{objective loss}
+#'   \item{penalty}{type of penalty, "ridge" or "lasso"}
+#' }
+"benchmarks_binomial"
diff --git a/R/plot.sgdnet.R b/R/plot.sgdnet.R
@@ -43,13 +43,17 @@ plot.sgdnet <- function(x, xvar = c("norm", "lambda", "dev"), ...) {
 
   plot_data <- utils::stack(as.data.frame(beta))
 
+  n_vars = length(unique(plot_data$ind))
+
   plot_args <- list(
     x = quote(values ~ xval),
     groups = quote(ind),
     data = quote(plot_data),
     type = "l",
     ylab = expression(beta),
-    auto.key = list(space = "right", lines = TRUE, points = FALSE),
+    auto.key = if (n_vars <= 10)
+      list(space = "right", lines = TRUE, points = FALSE)
+    else FALSE,
     abline = within(lattice::trellis.par.get("reference.line"), {h = 0})
   )
 

diff --git a/R/sgdnet.R b/R/sgdnet.R
@@ -35,15 +35,15 @@
 #' The *binomial* family solves the following objective:
 #'
 #' \deqn{
-#'   -\frac{1}{n} \sum_{i=1}^n
-#'     \bigg[y_i (\beta_0 + x_i^\mathsf{T} \beta) - \log\Big(1 + e^{\beta_0 + x_i^\mathsf{T} \beta}\Big)\bigg]
+#'   \frac{1}{n} \sum_{i=1}^n
+#'     \log\left[1 + e^{-y_i\left(\beta_0 + x^\mathsf{T} \beta \right)} \right]
 #'   + \lambda \left( \frac{1 - \alpha}{2} ||\beta||_2^2
 #'                    + \alpha||\beta||_1 \right),
 #' }{
-#'   -1/n \sum [y_i(\beta_0 + x^T \beta) - log(1 + exp(\beta_0 + x^T \beta)]
+#'   1/n \sum log {1 + exp[-y_i(\beta_0 + x^T \beta)]}
 #'   + \lambda [(1 - \alpha)/2 ||\beta||_2^2 + \alpha||\beta||_1],
 #' }
-#' where \eqn{y_i \in \{0, 1\}}{y ~ {0, 1}}.
+#' where \eqn{y_i \in \{-1, 1\}}{y ~ {-1, 1}}.
 #'
 #' @section Regularization Path:
 #' The default regularization path is a sequence of `nlambda`
@@ -137,8 +137,13 @@ sgdnet.default <- function(x,
   # Collect sgdnet-specific options for debugging and more
   debug <- getOption("sgdnet.debug")
 
-  if (is.null(lambda))
+  if (is.null(lambda) || is_false(lambda))
     lambda <- double(0L)
+  else
+    nlambda <- length(lambda)
+
+  if (nlambda == 0)
+    stop("lambda path cannot be of zero length.")
 
   stopifnot(identical(NROW(y), NROW(x)),
             !any(is.na(y)),
@@ -158,7 +163,7 @@ sgdnet.default <- function(x,
          gaussian = {
            stopifnot(is.numeric(y),
                      NCOL(y) == 1)
-           },
+         },
          binomial = {
            stopifnot(length(unique(y)) == 2)
            y_table <- table(y)
@@ -169,11 +174,11 @@ sgdnet.default <- function(x,
 
            class_names <- names(y_table)
 
-           # Transform response to {0, 1}, which is used internally
+           # Transform response to {-1, 1}, which is used internally
            y <- as.double(y)
-           y[y == min(y)] <- 0
+           y[y == min(y)] <- -1
            y[y == max(y)] <- 1
-           }
+         }
          )
 
   y <- as.matrix(y)
@@ -191,7 +196,11 @@ sgdnet.default <- function(x,
                   debug = debug)
 
   # Fit the model by calling the Rcpp routine.
-  res <- SgdnetCpp(x, y, control)
+  if (is_sparse) {
+    res <- SgdnetSparse(x, y, control)
+  } else {
+    res <- SgdnetDense(x, y, control)
+  }
 
   lambda <- res$lambda
   n_penalties <- length(lambda)

diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/R/utils.R b/R/utils.R
@@ -26,3 +26,14 @@ extract_family <- function(x) {
   family_index <- inherits(x, paste0("sgdnet_", supported_families), TRUE) > 0
   supported_families[family_index]
 }
+
+#' Check if x is FALSE
+#'
+#' @param x Argument to be tested.
+#'
+#' @return A bool.
+#'
+#' @keywords internal
+is_false <- function(x) {
+  identical(x, FALSE)
+}
diff --git a/data-raw/benchmarks.R b/data-raw/benchmarks.R
@@ -0,0 +1,110 @@
+
+# Preprepared benchmark data for vignette ---------------------------------
+
+binomial_loss <- function(fit, X, y, lambda, alpha) {
+  # tidy up data
+  n <- NROW(X)
+  X <- t(X)
+  y <- as.vector(as.numeric(y))
+  y[y == min(y)] <- 0
+  y[y > min(y)] <- 1
+  beta <- fit$beta
+  beta0 <- as.vector(fit$a0)
+
+  n <- length(y)
+  # binomial loglikelihood
+  cXb <- crossprod(X, beta)
+  loglik <- sum(y*(beta0 + cXb) - log(1 + exp(beta0 + cXb)))
+
+  # compute penalty
+  penalty <- 0.5*(1 - alpha)*sum(beta^2) + alpha*sum(abs(beta))
+  -loglik/n + lambda*penalty
+}
+
+library(SparseM)
+library(Matrix)
+library(glmnet)
+library(gsoc18saga) # https://github.com/jolars/gsoc18saga/
+
+# load datasets
+datasets <- list(
+  mushrooms = mushrooms,
+  covtype = covtype,
+  a9a = a9a,
+  phishing = phishing,
+  ijcnn1 = ijcnn1
+)
+
+# setup tolerance sequence to iterate over
+n_tol <- 100
+sgdnet_tol <- signif(exp(seq(log(0.95), log(1e-3), length.out = n_tol)), 2)
+glmnet_tol <- signif(exp(seq(log(0.95), log(1e-6), length.out = n_tol)), 2)
+
+# setup result data.frame
+data_binomial <- data.frame(dataset = character(),
+                            package = character(),
+                            penalty = character(),
+                            time = double(),
+                            loss = double())
+
+# compute timings
+for (i in seq_along(datasets)) {
+  cat(names(datasets)[i], "\n")
+  X <- as(datasets[[i]]$x, "dgCMatrix")
+  y <- datasets[[i]]$y
+  n_obs <- nrow(X)
+
+  lambda <- 1/n_obs
+
+  for (j in seq_len(n_tol)) {
+    set.seed(j*i)
+    cat("\r", j, "/", n_tol)
+    flush.console()
+    if (j == n_tol)
+      cat("\n")
+
+    for (penalty in c("ridge", "lasso")) {
+      alpha <- ifelse(penalty == "ridge", 0, 1)
+      glmnet_time <- system.time({
+        glmnet_fit <- glmnet::glmnet(X,
+                                     y,
+                                     family = "binomial",
+                                     lambda = lambda,
+                                     alpha = alpha,
+                                     intercept = TRUE,
+                                     standardize = FALSE,
+                                     thresh = glmnet_tol[j])
+      })
+
+      sgdnet_time <- system.time({
+        sgdnet_fit <- sgdnet::sgdnet(X,
+                                     y,
+                                     family = "binomial",
+                                     lambda = lambda,
+                                     alpha = alpha,
+                                     intercept = TRUE,
+                                     standardize = FALSE,
+                                     thresh = sgdnet_tol[j])
+      })
+
+      # retrieve loss
+      glmnet_loss <- binomial_loss(glmnet_fit, X, y, lambda = lambda, alpha = alpha)
+      sgdnet_loss <- binomial_loss(sgdnet_fit, X, y, lambda = lambda, alpha = alpha)
+
+      data_binomial <- rbind(data_binomial,
+                             data.frame(
+                               dataset = names(datasets)[i],
+                               package = c("glmnet", "sgdnet"),
+                               time = c(glmnet_time[3], sgdnet_time[3]),
+                               loss = c(glmnet_loss, sgdnet_loss),
+                               penalty = penalty
+                             ))
+    }
+  }
+}
+
+library(tidyverse)
+benchmarks_binomial <- data_binomial[order(data_binomial$time), ]
+
+usethis::use_data(benchmarks_binomial, overwrite = TRUE)
+
diff --git a/data/benchmarks_binomial.rda b/data/benchmarks_binomial.rda
diff --git a/man/SgdnetCpp.Rd b/man/SgdnetCpp.Rd