Skip to content

Commit

Permalink
Merge pull request #11 from jolars/original-saga
Browse files Browse the repository at this point in the history
Redesigned algorithm based on DeFazio 2014
  • Loading branch information
jolars authored Jul 10, 2018
2 parents 744f1ce + fb1a51e commit 5f22c99
Show file tree
Hide file tree
Showing 26 changed files with 1,297 additions and 1,081 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.Rproj.user
.Rhistory
.RData
inst/doc
9 changes: 6 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,11 @@ Imports:
Suggests:
covr,
testthat,
glmnet
glmnet,
knitr,
rmarkdown,
latticeExtra
LinkingTo:
Rcpp (>= 0.12.16),
RcppArmadillo (>= 0.8.500.0.0)

RcppEigen (>= 0.3.3.4.0)
VignetteBuilder: knitr
92 changes: 10 additions & 82 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,84 +1,6 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Deviance
#'
#' Computes the deviance of the model given by `weights` and `intercept`.
#'
#' @param x a feature matrix (dense or sparse)
#' @param y a response vector
#' @param weights a vector of coefficients
#' @param intercept an intercept vector
#' @param is_sparse whether x is sparse
#' @param n_samples the number of samples
#' @param n_feature the number of features
#' @param n_classes the number of classes
#'
#' @return Returns the deviance.
#'
#' @noRd
#' @keywords internal
NULL

#' Rescale weights and intercept before returning these to user
#'
#' Currently no processing, and therefore no rescaling, is done
#' when the intercept is fit. If x is sparse.
#'
#' @param weights weights
#' @param weights_archive storage for weights
#' @param intercept intercept
#' @param intercept_archive storage for intercepts on a per-penalty basis
#' @param x_center the offset (mean) used to possibly have centered x
#' @param x_scale the scaling that was applied to x
#' @param y_center the offset (mean) that y was offset with
#' @param y_scale scaling for y
#' @param n_features the number of features
#' @param n_classes number of classes
#' @param fit_intercept whether to fit the intercept
#'
#' @return `weights` and `intercept` are rescaled and stored in weights_archive
#' and intercept_archive.
#'
#' @noRd
NULL

#' Compute Regularization Path
#'
#' This function computes the regularization path as in glmnet so that
#' the first solution is the null solution (if elasticnet_mix != 0).
#'
#' @param lambda lambda values in input -- empty by default
#' @param n_lambda required number of penalties
#' @param lambda_min_ratio smallest lambda_min_ratio
#' @param elasticnet_mix ratio of l1 penalty to l2. Same as alpha in glmnet.
#' @param x feature matrix
#' @param y response vector
#' @param n_samples number of samples
#' @param alpha l2-penalty
#' @param beta l1-penalty
#' @param y_scale scale of y, used only to return lambda values to same
#' scale as in glmnet
#'
#' @return lambda, alpha and beta are updated.
#'
#' @noRd
NULL

#' Adapative transposing of feature matrix
#'
#' For sparse matrices, armadillo does not (yet?) have a inplace
#' transpose method, so we overload for sparse and dense matrices,
#' transposing inplace when x is dense.
#'
#' @param x a sparse or dense matrix
#'
#' @return x transposed.
#'
#' @keywords internal
#' @noRd
NULL

#' Setup sgdnet Model Options
#'
#' Collect parameters from `control` and setup storage for coefficients,
Expand Down Expand Up @@ -113,12 +35,18 @@ NULL
#' * losses: the loss at each outer iteration per fit,
#' * npasses: the number of effective passes (epochs) accumulated over,
#' all lambda values, and
#' * return_codes: the convergence result. 0 mean that the algorithm
#' * return_codes: the convergence result. 0 means that the algorithm
#' converged, 1 means that `max_iter` was reached before the algorithm
#' converged.
#'
NULL

#' @keywords internal
SgdnetCpp <- function(x_in, y, control) {
.Call(`_sgdnet_SgdnetCpp`, x_in, y, control)
#' @noRd
SgdnetDense <- function(x, y, control) {
.Call(`_sgdnet_SgdnetDense`, x, y, control)
}

SgdnetSparse <- function(x, y, control) {
.Call(`_sgdnet_SgdnetSparse`, x, y, control)
}

14 changes: 14 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,17 @@
#' UCI Machine Learning Repository <http://archive.ics.uci.edu/ml>. Irvine,
#' CA: University of California, School of Information and Computer Science.
"mushrooms"

#' Benchmark data for binomial response family
#'
#' @source <https://github.com/jolars/sgdnet/data-raw/
#'
#' @format A `data.frame` with 5 variables and 2000 observations.
#' \describe{
#' \item{dataset}{dataset used}
#' \item{package}{R package}
#' \item{time}{run time in seconds}
#' \item{loss}{objective loss}
#' \item{penalty}{type of penalty, "ridge" or "lasso"}
#' }
"benchmarks_binomial"
6 changes: 5 additions & 1 deletion R/plot.sgdnet.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,17 @@ plot.sgdnet <- function(x, xvar = c("norm", "lambda", "dev"), ...) {

plot_data <- utils::stack(as.data.frame(beta))

n_vars = length(unique(plot_data$ind))

plot_args <- list(
x = quote(values ~ xval),
groups = quote(ind),
data = quote(plot_data),
type = "l",
ylab = expression(beta),
auto.key = list(space = "right", lines = TRUE, points = FALSE),
auto.key = if (n_vars <= 10)
list(space = "right", lines = TRUE, points = FALSE)
else FALSE,
abline = within(lattice::trellis.par.get("reference.line"), {h = 0})
)

Expand Down
29 changes: 19 additions & 10 deletions R/sgdnet.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@
#' The *binomial* family solves the following objective:
#'
#' \deqn{
#' -\frac{1}{n} \sum_{i=1}^n
#' \bigg[y_i (\beta_0 + x_i^\mathsf{T} \beta) - \log\Big(1 + e^{\beta_0 + x_i^\mathsf{T} \beta}\Big)\bigg]
#' \frac{1}{n} \sum_{i=1}^n
#' \log\left[1 + e^{-y_i\left(\beta_0 + x^\mathsf{T} \beta \right)} \right]
#' + \lambda \left( \frac{1 - \alpha}{2} ||\beta||_2^2
#' + \alpha||\beta||_1 \right),
#' }{
#' -1/n \sum [y_i(\beta_0 + x^T \beta) - log(1 + exp(\beta_0 + x^T \beta)]
#' 1/n \sum log {1 + exp[-y_i(\beta_0 + x^T \beta)]}
#' + \lambda [(1 - \alpha)/2 ||\beta||_2^2 + \alpha||\beta||_1],
#' }
#' where \eqn{y_i \in \{0, 1\}}{y ~ {0, 1}}.
#' where \eqn{y_i \in \{-1, 1\}}{y ~ {-1, 1}}.
#'
#' @section Regularization Path:
#' The default regularization path is a sequence of `nlambda`
Expand Down Expand Up @@ -137,8 +137,13 @@ sgdnet.default <- function(x,
# Collect sgdnet-specific options for debugging and more
debug <- getOption("sgdnet.debug")

if (is.null(lambda))
if (is.null(lambda) || is_false(lambda))
lambda <- double(0L)
else
nlambda <- length(lambda)

if (nlambda == 0)
stop("lambda path cannot be of zero length.")

stopifnot(identical(NROW(y), NROW(x)),
!any(is.na(y)),
Expand All @@ -158,7 +163,7 @@ sgdnet.default <- function(x,
gaussian = {
stopifnot(is.numeric(y),
NCOL(y) == 1)
},
},
binomial = {
stopifnot(length(unique(y)) == 2)
y_table <- table(y)
Expand All @@ -169,11 +174,11 @@ sgdnet.default <- function(x,

class_names <- names(y_table)

# Transform response to {0, 1}, which is used internally
# Transform response to {-1, 1}, which is used internally
y <- as.double(y)
y[y == min(y)] <- 0
y[y == min(y)] <- -1
y[y == max(y)] <- 1
}
}
)

y <- as.matrix(y)
Expand All @@ -191,7 +196,11 @@ sgdnet.default <- function(x,
debug = debug)

# Fit the model by calling the Rcpp routine.
res <- SgdnetCpp(x, y, control)
if (is_sparse) {
res <- SgdnetSparse(x, y, control)
} else {
res <- SgdnetDense(x, y, control)
}

lambda <- res$lambda
n_penalties <- length(lambda)
Expand Down
Binary file added R/sysdata.rda
Binary file not shown.
11 changes: 11 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,14 @@ extract_family <- function(x) {
family_index <- inherits(x, paste0("sgdnet_", supported_families), TRUE) > 0
supported_families[family_index]
}

#' Check if x is FALSE
#'
#' @param x Argument to be tested.
#'
#' @return A bool.
#'
#' @keywords internal
is_false <- function(x) {
identical(x, FALSE)
}
110 changes: 110 additions & 0 deletions data-raw/benchmarks.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@

# Preprepared benchmark data for vignette ---------------------------------

binomial_loss <- function(fit, X, y, lambda, alpha) {
# tidy up data
n <- NROW(X)
X <- t(X)
y <- as.vector(as.numeric(y))
y[y == min(y)] <- 0
y[y > min(y)] <- 1
beta <- fit$beta
beta0 <- as.vector(fit$a0)

n <- length(y)
# binomial loglikelihood
cXb <- crossprod(X, beta)
loglik <- sum(y*(beta0 + cXb) - log(1 + exp(beta0 + cXb)))

# compute penalty
penalty <- 0.5*(1 - alpha)*sum(beta^2) + alpha*sum(abs(beta))
-loglik/n + lambda*penalty
}

library(SparseM)
library(Matrix)
library(glmnet)
library(gsoc18saga) # https://github.com/jolars/gsoc18saga/

# load datasets
datasets <- list(
mushrooms = mushrooms,
covtype = covtype,
a9a = a9a,
phishing = phishing,
ijcnn1 = ijcnn1
)

# setup tolerance sequence to iterate over
n_tol <- 100
sgdnet_tol <- signif(exp(seq(log(0.95), log(1e-3), length.out = n_tol)), 2)
glmnet_tol <- signif(exp(seq(log(0.95), log(1e-6), length.out = n_tol)), 2)

# setup result data.frame
data_binomial <- data.frame(dataset = character(),
package = character(),
penalty = character(),
time = double(),
loss = double())

# compute timings
for (i in seq_along(datasets)) {
cat(names(datasets)[i], "\n")
X <- as(datasets[[i]]$x, "dgCMatrix")
y <- datasets[[i]]$y
n_obs <- nrow(X)

lambda <- 1/n_obs

for (j in seq_len(n_tol)) {
set.seed(j*i)
cat("\r", j, "/", n_tol)
flush.console()
if (j == n_tol)
cat("\n")

for (penalty in c("ridge", "lasso")) {
alpha <- ifelse(penalty == "ridge", 0, 1)
glmnet_time <- system.time({
glmnet_fit <- glmnet::glmnet(X,
y,
family = "binomial",
lambda = lambda,
alpha = alpha,
intercept = TRUE,
standardize = FALSE,
thresh = glmnet_tol[j])
})

sgdnet_time <- system.time({
sgdnet_fit <- sgdnet::sgdnet(X,
y,
family = "binomial",
lambda = lambda,
alpha = alpha,
intercept = TRUE,
standardize = FALSE,
thresh = sgdnet_tol[j])
})

# retrieve loss
glmnet_loss <- binomial_loss(glmnet_fit, X, y, lambda = lambda, alpha = alpha)
sgdnet_loss <- binomial_loss(sgdnet_fit, X, y, lambda = lambda, alpha = alpha)

data_binomial <- rbind(data_binomial,
data.frame(
dataset = names(datasets)[i],
package = c("glmnet", "sgdnet"),
time = c(glmnet_time[3], sgdnet_time[3]),
loss = c(glmnet_loss, sgdnet_loss),
penalty = penalty
))
}
}
}

library(tidyverse)
benchmarks_binomial <- data_binomial[order(data_binomial$time), ]

usethis::use_data(benchmarks_binomial, overwrite = TRUE)

Binary file added data/benchmarks_binomial.rda
Binary file not shown.
35 changes: 0 additions & 35 deletions man/SgdnetCpp.Rd

This file was deleted.

Loading

0 comments on commit 5f22c99

Please sign in to comment.