Skip to content

Commit

Permalink
add sparsity helper function
Browse files Browse the repository at this point in the history
  • Loading branch information
EmilHvitfeldt committed Jan 14, 2025
1 parent 35720dc commit f517972
Show file tree
Hide file tree
Showing 7 changed files with 275 additions and 0 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@ export(sparse_positions)
export(sparse_sd)
export(sparse_values)
export(sparse_var)
export(sparsity)
import(rlang)
useDynLib(sparsevctrs, .registration = TRUE)
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

* `coerce_to_sparse_matrix()` Now turns dense zeroes into sparse zeroes. (#77)

* `sparsity()` has been added, allows sparsity calculations of data.frames, matrices, and sparse matrices. (#82)

# sparsevctrs 0.1.0

* Initial CRAN submission.
112 changes: 112 additions & 0 deletions R/sparsity.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#' Calculate sparsity of data frames, matrices, and sparse matrices
#'
#' Turning data frame with sparse columns into sparse matrix using
#' [Matrix::sparseMatrix()].
#'
#' @param x a data frame, matrix of sparse matrix.
#' @param sample a integer or `NULL`. Number of rows to sample to estimate
#' sparsity. If `NULL` then no sampling is performed. Will not be used when
#' `x` is a sparse matrix. Defaults to `NULL`.
#'
#' @details
#' Only numeric 0s are considered zeroes in this calculations. Missing values,
#' logical vectors and then string `"0"` aren't counted.
#'
#' @return a single number, between 0 and 1.
#'
#' @examples
#'
#' # data frame
#' sparsity(mtcars)
#'
#' # Matrix
#' set.seed(1234)
#' mat <- matrix(sample(0:1, 100, TRUE, c(0.9, 0.1)), nrow = 10)
#' colnames(mat) <- letters[1:10]
#'
#' sparsity(mat)
#'
#' # Sparse matrix
#' sparse_mat <- Matrix::Matrix(mat, sparse = TRUE)
#'
#' sparsity(sparse_mat)
#' @export
sparsity <- function(x, sample = NULL) {
check_number_whole(sample, min = 1, allow_null = TRUE)

x_type <- input_type(x)

if (x_type != "sparse_matrix") {
nrows <- nrow(x)
if (!is.null(sample)) {
if (nrows < sample) {
sample <- nrows
}
x <- x[sample(nrows, sample), ]
}
}

res <- switch(
x_type,
data.frame = sparsity_df(x),
matrix = sparsity_mat(x),
sparse_matrix = sparsity_sparse_mat(x)
)

res
}

input_type <- function(x, call = rlang::caller_env()) {
if (is.data.frame(x)) {
return("data.frame")
} else if (is.matrix(x)) {
return("matrix")
} else if (any(methods::is(x) == "sparseMatrix")) {
return("sparse_matrix")
} else {
cli::cli_abort(
"{.arg x} must be a data frame, matrix, or sparse matrix,
Not {.obj_type_friendly {x}}.",
call = call

Check warning on line 70 in R/sparsity.R

View check run for this annotation

Codecov / codecov/patch

R/sparsity.R#L67-L70

Added lines #L67 - L70 were not covered by tests
)
}
}

count_zeroes <- function(x) {
if (!is.numeric(x)) {
return(0)
}

if (is_sparse_vector(x)) {
default <- sparse_default(x)
values <- sparse_values(x)
len <- length(x)

Check warning on line 83 in R/sparsity.R

View check run for this annotation

Codecov / codecov/patch

R/sparsity.R#L81-L83

Added lines #L81 - L83 were not covered by tests

if (default == 0) {
res <- len - length(values)

Check warning on line 86 in R/sparsity.R

View check run for this annotation

Codecov / codecov/patch

R/sparsity.R#L85-L86

Added lines #L85 - L86 were not covered by tests
} else {
res <- length(values)

Check warning on line 88 in R/sparsity.R

View check run for this annotation

Codecov / codecov/patch

R/sparsity.R#L88

Added line #L88 was not covered by tests
}
} else {
res <- sum(x == 0, na.rm = TRUE)
}
res
}

sparsity_df <- function(x) {
res <- vapply(x, count_zeroes, double(1))
res <- sum(res) / (nrow(x) * ncol(x))
res
}

sparsity_mat <- function(x) {
if (!is.numeric(x)) {
return(0)
}
sum(x == 0, na.rm = TRUE) / length(x)
}

sparsity_sparse_mat <- function(x) {
1 - (length(x@x) / length(x))
}

1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ reference:
- sparse_sd
- sparse_median
- sparse_dummy
- sparsity

- title: Utility Functions
contents:
Expand Down
43 changes: 43 additions & 0 deletions man/sparsity.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions tests/testthat/_snaps/sparsity.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# works with data.frames sample arg

Code
sparsity(mtcars, sample = 0.4)
Condition
Error in `sparsity()`:
! `sample` must be a whole number or `NULL`, not the number 0.4.

108 changes: 108 additions & 0 deletions tests/testthat/test-sparsity.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
test_that("works with data.frames", {
mtcars_exp_sparsity <- mean(mtcars == 0)

expect_identical(
sparsity(mtcars),
mtcars_exp_sparsity
)
})

test_that("works with non-numeric data.frames", {
vs <- mtcars$vs
mtcars$vs <- 4
mtcars_exp_sparsity <- mean(mtcars == 0)

mtcars$vs <- as.character(vs)

expect_identical(
sparsity(mtcars),
mtcars_exp_sparsity
)

mtcars$vs <- as.logical(vs)

expect_identical(
sparsity(mtcars),
mtcars_exp_sparsity
)

mtcars$vs <- ifelse(vs == 1, 1, NA)

expect_identical(
sparsity(mtcars),
mtcars_exp_sparsity
)
})

test_that("works with data.frames sample arg", {
set.seed(1234)
exp <- mean(mtcars[sample(32, 10), ] == 0)

set.seed(1234)
expect_identical(
sparsity(mtcars, sample = 10),
exp
)

set.seed(1234)
exp <- mean(mtcars == 0)

set.seed(1234)
expect_identical(
sparsity(mtcars, sample = 1000),
exp
)

expect_snapshot(
error = TRUE,
sparsity(mtcars, sample = 0.4)
)
})

test_that("works with matrices", {
mtcars_mat <- as.matrix(mtcars)
mtcars_exp_sparsity <- mean(mtcars_mat == 0)

expect_identical(
sparsity(mtcars_mat),
mtcars_exp_sparsity
)

mtcars_mat[1, 1] <- NA

expect_identical(
sparsity(mtcars_mat),
mtcars_exp_sparsity
)

mtcars_lgl <- apply(mtcars_mat, 2, as.logical)

expect_identical(
sparsity(mtcars_lgl),
0
)

mtcars_chr <- apply(mtcars_mat, 2, as.character)

expect_identical(
sparsity(mtcars_chr),
0
)
})

test_that("works with sparse matrices", {
mtcars_sparse_mat <- coerce_to_sparse_matrix(mtcars)
mtcars_exp_sparsity <- mean(as.logical(mtcars_sparse_mat == 0))

expect_equal(
sparsity(mtcars_sparse_mat),
mtcars_exp_sparsity
)

mtcars_sparse_mat[1, 1] <- NA

expect_equal(
sparsity(mtcars_sparse_mat),
mtcars_exp_sparsity
)
})

0 comments on commit f517972

Please sign in to comment.