Skip to content

Commit

Permalink
add sparse_sd() and sparse_median()
Browse files Browse the repository at this point in the history
  • Loading branch information
EmilHvitfeldt committed Oct 14, 2024
1 parent 9a8e6bd commit 6c9b3ae
Show file tree
Hide file tree
Showing 9 changed files with 290 additions and 1 deletion.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ export(sparse_double)
export(sparse_integer)
export(sparse_logical)
export(sparse_mean)
export(sparse_median)
export(sparse_positions)
export(sparse_sd)
export(sparse_values)
export(sparse_var)
import(rlang)
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# sparsevctrs (development version)

* Helper functions `sparse_mean()` and `sparse_var()` has been added. (#49)
* Helper functions `sparse_mean()`, `sparse_var()`, `sparse_sd()`, `sparse_median()` has been added. (#49)

* All sparse vector types now have a significant smaller base object size. (#67)

Expand Down
61 changes: 61 additions & 0 deletions R/sparse_median.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#' Calculate median from sparse vectors
#'
#' @param x A sparse numeric vector.
#' @param na_rm Logical, whether to remove missing values. Defaults to `FALSE`.
#'
#' @details
#' This function, as with any of the other helper functions assumes that the
#' input `x` is a sparse numeric vector. This is done for performance reasons,
#' and it is thus the users responsibility to perform input checking.
#'
#' @return single numeric value.
#'
#' @examples
#' sparse_median(
#' sparse_double(1000, 1, 1000)
#' )
#'
#' sparse_median(
#' sparse_double(1000, 1, 1000, default = 1)
#' )
#'
#' sparse_median(
#' sparse_double(c(10, 50, 11), c(1, 50, 111), 1000)
#' )
#'
#' sparse_median(
#' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000)
#' )
#'
#' sparse_median(
#' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000),
#' na_rm = TRUE
#' )
#'
#' @export
sparse_median <- function(x, na_rm = FALSE) {
default <- sparse_default(x)
values <- sparse_values(x)
values_len <- length(values)

if (values_len == 0) {
return(default)
}

x_len <- length(x)

if ((x_len / 2) > values_len) {
if (na_rm) {
return(default)
} else {
if (any(is.na(values))) {
return(NA_real_)
} else {

return(default)
}
}
}

stats::median(x, na.rm = na_rm)
}
40 changes: 40 additions & 0 deletions R/sparse_sd.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#' Calculate standard diviation from sparse vectors
#'
#' @param x A sparse numeric vector.
#' @param na_rm Logical, whether to remove missing values. Defaults to `FALSE`.
#'
#' @details
#' This function, as with any of the other helper functions assumes that the
#' input `x` is a sparse numeric vector. This is done for performance reasons,
#' and it is thus the users responsibility to perform input checking.
#'
#' Much like [sd()] it uses the denominator `n-1`.
#'
#' @return single numeric value.
#'
#' @examples
#' sparse_sd(
#' sparse_double(1000, 1, 1000)
#' )
#'
#' sparse_sd(
#' sparse_double(1000, 1, 1000, default = 1)
#' )
#'
#' sparse_sd(
#' sparse_double(c(10, 50, 11), c(1, 50, 111), 1000)
#' )
#'
#' sparse_sd(
#' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000)
#' )
#'
#' sparse_sd(
#' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000),
#' na_rm = TRUE
#' )
#'
#' @export
sparse_sd <- function(x, na_rm = FALSE) {
sqrt(sparse_var(x, na_rm = na_rm))
}
2 changes: 2 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ reference:
contents:
- sparse_mean
- sparse_var
- sparse_sd
- sparse_median

- title: Utility Functions
contents:
Expand Down
47 changes: 47 additions & 0 deletions man/sparse_median.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 49 additions & 0 deletions man/sparse_sd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 48 additions & 0 deletions tests/testthat/test-sparse_median.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
test_that("sparse_median() works", {
x <- sparse_double(10, 5, 1000)

expect_equal(median(x), sparse_median(x))

x <- sparse_double(c(10, -10), c(5, 100), 1000)

expect_equal(median(x), sparse_median(x))

x <- sparse_double(c(10, -10), c(5, 100), 1000, default = 20)

expect_equal(median(x), sparse_median(x))

x <- sparse_double(c(NA, 10, 30), 1:3, 1000)

expect_equal(median(x), sparse_median(x))

x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)

expect_equal(median(x), sparse_median(x))

x <- sparse_double(c(NA, 10, 30), 1:3, 1000)

expect_equal(median(x, na.rm = TRUE), sparse_median(x, na_rm = TRUE))

x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)

expect_equal(median(x, na.rm = TRUE), sparse_median(x, na_rm = TRUE))

x <- sparse_double(numeric(), integer(), 1000)

expect_equal(median(x), sparse_median(x))

x <- sparse_double(numeric(), integer(), 1000, default = 100)

expect_equal(median(x), sparse_median(x))
})

test_that("sparse_median() edge cases", {
x <- sparse_double(c(10, 10), c(1, 2), 4)

expect_equal(median(x), sparse_median(x))

x <- sparse_double(c(10, 10, NA), c(1, 2, 3), 5)

expect_equal(median(x), sparse_median(x))
expect_equal(median(x, na.rm = TRUE), sparse_median(x, na_rm = TRUE))
})
40 changes: 40 additions & 0 deletions tests/testthat/test-sparse_sd.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
test_that("sparse_sd() works", {
x <- sparse_double(10, 5, 1000)

expect_equal(sd(x), sparse_sd(x))

x <- sparse_double(c(10, -10), c(5, 100), 1000)

expect_equal(sd(x), sparse_sd(x))

x <- sparse_double(c(10, -10), c(5, 100), 1000, default = 20)

expect_equal(sd(x), sparse_sd(x))

x <- sparse_double(c(NA, 10, 30), 1:3, 1000)

expect_equal(sd(x), sparse_sd(x))

x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)

expect_equal(sd(x), sparse_sd(x))

x <- sparse_double(c(NA, 10, 30), 1:3, 1000)

expect_equal(sd(x, na.rm = TRUE), sparse_sd(x, na_rm = TRUE))

x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)

expect_equal(sd(x, na.rm = TRUE), sparse_sd(x, na_rm = TRUE))

x <- sparse_double(numeric(), integer(), 1000)

expect_equal(sd(x), sparse_sd(x))

x <- sparse_double(numeric(), integer(), 1000, default = 100)

expect_equal(sd(x), sparse_sd(x))
})
test_that("multiplication works", {
expect_equal(2 * 2, 4)
})

0 comments on commit 6c9b3ae

Please sign in to comment.