From 6c9b3ae75fc76f11d9d967f5b01fc9d5dbd0c8b1 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Mon, 14 Oct 2024 14:45:35 -0700 Subject: [PATCH] add sparse_sd() and sparse_median() --- NAMESPACE | 2 + NEWS.md | 2 +- R/sparse_median.R | 61 +++++++++++++++++++++++++++++ R/sparse_sd.R | 40 +++++++++++++++++++ _pkgdown.yml | 2 + man/sparse_median.Rd | 47 ++++++++++++++++++++++ man/sparse_sd.Rd | 49 +++++++++++++++++++++++ tests/testthat/test-sparse_median.R | 48 +++++++++++++++++++++++ tests/testthat/test-sparse_sd.R | 40 +++++++++++++++++++ 9 files changed, 290 insertions(+), 1 deletion(-) create mode 100644 R/sparse_median.R create mode 100644 R/sparse_sd.R create mode 100644 man/sparse_median.Rd create mode 100644 man/sparse_sd.Rd create mode 100644 tests/testthat/test-sparse_median.R create mode 100644 tests/testthat/test-sparse_sd.R diff --git a/NAMESPACE b/NAMESPACE index 7ae56cd..b6c9564 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,7 +20,9 @@ export(sparse_double) export(sparse_integer) export(sparse_logical) export(sparse_mean) +export(sparse_median) export(sparse_positions) +export(sparse_sd) export(sparse_values) export(sparse_var) import(rlang) diff --git a/NEWS.md b/NEWS.md index eb91d85..b93186d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # sparsevctrs (development version) -* Helper functions `sparse_mean()` and `sparse_var()` has been added. (#49) +* Helper functions `sparse_mean()`, `sparse_var()`, `sparse_sd()`, `sparse_median()` has been added. (#49) * All sparse vector types now have a significant smaller base object size. (#67) diff --git a/R/sparse_median.R b/R/sparse_median.R new file mode 100644 index 0000000..8865a4e --- /dev/null +++ b/R/sparse_median.R @@ -0,0 +1,61 @@ +#' Calculate median from sparse vectors +#' +#' @param x A sparse numeric vector. +#' @param na_rm Logical, whether to remove missing values. Defaults to `FALSE`. +#' +#' @details +#' This function, as with any of the other helper functions assumes that the +#' input `x` is a sparse numeric vector. This is done for performance reasons, +#' and it is thus the users responsibility to perform input checking. +#' +#' @return single numeric value. +#' +#' @examples +#' sparse_median( +#' sparse_double(1000, 1, 1000) +#' ) +#' +#' sparse_median( +#' sparse_double(1000, 1, 1000, default = 1) +#' ) +#' +#' sparse_median( +#' sparse_double(c(10, 50, 11), c(1, 50, 111), 1000) +#' ) +#' +#' sparse_median( +#' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000) +#' ) +#' +#' sparse_median( +#' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000), +#' na_rm = TRUE +#' ) +#' +#' @export +sparse_median <- function(x, na_rm = FALSE) { + default <- sparse_default(x) + values <- sparse_values(x) + values_len <- length(values) + + if (values_len == 0) { + return(default) + } + + x_len <- length(x) + + if ((x_len / 2) > values_len) { + if (na_rm) { + return(default) + } else { + if (any(is.na(values))) { + return(NA_real_) + } else { + + return(default) + } + } + } + + stats::median(x, na.rm = na_rm) +} diff --git a/R/sparse_sd.R b/R/sparse_sd.R new file mode 100644 index 0000000..cb1bab5 --- /dev/null +++ b/R/sparse_sd.R @@ -0,0 +1,40 @@ +#' Calculate standard diviation from sparse vectors +#' +#' @param x A sparse numeric vector. +#' @param na_rm Logical, whether to remove missing values. Defaults to `FALSE`. +#' +#' @details +#' This function, as with any of the other helper functions assumes that the +#' input `x` is a sparse numeric vector. This is done for performance reasons, +#' and it is thus the users responsibility to perform input checking. +#' +#' Much like [sd()] it uses the denominator `n-1`. +#' +#' @return single numeric value. +#' +#' @examples +#' sparse_sd( +#' sparse_double(1000, 1, 1000) +#' ) +#' +#' sparse_sd( +#' sparse_double(1000, 1, 1000, default = 1) +#' ) +#' +#' sparse_sd( +#' sparse_double(c(10, 50, 11), c(1, 50, 111), 1000) +#' ) +#' +#' sparse_sd( +#' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000) +#' ) +#' +#' sparse_sd( +#' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000), +#' na_rm = TRUE +#' ) +#' +#' @export +sparse_sd <- function(x, na_rm = FALSE) { + sqrt(sparse_var(x, na_rm = na_rm)) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 433f759..abbb660 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -30,6 +30,8 @@ reference: contents: - sparse_mean - sparse_var + - sparse_sd + - sparse_median - title: Utility Functions contents: diff --git a/man/sparse_median.Rd b/man/sparse_median.Rd new file mode 100644 index 0000000..ae37863 --- /dev/null +++ b/man/sparse_median.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sparse_median.R +\name{sparse_median} +\alias{sparse_median} +\title{Calculate median from sparse vectors} +\usage{ +sparse_median(x, na_rm = FALSE) +} +\arguments{ +\item{x}{A sparse numeric vector.} + +\item{na_rm}{Logical, whether to remove missing values. Defaults to \code{FALSE}.} +} +\value{ +single numeric value. +} +\description{ +Calculate median from sparse vectors +} +\details{ +This function, as with any of the other helper functions assumes that the +input \code{x} is a sparse numeric vector. This is done for performance reasons, +and it is thus the users responsibility to perform input checking. +} +\examples{ +sparse_median( + sparse_double(1000, 1, 1000) +) + +sparse_median( + sparse_double(1000, 1, 1000, default = 1) +) + +sparse_median( + sparse_double(c(10, 50, 11), c(1, 50, 111), 1000) +) + +sparse_median( + sparse_double(c(10, NA, 11), c(1, 50, 111), 1000) +) + +sparse_median( + sparse_double(c(10, NA, 11), c(1, 50, 111), 1000), + na_rm = TRUE +) + +} diff --git a/man/sparse_sd.Rd b/man/sparse_sd.Rd new file mode 100644 index 0000000..60ae2e1 --- /dev/null +++ b/man/sparse_sd.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sparse_sd.R +\name{sparse_sd} +\alias{sparse_sd} +\title{Calculate standard diviation from sparse vectors} +\usage{ +sparse_sd(x, na_rm = FALSE) +} +\arguments{ +\item{x}{A sparse numeric vector.} + +\item{na_rm}{Logical, whether to remove missing values. Defaults to \code{FALSE}.} +} +\value{ +single numeric value. +} +\description{ +Calculate standard diviation from sparse vectors +} +\details{ +This function, as with any of the other helper functions assumes that the +input \code{x} is a sparse numeric vector. This is done for performance reasons, +and it is thus the users responsibility to perform input checking. + +Much like \code{\link[=sd]{sd()}} it uses the denominator \code{n-1}. +} +\examples{ +sparse_sd( + sparse_double(1000, 1, 1000) +) + +sparse_sd( + sparse_double(1000, 1, 1000, default = 1) +) + +sparse_sd( + sparse_double(c(10, 50, 11), c(1, 50, 111), 1000) +) + +sparse_sd( + sparse_double(c(10, NA, 11), c(1, 50, 111), 1000) +) + +sparse_sd( + sparse_double(c(10, NA, 11), c(1, 50, 111), 1000), + na_rm = TRUE +) + +} diff --git a/tests/testthat/test-sparse_median.R b/tests/testthat/test-sparse_median.R new file mode 100644 index 0000000..e9af9aa --- /dev/null +++ b/tests/testthat/test-sparse_median.R @@ -0,0 +1,48 @@ +test_that("sparse_median() works", { + x <- sparse_double(10, 5, 1000) + + expect_equal(median(x), sparse_median(x)) + + x <- sparse_double(c(10, -10), c(5, 100), 1000) + + expect_equal(median(x), sparse_median(x)) + + x <- sparse_double(c(10, -10), c(5, 100), 1000, default = 20) + + expect_equal(median(x), sparse_median(x)) + + x <- sparse_double(c(NA, 10, 30), 1:3, 1000) + + expect_equal(median(x), sparse_median(x)) + + x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100) + + expect_equal(median(x), sparse_median(x)) + + x <- sparse_double(c(NA, 10, 30), 1:3, 1000) + + expect_equal(median(x, na.rm = TRUE), sparse_median(x, na_rm = TRUE)) + + x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100) + + expect_equal(median(x, na.rm = TRUE), sparse_median(x, na_rm = TRUE)) + + x <- sparse_double(numeric(), integer(), 1000) + + expect_equal(median(x), sparse_median(x)) + + x <- sparse_double(numeric(), integer(), 1000, default = 100) + + expect_equal(median(x), sparse_median(x)) +}) + +test_that("sparse_median() edge cases", { + x <- sparse_double(c(10, 10), c(1, 2), 4) + + expect_equal(median(x), sparse_median(x)) + + x <- sparse_double(c(10, 10, NA), c(1, 2, 3), 5) + + expect_equal(median(x), sparse_median(x)) + expect_equal(median(x, na.rm = TRUE), sparse_median(x, na_rm = TRUE)) +}) diff --git a/tests/testthat/test-sparse_sd.R b/tests/testthat/test-sparse_sd.R new file mode 100644 index 0000000..8394f52 --- /dev/null +++ b/tests/testthat/test-sparse_sd.R @@ -0,0 +1,40 @@ +test_that("sparse_sd() works", { + x <- sparse_double(10, 5, 1000) + + expect_equal(sd(x), sparse_sd(x)) + + x <- sparse_double(c(10, -10), c(5, 100), 1000) + + expect_equal(sd(x), sparse_sd(x)) + + x <- sparse_double(c(10, -10), c(5, 100), 1000, default = 20) + + expect_equal(sd(x), sparse_sd(x)) + + x <- sparse_double(c(NA, 10, 30), 1:3, 1000) + + expect_equal(sd(x), sparse_sd(x)) + + x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100) + + expect_equal(sd(x), sparse_sd(x)) + + x <- sparse_double(c(NA, 10, 30), 1:3, 1000) + + expect_equal(sd(x, na.rm = TRUE), sparse_sd(x, na_rm = TRUE)) + + x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100) + + expect_equal(sd(x, na.rm = TRUE), sparse_sd(x, na_rm = TRUE)) + + x <- sparse_double(numeric(), integer(), 1000) + + expect_equal(sd(x), sparse_sd(x)) + + x <- sparse_double(numeric(), integer(), 1000, default = 100) + + expect_equal(sd(x), sparse_sd(x)) +}) +test_that("multiplication works", { + expect_equal(2 * 2, 4) +})