Merge pull request #40 from EmilHvitfeldt/default-value

Add default argument to sparse_double()
r-lib · May 9, 2024 · 4923a31 · 4923a31
2 parents 124571b + 6fce653
commit 4923a31
Show file tree

Hide file tree

Showing 8 changed files with 122 additions and 31 deletions.
diff --git a/R/altrep.R b/R/altrep.R
@@ -1,8 +1,10 @@
 #' Create sparse double vector
 #' 
-#' @param values Numeric vector, values of non-zero entries.
+#' @param values double vector, values of non-zero entries.
 #' @param positions integer vector, indices of non-zero entries.
-#' @param length Integer, Length of vector.
+#' @param length integer, Length of vector.
+#' @param default double, value at indices not specified by `positions`. 
+#'   Defaults to `0`.
 #' 
 #' @details
 #' 
@@ -11,7 +13,8 @@
 #' 
 #' Allowed values for `value` is double and integer values. integer values will
 #' be coerced to doubles. Missing values such as `NA` and `NA_real_` are 
-#' allowed. Everything else is disallowed, This includes `Inf` and `NaN`.
+#' allowed. Everything else is disallowed, This includes `Inf` and `NaN`. The
+#' values are also not supposed to take the same value as `default`.
 #' 
 #' `positions` should be integers or integer-like doubles. Everything else is 
 #' not allowed. Positions should furthermore be positive (`0` not allowed),
@@ -32,12 +35,14 @@
 #'   sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 1000000000)
 #' )
 #' @export
-sparse_double <- function(values, positions, length) {
+sparse_double <- function(values, positions, length, default = 0) {
+  check_number_decimal(default)
   check_number_whole(length, min = 0)
   if (!is.integer(length)) {
     length <- as.integer(length)
   }
 
+
   if (identical(values, NA)) {
     values <- NA_real_
   }
@@ -162,24 +167,25 @@ sparse_double <- function(values, positions, length) {
     )
   }
 
-  if (any(values == 0, na.rm = TRUE)) {
-    offenders <- which(values == 0)
+  if (any(values == default, na.rm = TRUE)) {
+    offenders <- which(values == default)
     cli::cli_abort(
       c(
-        x = "{.arg values} value must not be 0.",
-        i = "0 values at index: {offenders}."
+        x = "{.arg values} value must not be equal to the default {default}.",
+        i = "{default} values at index: {offenders}."
       )
     )
   }
 
-  new_sparse_double(values, positions, length)
+  new_sparse_double(values, positions, length, default)
 }
 
-new_sparse_double <- function(values, positions, length) {
+new_sparse_double <- function(values, positions, length, default) {
   x <- list(
-    val = values, 
-    pos = positions, 
-    len = length
+    val = values,
+    pos = positions,
+    len = length,
+    default = default
   )
 
   .Call(ffi_altrep_new_sparse_double, x)

diff --git a/man/sparse_double.Rd b/man/sparse_double.Rd
diff --git a/src/altrep-sparse-double.c b/src/altrep-sparse-double.c
@@ -32,11 +32,14 @@ SEXP alrep_sparse_double_Materialize(SEXP x) {
 
   const R_xlen_t len = extract_len(x);
 
+  SEXP default_val = extract_default(x);
+  const double v_default_val = REAL_ELT(default_val, 0);
+
   out = PROTECT(Rf_allocVector(REALSXP, len));
   double* v_out = REAL(out);
 
   for (R_xlen_t i = 0; i < len; ++i) {
-    v_out[i] = 0;
+    v_out[i] = v_default_val;
   }
 
   const R_xlen_t n_positions = Rf_xlength(pos);
@@ -122,7 +125,7 @@ static SEXP altrep_sparse_double_Extract_subset(SEXP x, SEXP indx, SEXP call) {
     ++n_hits;
   }
 
-  SEXP out = PROTECT(Rf_allocVector(VECSXP, 3));
+  SEXP out = PROTECT(Rf_allocVector(VECSXP, 4));
 
   SEXP out_val = Rf_allocVector(REALSXP, n_hits);
   SET_VECTOR_ELT(out, 0, out_val);
@@ -135,11 +138,15 @@ static SEXP altrep_sparse_double_Extract_subset(SEXP x, SEXP indx, SEXP call) {
   SEXP out_length = Rf_ScalarInteger((int) size);
   SET_VECTOR_ELT(out, 2, out_length);
 
-  SEXP names = Rf_allocVector(STRSXP, 3);
+  SEXP out_default = extract_default(x);
+  SET_VECTOR_ELT(out, 3, out_default);
+
+  SEXP names = Rf_allocVector(STRSXP, 4);
   Rf_setAttrib(out, R_NamesSymbol, names);
   SET_STRING_ELT(names, 0, Rf_mkChar("val"));
   SET_STRING_ELT(names, 1, Rf_mkChar("pos"));
   SET_STRING_ELT(names, 2, Rf_mkChar("len"));
+  SET_STRING_ELT(names, 3, Rf_mkChar("default"));
 
   R_xlen_t i_out = 0;
 
@@ -207,6 +214,9 @@ static double altrep_sparse_double_Elt(SEXP x, R_xlen_t i) {
 
   const R_xlen_t len = extract_len(x);
 
+  SEXP default_val = extract_default(x);
+  const double v_default_val = REAL_ELT(default_val, 0);
+
   if (i > len) {
     // OOB of vector itself
     return NA_REAL;
@@ -218,7 +228,7 @@ static double altrep_sparse_double_Elt(SEXP x, R_xlen_t i) {
 
   if (loc == size) {
     // Can't find it, must be the default value
-    return 0;
+    return v_default_val;
   } else {
     // Look it up in `val`
     return REAL_ELT(val, loc);

diff --git a/src/sparse-utils.c b/src/sparse-utils.c
@@ -21,6 +21,13 @@ R_xlen_t extract_len(SEXP x) {
   return out;
 }
 
+SEXP extract_default(SEXP x) {
+  SEXP data1 = R_altrep_data1(x);
+  SEXP out = VECTOR_ELT(data1, 3);
+
+  return out;
+}
+
 bool is_altrep(SEXP x) {
   return (bool) ALTREP(x);
 }

diff --git a/src/sparse-utils.h b/src/sparse-utils.h
@@ -11,6 +11,8 @@ SEXP extract_pos(SEXP x);
 
 R_xlen_t extract_len(SEXP x);
 
+SEXP extract_default(SEXP x);
+
 bool is_altrep(SEXP x);
 
 SEXP ffi_extract_altrep_class(SEXP x);

diff --git a/tests/testthat/_snaps/altrep.md b/tests/testthat/_snaps/altrep.md
@@ -223,7 +223,7 @@
       sparse_double(0, 1, 10)
     Condition
       Error in `sparse_double()`:
-      x `values` value must not be 0.
+      x `values` value must not be equal to the default 0.
       i 0 values at index: 1.
 
 ---
@@ -232,9 +232,34 @@
       sparse_double(rep(c(1, 0), 5), 1:10, 50)
     Condition
       Error in `sparse_double()`:
-      x `values` value must not be 0.
+      x `values` value must not be equal to the default 0.
       i 0 values at index: 2, 4, 6, 8, and 10.
 
+# default argument is working
+
+    Code
+      sparse_double(1, 1, 10, default = 1:10)
+    Condition
+      Error in `sparse_double()`:
+      ! `default` must be a number, not an integer vector.
+
+---
+
+    Code
+      sparse_double(1, 1, 10, default = TRUE)
+    Condition
+      Error in `sparse_double()`:
+      ! `default` must be a number, not `TRUE`.
+
+---
+
+    Code
+      sparse_double(c(1, 1, 4), c(1, 4, 6), 10, default = 1)
+    Condition
+      Error in `sparse_double()`:
+      x `values` value must not be equal to the default 1.
+      i 1 values at index: 1 and 2.
+
 # verbose testing
 
     Code

diff --git a/tests/testthat/test-altrep.R b/tests/testthat/test-altrep.R
@@ -220,7 +220,43 @@ test_that("materialization works with sparse_double()", {
   expect_identical(x_sparse[], x_dense)
 })
 
+test_that("default argument is working", {
+  expect_snapshot(
+    error = TRUE,
+    sparse_double(1, 1, 10, default = 1:10)
+  )
+
+  expect_snapshot(
+    error = TRUE,
+    sparse_double(1, 1, 10, default = TRUE)
+  )
+
+  expect_snapshot(
+    error = TRUE,
+    sparse_double(c(1, 1, 4), c(1, 4, 6), 10, default = 1)
+  )
+
+  x_sparse <- sparse_double(
+    value = c(10, NA, 20), 
+    position = c(1, 5, 8), 
+    length = 10, 
+    default = 4
+  )
 
+  x_dense <- c(10, 4, 4, 4, NA, 4, 4, 20, 4, 4)
+
+  for (i in seq_len(10)) {
+    expect_identical(x_sparse[i], x_dense[i])
+  }
+
+  expect_identical(x_sparse[1:2], x_dense[1:2])
+
+  expect_identical(x_sparse[3:7], x_dense[3:7])
+
+  expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)])
+
+  expect_identical(x_sparse[], x_dense)
+})
 
 test_that("is_sparse_vector works", {
   expect_true(is_sparse_vector(sparse_double(1, 1, 1)))

diff --git a/vignettes/design.Rmd b/vignettes/design.Rmd
@@ -22,11 +22,12 @@ The sparsevctrs package produces 3 things; ALTREP classes, matrix/data.frame con
 
 ## Altrep Functions
 
-The functions `sparse_double()` and its relatives are used to construct sparse vectors of the noted type. To work they all need 3 pieces of information:
+The functions `sparse_double()` and its relatives are used to construct sparse vectors of the noted type. To work they all need 4 pieces of information:
 
-- values
-- positions
-- length
+- `values`
+- `positions`
+- `length`
+- `default` (defaults to 0)
 
 The values need to match the type of the function name or be easily coerced into the type (double -> integer). The positions should be integers or doubles that can losslessly be turned into integers. The length should be a single non-negative integer-like value.
 
@@ -40,15 +41,15 @@ The input of these functions mirrors the values stored in the ALTREP class that
 
 3 functions fall into this category:
 
-- coerce_to_sparse_data_frame
-- coerce_to_sparse_tibble
-- coerce_to_sparse_matrix
+- `coerce_to_sparse_data_frame()`
+- `coerce_to_sparse_tibble()`
+- `coerce_to_sparse_matrix()`
 
 the first two take a sparse matrix from the Matrix package and produce a data.frame/tibble with sparse columns. The last one takes a data.frame/tibble with sparse columns and produces a sparse matrix using the Matrix package.
 
 These functions are expected to be inverse of each other, such that `coerce_to_sparse_matrix(coerce_to_sparse_data_frame(x))` returns `x` back. They are made to be highly performant both in terms of speed and memory consumption, Meaning that sparsity is applied when appropriate.
 
-These functions have quite strict input checking by choice, to allow the inner workings to be as efficient as possible.
+These functions have quite strict input checking by choice, to allow the inner workings to be as efficient as possible. It is in part why data.frames with sparse vectors with different can't be used with `coerce_to_sparse_matrix()` yet.
 
 ## Helper Functions