From 46d9d79cdde72e80d3c224bfc3b43176d7febc8a Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 27 Sep 2024 10:19:35 -0700 Subject: [PATCH 1/7] test zero rows and cols for coerce_to_sparse_matrix() --- tests/testthat/test-coerce.R | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/testthat/test-coerce.R b/tests/testthat/test-coerce.R index 51b47ee..4b3a9e9 100644 --- a/tests/testthat/test-coerce.R +++ b/tests/testthat/test-coerce.R @@ -20,6 +20,36 @@ test_that("coerce_to_sparse_matrix() works", { expect_identical(res, exp) }) +test_that("coerce_to_sparse_matrix() with zero rows and columns", { + skip_if_not_installed("Matrix") + + dat <- data.frame() + exp <- Matrix::Matrix(nrow = 0, ncol = 0, sparse = TRUE) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame(x = integer(), y = integer()) + exp <- Matrix::Matrix(nrow = 0, ncol = 2, sparse = TRUE) + colnames(exp) <- c("x", "y") + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + + dat <- data.frame(x = 1:2)[, integer()] + exp <- Matrix::Matrix(nrow = 2, ncol = 0, sparse = TRUE) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) +}) + test_that("coerce_to_sparse_matrix() errors on wrong input", { skip_if_not_installed("Matrix") From 90882ad64d846737ea1045fc1e6d3da271c54da6 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 27 Sep 2024 16:15:28 -0700 Subject: [PATCH 2/7] turn zeroes into real zeroes --- R/coerce.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/coerce.R b/R/coerce.R index f925a4d..b092dd7 100644 --- a/R/coerce.R +++ b/R/coerce.R @@ -67,6 +67,12 @@ coerce_to_sparse_matrix <- function(x, call = rlang::caller_env(0)) { all_positions <- unlist(all_positions, use.names = FALSE) all_values <- unlist(all_values, use.names = FALSE) + # TODO: maybe faster to do this above? + non_zero <- all_values != 0 + all_rows <- all_rows[non_zero] + all_positions <- all_positions[non_zero] + all_values <- all_values[non_zero] + res <- Matrix::sparseMatrix( i = all_positions, j = all_rows, From 49ab897193016f36d7f59b97c65f9d5ffaf3e887 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 27 Sep 2024 16:15:40 -0700 Subject: [PATCH 3/7] handle default rownames --- R/coerce.R | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/R/coerce.R b/R/coerce.R index b092dd7..06d0a46 100644 --- a/R/coerce.R +++ b/R/coerce.R @@ -73,11 +73,21 @@ coerce_to_sparse_matrix <- function(x, call = rlang::caller_env(0)) { all_positions <- all_positions[non_zero] all_values <- all_values[non_zero] + n_row <- nrow(x) + n_col <- ncol(x) + + if (identical(rownames(x), as.character(seq_len(nrow(x))))) { + row_names <- NULL + } else { + row_names <- rownames(x) + } + res <- Matrix::sparseMatrix( i = all_positions, j = all_rows, x = all_values, - dimnames = list(rownames(x), colnames(x)) + dims = c(n_row, n_col), + dimnames = list(row_names, colnames(x)) ) res } From e1ba2a4d2a802db3524dd8bf85d6d0fdf268ffcd Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 27 Sep 2024 16:15:50 -0700 Subject: [PATCH 4/7] more sparse tests --- tests/testthat/test-coerce.R | 140 +++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/tests/testthat/test-coerce.R b/tests/testthat/test-coerce.R index 4b3a9e9..f216bc3 100644 --- a/tests/testthat/test-coerce.R +++ b/tests/testthat/test-coerce.R @@ -50,6 +50,146 @@ test_that("coerce_to_sparse_matrix() with zero rows and columns", { ) }) +test_that("coerce_to_sparse_matrix() works with single all sparse vector", { + skip_if_not_installed("Matrix") + + exp <- Matrix::Matrix(0, nrow = 10, ncol = 1, sparse = TRUE) + colnames(exp) <- c("x") + + dat <- data.frame(x = rep(0, 10)) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame(x = sparse_integer(integer(), integer(), 10)) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) +}) + +test_that("coerce_to_sparse_matrix() works with multiple all sparse vector", { + skip_if_not_installed("Matrix") + + exp <- Matrix::Matrix(0, nrow = 10, ncol = 2, sparse = TRUE) + colnames(exp) <- c("x", "y") + + dat <- data.frame(x = rep(0, 10), y = rep(0, 10)) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame( + x = sparse_integer(integer(), integer(), 10), + y = sparse_integer(integer(), integer(), 10) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) +}) + +test_that("coerce_to_sparse_matrix() works with sparse between dense", { + skip_if_not_installed("Matrix") + + exp <- Matrix::Matrix(c(1, 0, 0, 0, 0, 1), nrow = 2, ncol = 3, sparse = TRUE) + colnames(exp) <- c("x", "y", "z") + + dat <- data.frame( + x = c(1, 0), + y = c(0, 0), + z = c(0, 1) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame( + x = sparse_integer(1, 1, 2), + y = c(0, 0), + z = c(0, 1) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame( + x = c(1, 0), + y = c(0, 0), + z = sparse_integer(1, 2, 2) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame( + x = sparse_integer(1, 1, 2), + y = c(0, 0), + z = sparse_integer(1, 2, 2) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame( + x = c(1, 0), + y = sparse_integer(integer(), integer(), 2), + z = c(0, 1) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame( + x = sparse_integer(1, 1, 2), + y = sparse_integer(integer(), integer(), 2), + z = c(0, 1) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame( + x = c(1, 0), + y = sparse_integer(integer(), integer(), 2), + z = sparse_integer(1, 2, 2) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame( + x = sparse_integer(1, 1, 2), + y = sparse_integer(integer(), integer(), 2), + z = sparse_integer(1, 2, 2) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) +}) + test_that("coerce_to_sparse_matrix() errors on wrong input", { skip_if_not_installed("Matrix") From 3eae95f1369d4ed2d4f04c8f60dca5f61f1df4ff Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 27 Sep 2024 16:33:47 -0700 Subject: [PATCH 5/7] more testing --- tests/testthat/test-coerce.R | 94 ++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/tests/testthat/test-coerce.R b/tests/testthat/test-coerce.R index f216bc3..89d200c 100644 --- a/tests/testthat/test-coerce.R +++ b/tests/testthat/test-coerce.R @@ -190,6 +190,100 @@ test_that("coerce_to_sparse_matrix() works with sparse between dense", { ) }) +test_that("coerce_to_sparse_matrix() works with sparse before dense", { + skip_if_not_installed("Matrix") + + exp <- Matrix::Matrix(c(0, 0, 0, 0, 0, 1), nrow = 3, ncol = 2, sparse = TRUE) + colnames(exp) <- c("x", "y") + + dat <- data.frame( + x = c(0, 0, 0), + y = c(0, 0, 1) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame( + x = c(0, 0, 0), + y = sparse_integer(1, 3, 3) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame( + x = sparse_integer(integer(), integer(), 3), + y = c(0, 0, 1) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame( + x = sparse_integer(integer(), integer(), 3), + y = sparse_integer(1, 3, 3) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) +}) + +test_that("coerce_to_sparse_matrix() works with sparse after dense", { + skip_if_not_installed("Matrix") + + exp <- Matrix::Matrix(c(1, 0, 0, 0, 0, 0), nrow = 3, ncol = 2, sparse = TRUE) + colnames(exp) <- c("x", "y") + + dat <- data.frame( + x = c(1, 0, 0), + y = c(0, 0, 0) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame( + x = sparse_integer(1, 1, 3), + y = c(0, 0, 0) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame( + x = c(1, 0, 0), + y = sparse_integer(integer(), integer(), 3) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) + + dat <- data.frame( + x = sparse_integer(1, 1, 3), + y = sparse_integer(integer(), integer(), 3) + ) + + expect_identical( + coerce_to_sparse_matrix(dat), + exp + ) +}) + test_that("coerce_to_sparse_matrix() errors on wrong input", { skip_if_not_installed("Matrix") From db72b6023d4d159da06799dbc59bc0b798c7320a Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 27 Sep 2024 16:35:09 -0700 Subject: [PATCH 6/7] add news --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index bb2804c..454e5c6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,10 @@ * `is_sparse_vector()` has been rewritten for speed improvement. (#76) +* Fixed bugs where `coerce_to_sparse_matrix()` would error for completely sparse columns. (#77) + +* `coerce_to_sparse_matrix()` Now turns dense zeroes into sparse zeroes. (#77) + # sparsevctrs 0.1.0 * Initial CRAN submission. From 155e6fd1fb9ef535d69970d0ad95e9dc7aadc158 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 27 Sep 2024 16:41:35 -0700 Subject: [PATCH 7/7] increase version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f300aa8..aa825da 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: sparsevctrs Title: Sparse Vectors for Use in Data Frames -Version: 0.1.0.9001 +Version: 0.1.0.9002 Authors@R: c( person("Emil", "Hvitfeldt", , "emil.hvitfeldt@posit.co", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-0679-1945")),