-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[r] Add new example dataset and methods for accessing example datasets (#1298)

* Add pbmc-small SOMAExperiment
* Add methods for accessing bundled soma objects
* Add helper for getting the example data directory
* Rename existing bundled pbmc3k obs
* Add tests for dataset utils
* Update existing tests to use new datasets API
* Rename raw-data to extdata
* Bump version and update news and docs
- Loading branch information
1 parent
2d4d1b7
commit 508c96c
Showing
15 changed files
with
190 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,3 +23,4 @@ tiledbsoma/libtiledbsoma.tar.gz.current | |
# vscode | ||
^\.vscode$ | ||
^\.devcontainer$ | ||
^data-raw$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#' SOMA Example Datasets | ||
#' | ||
#' @description | ||
#' Access example SOMA objects bundled with the tiledbsoma package. | ||
#' | ||
#' Use `list_datasets()` to list the available datasets and `load_dataset()` to | ||
#' load a dataset into memory using the appropriate SOMA class. The | ||
#' `extract_dataset()` method returns the path to the extracted dataset without | ||
#' loading it into memory. | ||
#' | ||
#' @details
#' The SOMA objects are stored as `tar.gz` files in the package's `extdata`
#' directory. Calling `load_dataset()` extracts the `tar.gz` file into a
#' subdirectory of `dir` named after the dataset, inspects its metadata to
#' determine the appropriate SOMA class to instantiate, and returns the SOMA
#' object.
#' | ||
#' @examples | ||
#' soma_pbmc_small <- load_dataset("soma-exp-pbmc-small") | ||
#' | ||
#' @name example-datasets | ||
NULL | ||
|
||
#' @rdname example-datasets
#' @return
#' - `list_datasets()` returns a character vector with the names of the
#'   available datasets.
#' @importFrom tools file_path_sans_ext
#' @export
list_datasets <- function() {
  # Bundled datasets are the gzipped tarballs found in the example data
  # directory
  archives <- list.files(example_data_dir(), pattern = "tar\\.gz$")

  # A dataset's name is its archive's file name with the compression suffix
  # and the remaining extension removed
  archive_names <- basename(archives)
  tools::file_path_sans_ext(archive_names, compression = TRUE)
}
|
||
#' @rdname example-datasets
#' @param name The name of the dataset.
#' @param dir The directory where the dataset will be extracted to (default:
#' `tempdir()`). The dataset is placed in a subdirectory of `dir` named after
#' the dataset.
#' @return
#' - `extract_dataset()` returns the path to the extracted dataset.
#' @export
extract_dataset <- function(name, dir = tempdir()) {
  data_dir <- example_data_dir()
  tarfiles <- list_datasets()

  # Checks run in order, so `name` is known to be a single string before it
  # is compared against the available datasets
  stopifnot(
    "The specified directory does not exist" = dir.exists(dir),
    "Provide the name of a single dataset" = is_scalar_character(name),
    assert_subset(name, tarfiles, type = "dataset")
  )

  # Build the archive path directly instead of using `name` as a regular
  # expression: a pattern match can return several archives (when `name` is a
  # substring of another dataset's name) or none at all, and a zero-length
  # result would pass the existence check below vacuously
  tarfile <- file.path(data_dir, paste0(name, ".tar.gz"))
  stopifnot("The specified dataset does not exist" = file.exists(tarfile))

  # Extract tar.gz file to a dataset-specific subdirectory of dir
  dataset_uri <- file.path(dir, name)
  untar(tarfile, exdir = dataset_uri)
  dataset_uri
}
|
||
#' @rdname example-datasets
#' @return
#' - `load_dataset()` returns the SOMA object stored in the dataset.
#' @export
load_dataset <- function(name, dir = tempdir()) {
  dataset_uri <- extract_dataset(name, dir)

  # Wrap the extracted path in the generic TileDB class that matches its
  # TileDB object type so that its metadata can be read
  tiledb_type <- tiledb::tiledb_object_type(dataset_uri)
  handle <- switch(
    tiledb_type,
    ARRAY = TileDBArray$new(dataset_uri, internal_use_only = "allowed_use"),
    GROUP = TileDBGroup$new(dataset_uri, internal_use_only = "allowed_use"),
    stop("The dataset is not a TileDB Array or Group", call. = FALSE)
  )

  # The "soma_object_type" metadata entry selects which SOMA opener to call
  soma_type <- handle$get_metadata("soma_object_type")
  switch(
    soma_type,
    SOMAExperiment = SOMAExperimentOpen(dataset_uri),
    SOMADataFrame = SOMADataFrameOpen(dataset_uri),
    stop("The dataset is an unsupported SOMA object", call. = FALSE)
  )
}
|
||
# Path to the `extdata` directory of the installed tiledbsoma package.
# `mustWork = TRUE` turns a failed lookup into an error rather than an empty
# string.
example_data_dir <- function() {
  extdata_path <- system.file(
    "extdata",
    package = "tiledbsoma",
    mustWork = TRUE
  )
  extdata_path
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Create SOMAExperiment for the pbmc_small dataset
# https://mojaveazure.github.io/seurat-object/reference/pbmc_small.html
#
# The output path is resolved relative to the current working directory, so
# this script must be run from a directory that contains `inst/extdata`.

library(tiledbsoma)
library(SeuratObject)

# load the pbmc_small dataset
data(pbmc_small, package = "SeuratObject")
pbmc_small

# variables
# mustWork = TRUE: stop right away if `inst/extdata` cannot be found. The
# default only warns and returns the relative path unchanged, which would no
# longer point at the intended location once the working directory is changed
# below.
data_dir <- normalizePath(file.path("inst", "extdata"), mustWork = TRUE)
soma_exp_name <- "soma-exp-pbmc-small"
soma_exp_uri <- file.path(tempdir(), soma_exp_name)
tar_file <- file.path(data_dir, paste0(soma_exp_name, ".tar.gz"))

# create the SOMAExperiment
write_soma(pbmc_small, uri = soma_exp_uri)

# create tar.gz file containing the SOMAExperiment
# The archive is built from inside the experiment directory so that its
# entries are relative to that directory; on.exit() restores the previous
# working directory even if tar() fails.
tar_directory <- function(dir, tarfile) {
  od <- setwd(dir)
  on.exit(setwd(od), add = TRUE)
  tar(tarfile, compression = "gzip")
}
tar_directory(soma_exp_uri, tar_file)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
test_that("example dataset access", {
  # One dataset name is expected per file in the example data directory
  bundled_files <- dir(example_data_dir())
  expect_length(list_datasets(), length(bundled_files))

  # Extraction yields a directory that TileDB recognises as a group
  extracted_uri <- extract_dataset("soma-exp-pbmc-small")
  expect_true(dir.exists(extracted_uri))
  extracted_type <- tiledb::tiledb_object_type(extracted_uri)
  expect_equal(extracted_type, "GROUP")

  # Loading returns an object of the SOMA class recorded for each dataset
  experiment <- load_dataset("soma-exp-pbmc-small")
  expect_s3_class(experiment, "SOMAExperiment")

  obs_df <- load_dataset("soma-dataframe-pbmc3k-processed-obs")
  expect_s3_class(obs_df, "SOMADataFrame")
})