diff --git a/apis/r/R/SOMAExperiment.R b/apis/r/R/SOMAExperiment.R index 9742573d5e..d049a2cdcb 100644 --- a/apis/r/R/SOMAExperiment.R +++ b/apis/r/R/SOMAExperiment.R @@ -10,6 +10,285 @@ SOMAExperiment <- R6::R6Class( classname = "SOMAExperiment", inherit = SOMACollectionBase, + public = list( + #' @description Loads the experiment as a \code{\link[SeuratObject]{Seurat}} + #' object + #' + #' @param X_layers A named list of named character vectors describing the + #' measurements to load and the layers within those measurements to read in; + #' for example: \preformatted{ + #' list( + #' RNA = c(counts = "counts", data = "logcounts"), + #' ADT = c(counts = "counts") + #' ) + #' } + #' @template param-obs-index + #' @param var_index A named character of column names in \code{var} for + #' each measurement to use as feature names; for example: \preformatted{ + #' c(RNA = "gene_name", ADT = "protein_name") + #' } + #' Uses \code{paste0("feature", var_joinids())} by default + #' @template param-obs-column-names + #' @param var_column_names A named list of character vectors describing the + #' columns in \code{var} for each measurement to load in as feature-level + #' meta data; for example: \preformatted{ + #' list( + #' RNA = c("vst.mean", "vst.variable"), + #' ADT = c("ensembl_id") + #' ) + #' } + #' By default, loads in entire feature-level meta data for all measurements + #' described in \code{X_layers} + #' @param obsm_layers A named list of character vectors describing the + #' arrays in \code{obsm} for each measurement to load in as + #' dimensional reductions; for example: \preformatted{ + #' list( + #' RNA = c("pca", "umap"), + #' ADT = c("adtpca", "adtumap") + #' ) + #' } + #' By default, loads in all dimensional reductions for all measurements + #' described in \code{X_layers} + #' @param varm_layers A named list of named character vectors describing the + #' arrays in \code{varm} to load in as feature loadings and which array in + #' \code{obsm} they correspond to; for example: \preformatted{ + #' list( + #' RNA = c(pca = "PCs"), + #' ADT = c(adtpca = "ADTPCs") + #' ) + #' } + #' By default, will try to determine \code{varm_layers} from + #' \code{obsm_layers} and load in all loadings for all dimensional + #' reductions for all measurements described in \code{X_layers} + #' @param obsp_layers A named list of character vectors describing the + #' arrays in \code{obsp} for each measurement to load in as + #' nearest neighbor graphs; for example: \preformatted{ + #' list( + #' RNA = c("RNA_nn", "RNA_snn"), + #' ADT = c("ADT_nn") + #' ) + #' } + #' By default, loads in all nearest-neighbor graphs for all measurements for + #' all measurements described in \code{X_layers} + #' + #' @return A \code{\link[SeuratObject]{Seurat}} object + #' + to_seurat = function( + X_layers, + obs_index = NULL, + var_index = NULL, + obs_column_names = NULL, + var_column_names = NULL, + obsm_layers = NULL, + varm_layers = NULL, + obsp_layers = NULL + ) { + .check_seurat_installed() + stopifnot( + "'X_layers' must be named list" = is_named_list( + X_layers, + allow_empty = FALSE + ), + "'obs_index' must be a single character value" = is.null(obs_index) || + is_scalar_character(obs_index), + "'var_index' must be a named character vector" = is_character_or_null(var_index), + "'var_column_names' must be a named list" = is.null(var_column_names) || + is_named_list(var_column_names, allow_empty = FALSE), + "'obsm_layers' must be a named list" = is.null(obsm_layers) || + is_scalar_logical(obsm_layers) || + is_named_list(obsm_layers, allow_empty = FALSE), + "'varm_layers' must be a named list" = is.null(varm_layers) || + is_scalar_logical(varm_layers) || + is_named_list(varm_layers, allow_empty = FALSE), + "'obsp_layers' must be a named list" = is.null(obsp_layers) || + is_scalar_logical(obsp_layers) || + is_named_list(obsp_layers, allow_empty = FALSE) + ) + # Check `X_layers` + if (!all(names(X_layers) %in% self$ms$names())) { + msg <- paste( + "The following measurements could not be found in this experiment:", + string_collapse(setdiff(x = names(X_layers), y = self$ms$names())) + ) + stop(paste(strwrap(msg), collapse = '\n'), call. = FALSE) + } + layer_check <- vapply_lgl( + X = X_layers, + FUN = function(x) { + return(is.character(x) && is_named(x, allow_empty = FALSE)) + } + ) + if (!all(layer_check)) { + stop("All entries in 'X_layers' must be named lists", call. = FALSE) + } + layers <- names(X_layers) + nlayers <- length(X_layers) + null_list <- stats::setNames( + object = vector(mode = 'list', length = nlayers), + nm = layers + ) + # Check `obs_index` + if (is_scalar_character(obs_index)) { + obs_index <- match.arg(obs_index, choices = self$obs$attrnames()) + } + # Check `var_index` + var_index <- var_index %||% null_list + if (length(var_index) == 1L) { + var_index <- stats::setNames( + object = rep_len(x = var_index, length.out = nlayers), + nm = layers + ) + } + stopifnot( + "There must be one 'var_index' for every X layer" = length(var_index) == nlayers, + "'var_index' must be named" = is_named(var_index, allow_empty = FALSE), + "'var_index' must have the same names as 'X_layers'" = all(names(var_index) %in% layers) + ) + # Check `var_column_names` + var_column_names <- var_column_names %||% null_list + stopifnot( + "'var_column_names' must have the same names as 'X_layers'" = all(names(var_column_names) %in% layers) + ) + # Check `obsm_layers` + sublayer_check <- function(x, named = FALSE) { + checks <- c( + is.null(x), + is_scalar_logical(x), + if (isFALSE(named)) { + is.character(x) + } else { + is.character(x) && is_named(x, allow_empty = FALSE) + } + ) + return(Reduce(f = `||`, x = checks)) + } + obsm_layers <- obsm_layers %||% null_list + if (is_scalar_logical(obsm_layers)) { + obsm_layers <- stats::setNames( + object = rep_len(x = obsm_layers, length.out = nlayers), + nm = layers + ) + } + stopifnot( + "'obsm_layers' must have the same names as 'X_layers'" = all(names(obsm_layers) %in% layers), + "Every entry in 'obsm_layers' must be a character vector" = all(vapply_lgl(obsm_layers, sublayer_check)) + ) + # Check `varm_layers` + varm_layers <- varm_layers %||% null_list + if (is_scalar_logical(varm_layers)) { + varm_layers <- stats::setNames( + object = rep_len(x = varm_layers, length.out = nlayers), + nm = layers + ) + } + stopifnot( + "'varm_layers' must have the same names as 'X_layers'" = all(names(varm_layers) %in% layers), + "Every entry in 'varm_layers' must be a named character vector" = all(vapply_lgl( + X = varm_layers, + FUN = sublayer_check, + named = TRUE + )) + ) + # Check `obsp_layers` + obsp_layers <- obsp_layers %||% null_list + if (is_scalar_logical(obsp_layers)) { + obsp_layers <- stats::setNames( + object = rep_len(x = obsp_layers, length.out = nlayers), + nm = layers + ) + } + stopifnot( + "'obsp_layers' must have the same names as 'X_layers'" = all(names(obsp_layers) %in% layers), + "Every entry in 'obsp_layers' must be a character vector" = all(vapply_lgl(obsp_layers, sublayer_check)) + ) + # Load in the first assay as the default assay + active <- names(X_layers)[1L] + query <- SOMAExperimentAxisQuery$new( + experiment = self, + measurement_name = active + ) + object <- query$to_seurat( + X_layers = X_layers[[active]], + obs_index = obs_index, + var_index = var_index[[active]], + obs_column_names = obs_column_names, + var_column_names = var_column_names[[active]], + obsm_layers = obsm_layers[[active]], + varm_layers = varm_layers[[active]], + obsp_layers = obsp_layers[[active]] + ) + # Add alternate assays + for (assay in setdiff(x = layers, y = active)) { + query <- SOMAExperimentAxisQuery$new( + experiment = self, + measurement_name = assay + ) + obj <- tryCatch( + expr = query$to_seurat_assay( + X_layers = X_layers[[assay]], + obs_index = obs_index, + var_index = var_index[[assay]], + var_column_names = var_column_names[[assay]] + ), + error = function(e) { + warning(conditionMessage(e), call. = FALSE, immediate. = TRUE) + return(NULL) + } + ) + if (is.null(obj)) { + next + } + object[[assay]] <- obj + # Add reductions + embeddings <- obsm_layers[[assay]] + skip_reducs <- isFALSE(obsm_layers) || rlang::is_na(obsm_layers) + obsm <- tryCatch(expr = self$ms$get(assay)$get('obsm'), error = null) + if (is.null(obsm)) { + if (!skip_reducs) { + warning( + 'Dimensional reductions were requested for assay', + sQuote(assay), + ', but no reductions found', + call. = FALSE, + immediate. = TRUE + ) + } + skip_reducs <- TRUE + } + if (!skip_reducs) { + if (isTRUE(embeddings)) { + embeddings <- NULL + } + loadings <- varm_layers[['loadings']] + if (isTRUE(loadings)) { + loadings <- NULL + } + reductions <- .get_seurat_reductions( + query = query, + obsm_layers = embeddings, + varm_layers = loadings, + obs_index = obs_index, + var_index = var_index[[assay]] + ) + if (length(reductions)) { + for (reduc in names(reductions)) { + object[[reduc]] <- reductions + } + } + } + # Add graphs + graphs <- obsp_layers[[assay]] + obsp <- tryCatch(expr = self$ms$get(assay)$get('obsp'), error = null) + if (is.null(obsp)) { + if (!(isFALSE(graphs) || rlang::is_na(graphs))) { + '' + } + } + } + return(object) + } + ), + active = list( #' @field obs a [`SOMADataFrame`] containing primary annotations on the #' observation axis. The contents of the `soma_joinid` column define the diff --git a/apis/r/R/utils-assertions.R b/apis/r/R/utils-assertions.R index 5dac41f924..d03ddf9473 100644 --- a/apis/r/R/utils-assertions.R +++ b/apis/r/R/utils-assertions.R @@ -13,8 +13,8 @@ is_named <- function(x, allow_empty = TRUE) { !is.null(names(x)) && ifelse(allow_empty, TRUE, all(nzchar(x = names(x = x)))) } -is_named_list <- function(x) { - is.list(x) && is_named(x) +is_named_list <- function(x, allow_empty = TRUE) { + is.list(x) && is_named(x, allow_empty = allow_empty) } is_scalar_logical <- function(x) { diff --git a/apis/r/R/utils-seurat.R b/apis/r/R/utils-seurat.R index 6d1fb3bd22..99e5f80b8f 100644 --- a/apis/r/R/utils-seurat.R +++ b/apis/r/R/utils-seurat.R @@ -1,3 +1,55 @@ +#' Create \pkg{Seurat}-Style Names +#' +#' Convert AnnData-style names to \pkg{Seurat}-style names +#' +#' @param x A character vector of names +#' @param type Type of conversion to perform; choose from: +#' \itemize{ +#' \item \dQuote{\code{embeddings}}: convert AnnData-style \code{obsm} names +#' \item \dQuote{\code{loadings}}: convert AnnData-style \code{varm} names +#' } +#' +#' @return \code{x} with names converted to Seurat-style names +#' based on \code{type} +#' +#' @keywords internal +#' +#' @noRd +#' +.anndata_to_seurat_reduc <- function(x, type = c('embeddings', 'loadings')) { + if (is.null(x)) { + return(NULL) + } + stopifnot(is.character(x), is.character(type)) + type <- type[1L] + type <- match.arg(type) + return(switch( + EXPR = type, + embeddings = tolower(gsub(pattern = '^X_', replacement = '', x = x)), + loadings = { + m <- regexpr(pattern = '[[:upper:]]+', text = x) + x <- tolower(unlist(regmatches(x = x, m = m))) + x[x == 'pc'] <- 'pca' + x[x == 'ic'] <- 'ica' + x + } + )) +} + +#' Check \pkg{SeuratObject} Installation Status +#' +#' Check to see that valid version of \pkg{SeuratObject} is installed +#' +#' @inheritParams base::requireNamespace +#' +#' @return If \code{quietly}, then invisibly returns the installation status; +#' otherwise, errors if a valid version \pkg{SeuratObject} is unavailable or +#' invisibly returns \code{TRUE} +#' +#' @keywords internal +#' +#' @noRd +#' .check_seurat_installed <- function(quietly = FALSE) { pkg <- 'SeuratObject' checks <- c( @@ -27,26 +79,198 @@ return(invisible(TRUE)) } -.anndata_to_seurat_reduc <- function(x, type = c('embeddings', 'loadings')) { - if (is.null(x)) { - return(NULL) +#' Load \link[SeuratObject:Graph]{Nearest-Neighbor Graphs} +#' +#' Read in \link[SeuratObject:Graph]{nearest-neighbor graph} objects from a +#' \code{\link{SOMAExperimentAxisQuery}} object +#' +#' @param query A \code{\link{SOMAExperimentAxisQuery}} object +#' @param obsp_layers Names of arrays in \code{obsp} to load in as +#' \code{\link[SeuratObject]{Graph}s}; by default, loads all graphs +#' @template param-obs-index +#' +#' @return A named list of \code{\link[SeuratObject]{Graph}} objects +#' +#' @keywords internal +#' +#' @noRd +#' +.get_seurat_graphs <- function(query, obsp_layers = NULL, obs_index = NULL) { + .check_seurat_installed() + stopifnot( + "'query' must be a SOMAExperimentAxisQuery object" = inherits( + x = query, + what = 'SOMAExperimentAxisQuery' + ), + "'obsp_layers' must be a character vector" = is_character_or_null(obsp_layers), + "'obs_index' must be a single character value" = is.null(obs_index) || + (is_scalar_character(obs_index) && !is.na(obs_index)) + ) + ms_graphs <- tryCatch(expr = query$ms$obsp$names(), error = null) + if (is.null(ms_graphs)) { + stop("No nearest-neighbor graphs found", call. = FALSE) } - stopifnot(is.character(x), is.character(type)) - type <- type[1L] - type <- match.arg(type) - return(switch( - EXPR = type, - embeddings = tolower(gsub(pattern = '^X_', replacement = '', x = x)), - loadings = { - m <- regexpr(pattern = '[[:upper:]]+', text = x) - x <- tolower(unlist(regmatches(x = x, m = m))) - x[x == 'pc'] <- 'pca' - x[x == 'ic'] <- 'ica' - x + obsp_layers <- obsp_layers %||% ms_graphs + res <- stats::setNames( + object = vector(mode = 'list', length = length(obsp_layers)), + nm = obsp_layers + ) + for (grph in obsp_layers) { + mat <- tryCatch( + expr = query$to_seurat_graph(obsp_layer = grph, obs_index = obs_index), + error = function(e) { + warning(conditionMessage(e), call. = FALSE, immediate. = TRUE) + return(NULL) + } + ) + if (is.null(mat)) { + next } - )) + res[[grph]] <- mat + } + return(Filter(f = Negate(is.null), x = res)) +} + +#' Load \link[SeuratObject:DimReduc]{Dimensional Reductions} +#' +#' Read in \link[SeuratObject:DimReduc]{dimensional reduction} objects from a +#' \code{\link{SOMAExperimentAxisQuery}} object +#' +#' @param query A \code{\link{SOMAExperimentAxisQuery}} object +#' @param obsm_layers Names of arrays in \code{obsm} to load in as the +#' cell embeddings; pass \code{FALSE} to suppress loading in any +#' dimensional reductions; by default, loads all dimensional +#' reduction information +#' @param varm_layers Named vector of arrays in \code{varm} to load in as +#' the feature loadings; names must be names of array in \code{obsm} (eg. +#' \code{varm_layers = c(X_pca = 'PCs')}); will try to determine +#' \code{varm_layers} from \code{obsm_layers} +#' @template param-obs-index +#' @template param-var-index +#' +#' @return A named list of \code{\link[SeuratObject]{DimReduc}} objects where +#' the names are the \pkg{Seurat}-style names +#' +#' @keywords internal +#' +#' @noRd +#' +.get_seurat_reductions <- function( + query, + obsm_layers = NULL, + varm_layers = NULL, + obs_index = NULL, + var_index = NULL +) { + .check_seurat_installed() + stopifnot( + "'query' must be a SOMAExperimentAxisQuery object" = inherits( + x = query, + what = 'SOMAExperimentAxisQuery' + ), + "'obsm_layers' must be a character vector" = is_character_or_null(obsm_layers), + "'varm_layers' must be a named character vector" = is.null(varm_layers) || + (is.character(varm_layers) && is_named(varm_layers, allow_empty = FALSE)) || + is_scalar_logical(varm_layers), + "'obs_index' must be a single character value" = is.null(obs_index) || + (is_scalar_character(obs_index) && !is.na(obs_index)), + "'var_index' must be a single character value" = is.null(var_index) || + (is_scalar_character(var_index) && !is.na(var_index)) + ) + ms_embed <- tryCatch(expr = query$ms$obsm$names(), error = null) + if (is.null(ms_embed)) { + stop("No reductions found", call. = FALSE) + } + names(ms_embed) <- .anndata_to_seurat_reduc(ms_embed) + obsm_layers <- obsm_layers %||% ms_embed + res <- stats::setNames( + object = vector(mode = 'list', length = length(obsm_layers)), + nm = .anndata_to_seurat_reduc(obsm_layers) + ) + # Match loadings to embeddings + ms_load <- tryCatch(expr = query$ms$varm$names(), error = null) + if (isTRUE(varm_layers)) { + varm_layers <- NULL + } else if (rlang::is_na(varm_layers)) { + varm_layers <- FALSE + } + if (is.null(ms_load) && !isFALSE(varm_layers)) { + warning("No loadings found", call. = FALSE, immediate. = TRUE) + varm_layers <- FALSE + } + if (!isFALSE(varm_layers)) { + names(ms_load) <- ms_embed[.anndata_to_seurat_reduc(ms_load, 'loadings')] + varm_layers <- varm_layers %||% ms_load + reduc_misisng <- setdiff(x = names(varm_layers), y = names(ms_load)) + if (length(reduc_misisng) == length(varm_layers)) { + warning( + "None of the reductions specified in 'varm_layers' can be found", + call. = FALSE, + immediate. = TRUE + ) + varm_layers <- FALSE + } else if (length(reduc_misisng)) { + warning( + paste( + strwrap(paste( + "The reductions for the following loadings cannot be found in 'varm':", + sQuote(varm_layers[reduc_misisng]), + collapse = ', ' + )), + collapse = '\n' + ), + call. = FALSE, + immediate. = TRUE + ) + varm_layers <- varm_layers[!names(varm_layers) %in% reduc_misisng] + } + } + # Read in reductions and add to `object` + for (embed in obsm_layers) { + if (embed %in% names(ms_embed)) { + embed <- ms_embed[embed] + } + rname <- .anndata_to_seurat_reduc(embed) + reduc <- tryCatch( + expr = query$to_seurat_reduction( + obsm_layer = embed, + varm_layer = ifelse( + embed %in% names(varm_layers), + yes = varm_layers[embed], + no = FALSE + ), + obs_index = obs_index, + var_index = var_index + ), + error = function(e) { + warning(conditionMessage(e), call. = FALSE, immediate. = TRUE) + return(NULL) + } + ) + if (is.null(reduc)) { + next + } + res[[rname]] <- reduc + } + return(Filter(f = Negate(is.null), x = res)) } +#' Minimum Version of \pkg{SeuratObject} +#' +#' Fetch the minimum required version of \pkg{SeuratObject} +#' +#' @param repr Representation of the version; choose from: +#' \itemize{ +#' \item \dQuote{\code{v}} to return a \code{\link[base]{package_version}} +#' \item \dQuote{\code{c}} to return a \code{\link[base]{character}} +#' } +#' +#' @return The minimum required version of \pkg{SeuratObject} +#' +#' @keywords internal +#' +#' @noRd +#' .MINIMUM_SEURAT_VERSION <- function(repr = c('v', 'c')) { repr <- repr[1L] repr <- match.arg(arg = repr) diff --git a/apis/r/man/SOMAExperiment.Rd b/apis/r/man/SOMAExperiment.Rd index 9c6b271245..f1adfdab10 100644 --- a/apis/r/man/SOMAExperiment.Rd +++ b/apis/r/man/SOMAExperiment.Rd @@ -27,6 +27,7 @@ observation index domain, \code{obs_id}. All observations for the \section{Methods}{ \subsection{Public methods}{ \itemize{ +\item \href{#method-SOMAExperiment-to_seurat}{\code{SOMAExperiment$to_seurat()}} \item \href{#method-SOMAExperiment-clone}{\code{SOMAExperiment$clone()}} } } @@ -57,6 +58,100 @@ observation index domain, \code{obs_id}. All observations for the }} \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SOMAExperiment-to_seurat}{}}} +\subsection{Method \code{to_seurat()}}{ +Loads the experiment as a \code{\link[SeuratObject]{Seurat}} +object +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{SOMAExperiment$to_seurat( + X_layers, + obs_index = NULL, + var_index = NULL, + obs_column_names = NULL, + var_column_names = NULL, + obsm_layers = NULL, + varm_layers = NULL, + obsp_layers = NULL +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{X_layers}}{A named list of named character vectors describing the +measurements to load and the layers within those measurements to read in; +for example: \preformatted{ +list( + RNA = c(counts = "counts", data = "logcounts"), + ADT = c(counts = "counts") +) +}} + +\item{\code{obs_index}}{Name of column in \code{obs} to add as cell names; uses +\code{paste0("cell", obs_joinids())} by default} + +\item{\code{var_index}}{A named character of column names in \code{var} for +each measurement to use as feature names; for example: \preformatted{ +c(RNA = "gene_name", ADT = "protein_name") +} +Uses \code{paste0("feature", var_joinids())} by default} + +\item{\code{obs_column_names}}{Names of columns in \code{obs} to add as +cell-level meta data; by default, loads all columns} + +\item{\code{var_column_names}}{A named list of character vectors describing the +columns in \code{var} for each measurement to load in as feature-level +meta data; for example: \preformatted{ +list( + RNA = c("vst.mean", "vst.variable"), + ADT = c("ensembl_id") +) +} +By default, loads in entire feature-level meta data for all measurements +described in \code{X_layers}} + +\item{\code{obsm_layers}}{A named list of character vectors describing the +arrays in \code{obsm} for each measurement to load in as +dimensional reductions; for example: \preformatted{ +list( + RNA = c("pca", "umap"), + ADT = c("adtpca", "adtumap") +) +} +By default, loads in all dimensional reductions for all measurements +described in \code{X_layers}} + +\item{\code{varm_layers}}{A named list of named character vectors describing the +arrays in \code{varm} to load in as feature loadings and which array in +\code{obsm} they correspond to; for example: \preformatted{ +list( + RNA = c(pca = "PCs"), + ADT = c(adtpca = "ADTPCs") +) +} +By default, will try to determine \code{varm_layers} from +\code{obsm_layers} and load in all loadings for all dimensional +reductions for all measurements described in \code{X_layers}} + +\item{\code{obsp_layers}}{A named list of character vectors describing the +arrays in \code{obsp} for each measurement to load in as +nearest neighbor graphs; for example: \preformatted{ +list( + RNA = c("RNA_nn", "RNA_snn"), + ADT = c("ADT_nn") +) +} +By default, loads in all nearest-neighbor graphs for all measurements for +all measurements described in \code{X_layers}} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +A \code{\link[SeuratObject]{Seurat}} object +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-SOMAExperiment-clone}{}}} \subsection{Method \code{clone()}}{ diff --git a/apis/r/man/roxygen/templates/param-obs-column-names.R b/apis/r/man/roxygen/templates/param-obs-column-names.R new file mode 100644 index 0000000000..8f9d41d93d --- /dev/null +++ b/apis/r/man/roxygen/templates/param-obs-column-names.R @@ -0,0 +1,2 @@ +#' @param obs_column_names Names of columns in \code{obs} to add as +#' cell-level meta data; by default, loads all columns diff --git a/apis/r/tests/testthat/test-SeuratOutgestExperiment.R b/apis/r/tests/testthat/test-SeuratOutgestExperiment.R new file mode 100644 index 0000000000..5ed76275c2 --- /dev/null +++ b/apis/r/tests/testthat/test-SeuratOutgestExperiment.R @@ -0,0 +1,221 @@ +test_that("Load Seurat object from SOMA Experiment mechanics", { + skip_if_not_installed('SeuratObject', .MINIMUM_SEURAT_VERSION('c')) + uri <- withr::local_tempdir("seurat-experiment-whole") + n_obs <- 1001L + n_var <- 99L + experiment <- create_and_populate_experiment( + uri = uri, + n_obs = n_obs, + n_var = n_var, + X_layer_names = c("counts", "logcounts") + ) + # Add embeddings + n_pcs <- 50L + n_umaps <- 2L + obsm <- SOMACollectionCreate(file.path(experiment$ms$get('RNA')$uri, 'obsm')) + obsm$add_new_sparse_ndarray( + key = 'X_pca', + type = arrow::int32(), + shape = c(n_obs, n_pcs) + ) + obsm$get('X_pca')$write(create_sparse_matrix_with_int_dims( + nrows = n_obs, + ncols = n_pcs + )) + obsm$add_new_sparse_ndarray( + key = 'X_umap', + type = arrow::int32(), + shape = c(n_obs, n_umaps) + ) + obsm$get('X_umap')$write(create_sparse_matrix_with_int_dims( + nrows = n_obs, + ncols = n_umaps, + seed = 2L + )) + experiment$ms$get("RNA")$add_new_collection(obsm, 'obsm') + # Add loadings + varm <- SOMACollectionCreate(file.path(experiment$ms$get('RNA')$uri, 'varm')) + varm$add_new_sparse_ndarray( + key = 'PCs', + type = arrow::int32(), + shape = c(n_var, n_pcs) + ) + varm$get('PCs')$write(create_sparse_matrix_with_int_dims( + nrows = n_var, + ncols = n_pcs + )) + experiment$ms$get('RNA')$add_new_collection(varm, 'varm') + # Add graph + obsp <- SOMACollectionCreate(file.path(experiment$ms$get('RNA')$uri, 'obsp')) + obsp$add_new_sparse_ndarray( + key = 'connectivities', + type = arrow::int32(), + shape = c(n_obs, n_obs) + ) + obsp$get('connectivities')$write(create_sparse_matrix_with_int_dims( + nrows = n_obs, + ncols = n_obs + )) + experiment$ms$get("RNA")$add_new_collection(obsp, 'obsp') + var_tbl <- experiment$ms$get('RNA')$get('var')$read() + obs_tbl <- experiment$obs$read() + X_layers <- list(RNA = c(counts = 'counts', data = 'logcounts')) + expect_no_condition(obj <- experiment$to_seurat(X_layers)) + expect_s4_class(obj, 'Seurat') + expect_identical(dim(obj), c(n_var, n_obs)) + expect_identical( + rownames(obj), + paste0( + 'feature', + var_tbl$GetColumnByName('soma_joinid')$as_vector() + ) + ) + expect_identical( + colnames(obj), + paste0( + 'cell', + obs_tbl$GetColumnByName('soma_joinid')$as_vector() + ) + ) + expect_true(all(experiment$obs$attrnames() %in% names(obj[[]]))) + expect_identical(SeuratObject::Assays(obj), 'RNA') + expect_s4_class(rna <- obj[['RNA']], 'Assay') + expect_identical(rownames(rna), rownames(obj)) + expect_identical(colnames(rna), colnames(obj)) + expect_identical(SeuratObject::Reductions(obj), c('pca', 'umap')) + expect_s4_class(pca <- obj[['pca']], 'DimReduc') + expect_identical(SeuratObject::Cells(pca), colnames(obj)) + expect_identical(rownames(SeuratObject::Loadings(pca)), rownames(obj)) + expect_identical(ncol(pca), n_pcs) + expect_s4_class(umap <- obj[['umap']], 'DimReduc') + expect_identical(SeuratObject::Cells(umap), colnames(obj)) + expect_identical(ncol(umap), n_umaps) + expect_true(SeuratObject::IsMatrixEmpty(SeuratObject::Loadings(umap))) + expect_identical(SeuratObject::Graphs(obj), 'connectivities') + expect_s4_class(graph <- obj[['connectivities']], 'Graph') + expect_identical(dim(graph), c(n_obs, n_obs)) + expect_identical(rownames(graph), colnames(obj)) + expect_identical(colnames(graph), colnames(obj)) + # Test named + expect_no_condition(obj <- experiment$to_seurat( + X_layers, + obs_index = 'baz', + var_index = c(RNA = 'quux') + )) + expect_s4_class(obj, 'Seurat') + expect_identical(dim(obj), c(n_var, n_obs)) + expect_identical( + rownames(obj), + var_tbl$GetColumnByName('quux')$as_vector() + ) + expect_identical( + colnames(obj), + obs_tbl$GetColumnByName('baz')$as_vector() + ) + expect_identical(SeuratObject::Assays(obj), 'RNA') + expect_false(all(experiment$obs$attrnames() %in% names(obj[[]]))) + expect_true(all(setdiff(experiment$obs$attrnames(), 'baz') %in% names(obj[[]]))) + expect_s4_class(rna <- obj[['RNA']], 'Assay') + expect_identical(rownames(rna), rownames(obj)) + expect_identical(colnames(rna), colnames(obj)) + expect_identical(SeuratObject::Reductions(obj), c('pca', 'umap')) + expect_s4_class(pca <- obj[['pca']], 'DimReduc') + expect_identical(SeuratObject::Cells(pca), colnames(obj)) + expect_identical(rownames(SeuratObject::Loadings(pca)), rownames(obj)) + expect_identical(ncol(pca), n_pcs) + expect_s4_class(umap <- obj[['umap']], 'DimReduc') + expect_identical(SeuratObject::Cells(umap), colnames(obj)) + expect_identical(ncol(umap), n_umaps) + expect_true(SeuratObject::IsMatrixEmpty(SeuratObject::Loadings(umap))) + expect_identical(SeuratObject::Graphs(obj), 'connectivities') + expect_s4_class(graph <- obj[['connectivities']], 'Graph') + expect_identical(dim(graph), c(n_obs, n_obs)) + expect_identical(rownames(graph), colnames(obj)) + expect_identical(colnames(graph), colnames(obj)) + # Test `X_layers` + expect_no_condition(obj <- experiment$to_seurat(list(RNA = c(counts = 'counts')))) + expect_s4_class( + counts <- SeuratObject::GetAssayData(obj[['RNA']], 'counts'), + 'dgCMatrix' + ) + expect_s4_class( + data <- SeuratObject::GetAssayData(obj[['RNA']], 'data'), + 'dgCMatrix' + ) + expect_identical(counts, data) + expect_no_condition(obj <- experiment$to_seurat( + list(RNA = c(data = 'logcounts')) + )) + expect_s4_class( + SeuratObject::GetAssayData(obj[['RNA']], 'data'), + 'dgCMatrix' + ) + expect_true(SeuratObject::IsMatrixEmpty(SeuratObject::GetAssayData( + obj[['RNA']], + 'counts' + ))) + # Test suppress reductions + expect_no_condition(obj <- experiment$to_seurat(X_layers, obsm_layers = FALSE)) + expect_length(SeuratObject::Reductions(obj), 0L) + expect_no_condition(obj <- experiment$to_seurat(X_layers, obsm_layers = NA)) + expect_length(SeuratObject::Reductions(obj), 0L) + expect_no_condition(obj <- experiment$to_seurat( + X_layers, + obsm_layers = list(RNA = 'umap') + )) + expect_identical(SeuratObject::Reductions(obj), 'umap') + expect_error(obj[['pca']]) + # Test suppress loadings + expect_no_condition(obj <- experiment$to_seurat(X_layers, varm_layers = FALSE)) + expect_identical(SeuratObject::Reductions(obj), c('pca', 'umap')) + expect_true(SeuratObject::IsMatrixEmpty(SeuratObject::Loadings(obj[['pca']]))) + # Test suppress graphs + expect_no_condition(obj <- experiment$to_seurat(X_layers, obsp_layers = FALSE)) + expect_length(SeuratObject::Graphs(obj), 0L) + # Test suppress cell-level meta data + expect_no_condition(obj <- experiment$to_seurat( + X_layers, + obs_column_names = FALSE + )) + expect_false(any(experiment$obs$attrnames() %in% names(obj[[]]))) + # Test `X_layers` assertions + expect_error(experiment$to_seurat(NULL)) + expect_error(experiment$to_seurat(FALSE)) + expect_error(experiment$to_seurat(1)) + expect_error(experiment$to_seurat('counts')) + expect_error(experiment$to_seurat(unlist(list(counts = 'counts', 'logcounts')))) + expect_error(experiment$to_seurat(list(counts = 'counts', data = 'logcounts'))) + expect_error(experiment$to_seurat(c(a = 'counts'))) + expect_error(experiment$to_seurat(c(scale.data = 'counts'))) + expect_error(query$to_seurat(c(data = 'tomato'))) + # Test `obs_index` assertions + expect_error(experiment$to_seurat(X_layers, obs_index = FALSE)) + expect_error(experiment$to_seurat(X_layers, obs_index = NA_character_)) + expect_error(experiment$to_seurat(X_layers, obs_index = 1)) + expect_error(experiment$to_seurat(X_layers, obs_index = c('baz', 'foo'))) + expect_error(experiment$to_seurat(X_layers, obs_index = 'tomato')) + # Test `obs_column_names` assertions + expect_error(experiment$to_seurat(X_layers, obs_column_names = 1L)) + expect_error(experiment$to_seurat( + X_layers, + obs_column_names = c( + NA_character_, + NA_character_ + ) + )) + expect_error(experiment$to_seurat(X_layers, obs_column_names = c(TRUE, FALSE))) + expect_error(experiment$to_seurat(X_layers, obs_column_names = 'tomato')) + expect_error(experiment$to_seurat(X_layers, obs_column_names = 'tomato')) + # Test `obsm_layers` assertions + expect_error(experiment$to_seurat(X_layers, obsm_layers = 1L)) + expect_error(experiment$to_seurat(X_layers, obsm_layers = 'tomato')) + expect_error(experiment$to_seurat(X_layers, obsm_layers = 'umap')) + # Test `varm_layers` assertions + expect_error(experiment$to_seurat(X_layers, varm_layers = 1L)) + expect_error(experiment$to_seurat(X_layers, varm_layers = 'PCs')) + expect_error(experiment$to_seurat(X_layers, varm_layers = c(tomato = 'PCs'))) + expect_error(experiment$to_seurat(X_layers, varm_layers = c(X_pca = 'tomato'))) + # Test `obsp_layers` assertions + expect_error(experiment$to_seurat(X_layers, obsp_layers = 1L)) + expect_error(experiment$to_seurat(X_layers, obsp_layers = 'tomato')) +})