diff --git a/apis/r/R/SOMAExperiment.R b/apis/r/R/SOMAExperiment.R index 9742573d5e..d049a2cdcb 100644 --- a/apis/r/R/SOMAExperiment.R +++ b/apis/r/R/SOMAExperiment.R @@ -10,6 +10,285 @@ SOMAExperiment <- R6::R6Class( classname = "SOMAExperiment", inherit = SOMACollectionBase, + public = list( + #' @description Loads the experiment as a \code{\link[SeuratObject]{Seurat}} + #' object + #' + #' @param X_layers A named list of named character vectors describing the + #' measurements to load and the layers within those measurements to read in; + #' for example: \preformatted{ + #' list( + #' RNA = c(counts = "counts", data = "logcounts"), + #' ADT = c(counts = "counts") + #' ) + #' } + #' @template param-obs-index + #' @param var_index A named character of column names in \code{var} for + #' each measurement to use as feature names; for example: \preformatted{ + #' c(RNA = "gene_name", ADT = "protein_name") + #' } + #' Uses \code{paste0("feature", var_joinids())} by default + #' @template param-obs-column-names + #' @param var_column_names A named list of character vectors describing the + #' columns in \code{var} for each measurement to load in as feature-level + #' meta data; for example: \preformatted{ + #' list( + #' RNA = c("vst.mean", "vst.variable"), + #' ADT = c("ensembl_id") + #' ) + #' } + #' By default, loads in entire feature-level meta data for all measurements + #' described in \code{X_layers} + #' @param obsm_layers A named list of character vectors describing the + #' arrays in \code{obsm} for each measurement to load in as + #' dimensional reductions; for example: \preformatted{ + #' list( + #' RNA = c("pca", "umap"), + #' ADT = c("adtpca", "adtumap") + #' ) + #' } + #' By default, loads in all dimensional reductions for all measurements + #' described in \code{X_layers} + #' @param varm_layers A named list of named character vectors describing the + #' arrays in \code{varm} to load in as feature loadings and which array in + #' \code{obsm} they correspond to; for example: \preformatted{ + #' list( + #' RNA = c(pca = "PCs"), + #' ADT = c(adtpca = "ADTPCs") + #' ) + #' } + #' By default, will try to determine \code{varm_layers} from + #' \code{obsm_layers} and load in all loadings for all dimensional + #' reductions for all measurements described in \code{X_layers} + #' @param obsp_layers A named list of character vectors describing the + #' arrays in \code{obsp} for each measurement to load in as + #' nearest neighbor graphs; for example: \preformatted{ + #' list( + #' RNA = c("RNA_nn", "RNA_snn"), + #' ADT = c("ADT_nn") + #' ) + #' } + #' By default, loads in all nearest-neighbor graphs for all measurements for + #' all measurements described in \code{X_layers} + #' + #' @return A \code{\link[SeuratObject]{Seurat}} object + #' + to_seurat = function( + X_layers, + obs_index = NULL, + var_index = NULL, + obs_column_names = NULL, + var_column_names = NULL, + obsm_layers = NULL, + varm_layers = NULL, + obsp_layers = NULL + ) { + .check_seurat_installed() + stopifnot( + "'X_layers' must be named list" = is_named_list( + X_layers, + allow_empty = FALSE + ), + "'obs_index' must be a single character value" = is.null(obs_index) || + is_scalar_character(obs_index), + "'var_index' must be a named character vector" = is_character_or_null(var_index), + "'var_column_names' must be a named list" = is.null(var_column_names) || + is_named_list(var_column_names, allow_empty = FALSE), + "'obsm_layers' must be a named list" = is.null(obsm_layers) || + is_scalar_logical(obsm_layers) || + is_named_list(obsm_layers, allow_empty = FALSE), + "'varm_layers' must be a named list" = is.null(varm_layers) || + is_scalar_logical(varm_layers) || + is_named_list(varm_layers, allow_empty = FALSE), + "'obsp_layers' must be a named list" = is.null(obsp_layers) || + is_scalar_logical(obsp_layers) || + is_named_list(obsp_layers, allow_empty = FALSE) + ) + # Check `X_layers` + if (!all(names(X_layers) %in% self$ms$names())) { + msg <- paste( + "The following measurements could not be found in this experiment:", + string_collapse(setdiff(x = names(X_layers), y = self$ms$names())) + ) + stop(paste(strwrap(msg), collapse = '\n'), call. = FALSE) + } + layer_check <- vapply_lgl( + X = X_layers, + FUN = function(x) { + return(is.character(x) && is_named(x, allow_empty = FALSE)) + } + ) + if (!all(layer_check)) { + stop("All entries in 'X_layers' must be named lists", call. = FALSE) + } + layers <- names(X_layers) + nlayers <- length(X_layers) + null_list <- stats::setNames( + object = vector(mode = 'list', length = nlayers), + nm = layers + ) + # Check `obs_index` + if (is_scalar_character(obs_index)) { + obs_index <- match.arg(obs_index, choices = self$obs$attrnames()) + } + # Check `var_index` + var_index <- var_index %||% null_list + if (length(var_index) == 1L) { + var_index <- stats::setNames( + object = rep_len(x = var_index, length.out = nlayers), + nm = layers + ) + } + stopifnot( + "There must be one 'var_index' for every X layer" = length(var_index) == nlayers, + "'var_index' must be named" = is_named(var_index, allow_empty = FALSE), + "'var_index' must have the same names as 'X_layers'" = all(names(var_index) %in% layers) + ) + # Check `var_column_names` + var_column_names <- var_column_names %||% null_list + stopifnot( + "'var_column_names' must have the same names as 'X_layers'" = all(names(var_column_names) %in% layers) + ) + # Check `obsm_layers` + sublayer_check <- function(x, named = FALSE) { + checks <- c( + is.null(x), + is_scalar_logical(x), + if (isFALSE(named)) { + is.character(x) + } else { + is.character(x) && is_named(x, allow_empty = FALSE) + } + ) + return(Reduce(f = `||`, x = checks)) + } + obsm_layers <- obsm_layers %||% null_list + if (is_scalar_logical(obsm_layers)) { + obsm_layers <- stats::setNames( + object = rep_len(x = obsm_layers, length.out = nlayers), + nm = layers + ) + } + stopifnot( + "'obsm_layers' must have the same names as 'X_layers'" = all(names(obsm_layers) %in% layers), + "Every entry in 'obsm_layers' must be a character vector" = all(vapply_lgl(obsm_layers, sublayer_check)) + ) + # Check `varm_layers` + varm_layers <- varm_layers %||% null_list + if (is_scalar_logical(varm_layers)) { + varm_layers <- stats::setNames( + object = rep_len(x = varm_layers, length.out = nlayers), + nm = layers + ) + } + stopifnot( + "'varm_layers' must have the same names as 'X_layers'" = all(names(varm_layers) %in% layers), + "Every entry in 'varm_layers' must be a named character vector" = all(vapply_lgl( + X = varm_layers, + FUN = sublayer_check, + named = TRUE + )) + ) + # Check `obsp_layers` + obsp_layers <- obsp_layers %||% null_list + if (is_scalar_logical(obsp_layers)) { + obsp_layers <- stats::setNames( + object = rep_len(x = obsp_layers, length.out = nlayers), + nm = layers + ) + } + stopifnot( + "'obsp_layers' must have the same names as 'X_layers'" = all(names(obsp_layers) %in% layers), + "Every entry in 'obsp_layers' must be a character vector" = all(vapply_lgl(obsp_layers, sublayer_check)) + ) + # Load in the first assay as the default assay + active <- names(X_layers)[1L] + query <- SOMAExperimentAxisQuery$new( + experiment = self, + measurement_name = active + ) + object <- query$to_seurat( + X_layers = X_layers[[active]], + obs_index = obs_index, + var_index = var_index[[active]], + obs_column_names = obs_column_names, + var_column_names = var_column_names[[active]], + obsm_layers = obsm_layers[[active]], + varm_layers = varm_layers[[active]], + obsp_layers = obsp_layers[[active]] + ) + # Add alternate assays + for (assay in setdiff(x = layers, y = active)) { + query <- SOMAExperimentAxisQuery$new( + experiment = self, + measurement_name = assay + ) + obj <- tryCatch( + expr = query$to_seurat_assay( + X_layers = X_layers[[assay]], + obs_index = obs_index, + var_index = var_index[[assay]], + var_column_names = var_column_names[[assay]] + ), + error = function(e) { + warning(conditionMessage(e), call. = FALSE, immediate. = TRUE) + return(NULL) + } + ) + if (is.null(obj)) { + next + } + object[[assay]] <- obj + # Add reductions + embeddings <- obsm_layers[[assay]] + skip_reducs <- isFALSE(obsm_layers) || rlang::is_na(obsm_layers) + obsm <- tryCatch(expr = self$ms$get(assay)$get('obsm'), error = null) + if (is.null(obsm)) { + if (!skip_reducs) { + warning( + 'Dimensional reductions were requested for assay', + sQuote(assay), + ', but no reductions found', + call. = FALSE, + immediate. = TRUE + ) + } + skip_reducs <- TRUE + } + if (!skip_reducs) { + if (isTRUE(embeddings)) { + embeddings <- NULL + } + loadings <- varm_layers[['loadings']] + if (isTRUE(loadings)) { + loadings <- NULL + } + reductions <- .get_seurat_reductions( + query = query, + obsm_layers = embeddings, + varm_layers = loadings, + obs_index = obs_index, + var_index = var_index[[assay]] + ) + if (length(reductions)) { + for (reduc in names(reductions)) { + object[[reduc]] <- reductions + } + } + } + # Add graphs + graphs <- obsp_layers[[assay]] + obsp <- tryCatch(expr = self$ms$get(assay)$get('obsp'), error = null) + if (is.null(obsp)) { + if (!(isFALSE(graphs) || rlang::is_na(graphs))) { + '' + } + } + } + return(object) + } + ), + active = list( #' @field obs a [`SOMADataFrame`] containing primary annotations on the #' observation axis. The contents of the `soma_joinid` column define the diff --git a/apis/r/R/utils-assertions.R b/apis/r/R/utils-assertions.R index 5dac41f924..d03ddf9473 100644 --- a/apis/r/R/utils-assertions.R +++ b/apis/r/R/utils-assertions.R @@ -13,8 +13,8 @@ is_named <- function(x, allow_empty = TRUE) { !is.null(names(x)) && ifelse(allow_empty, TRUE, all(nzchar(x = names(x = x)))) } -is_named_list <- function(x) { - is.list(x) && is_named(x) +is_named_list <- function(x, allow_empty = TRUE) { + is.list(x) && is_named(x, allow_empty = allow_empty) } is_scalar_logical <- function(x) { diff --git a/apis/r/R/utils-seurat.R b/apis/r/R/utils-seurat.R index 6d1fb3bd22..99e5f80b8f 100644 --- a/apis/r/R/utils-seurat.R +++ b/apis/r/R/utils-seurat.R @@ -1,3 +1,55 @@ +#' Create \pkg{Seurat}-Style Names +#' +#' Convert AnnData-style names to \pkg{Seurat}-style names +#' +#' @param x A character vector of names +#' @param type Type of conversion to perform; choose from: +#' \itemize{ +#' \item \dQuote{\code{embeddings}}: convert AnnData-style \code{obsm} names +#' \item \dQuote{\code{loadings}}: convert AnnData-style \code{varm} names +#' } +#' +#' @return \code{x} with names converted to Seurat-style names +#' based on \code{type} +#' +#' @keywords internal +#' +#' @noRd +#' +.anndata_to_seurat_reduc <- function(x, type = c('embeddings', 'loadings')) { + if (is.null(x)) { + return(NULL) + } + stopifnot(is.character(x), is.character(type)) + type <- type[1L] + type <- match.arg(type) + return(switch( + EXPR = type, + embeddings = tolower(gsub(pattern = '^X_', replacement = '', x = x)), + loadings = { + m <- regexpr(pattern = '[[:upper:]]+', text = x) + x <- tolower(unlist(regmatches(x = x, m = m))) + x[x == 'pc'] <- 'pca' + x[x == 'ic'] <- 'ica' + x + } + )) +} + +#' Check \pkg{SeuratObject} Installation Status +#' +#' Check to see that valid version of \pkg{SeuratObject} is installed +#' +#' @inheritParams base::requireNamespace +#' +#' @return If \code{quietly}, then invisibly returns the installation status; +#' otherwise, errors if a valid version \pkg{SeuratObject} is unavailable or +#' invisibly returns \code{TRUE} +#' +#' @keywords internal +#' +#' @noRd +#' .check_seurat_installed <- function(quietly = FALSE) { pkg <- 'SeuratObject' checks <- c( @@ -27,26 +79,198 @@ return(invisible(TRUE)) } -.anndata_to_seurat_reduc <- function(x, type = c('embeddings', 'loadings')) { - if (is.null(x)) { - return(NULL) +#' Load \link[SeuratObject:Graph]{Nearest-Neighbor Graphs} +#' +#' Read in \link[SeuratObject:Graph]{nearest-neighbor graph} objects from a +#' \code{\link{SOMAExperimentAxisQuery}} object +#' +#' @param query A \code{\link{SOMAExperimentAxisQuery}} object +#' @param obsp_layers Names of arrays in \code{obsp} to load in as +#' \code{\link[SeuratObject]{Graph}s}; by default, loads all graphs +#' @template param-obs-index +#' +#' @return A named list of \code{\link[SeuratObject]{Graph}} objects +#' +#' @keywords internal +#' +#' @noRd +#' +.get_seurat_graphs <- function(query, obsp_layers = NULL, obs_index = NULL) { + .check_seurat_installed() + stopifnot( + "'query' must be a SOMAExperimentAxisQuery object" = inherits( + x = query, + what = 'SOMAExperimentAxisQuery' + ), + "'obsp_layers' must be a character vector" = is_character_or_null(obsp_layers), + "'obs_index' must be a single character value" = is.null(obs_index) || + (is_scalar_character(obs_index) && !is.na(obs_index)) + ) + ms_graphs <- tryCatch(expr = query$ms$obsp$names(), error = null) + if (is.null(ms_graphs)) { + stop("No nearest-neighbor graphs found", call. = FALSE) } - stopifnot(is.character(x), is.character(type)) - type <- type[1L] - type <- match.arg(type) - return(switch( - EXPR = type, - embeddings = tolower(gsub(pattern = '^X_', replacement = '', x = x)), - loadings = { - m <- regexpr(pattern = '[[:upper:]]+', text = x) - x <- tolower(unlist(regmatches(x = x, m = m))) - x[x == 'pc'] <- 'pca' - x[x == 'ic'] <- 'ica' - x + obsp_layers <- obsp_layers %||% ms_graphs + res <- stats::setNames( + object = vector(mode = 'list', length = length(obsp_layers)), + nm = obsp_layers + ) + for (grph in obsp_layers) { + mat <- tryCatch( + expr = query$to_seurat_graph(obsp_layer = grph, obs_index = obs_index), + error = function(e) { + warning(conditionMessage(e), call. = FALSE, immediate. = TRUE) + return(NULL) + } + ) + if (is.null(mat)) { + next } - )) + res[[grph]] <- mat + } + return(Filter(f = Negate(is.null), x = res)) +} + +#' Load \link[SeuratObject:DimReduc]{Dimensional Reductions} +#' +#' Read in \link[SeuratObject:DimReduc]{dimensional reduction} objects from a +#' \code{\link{SOMAExperimentAxisQuery}} object +#' +#' @param query A \code{\link{SOMAExperimentAxisQuery}} object +#' @param obsm_layers Names of arrays in \code{obsm} to load in as the +#' cell embeddings; pass \code{FALSE} to suppress loading in any +#' dimensional reductions; by default, loads all dimensional +#' reduction information +#' @param varm_layers Named vector of arrays in \code{varm} to load in as +#' the feature loadings; names must be names of array in \code{obsm} (eg. +#' \code{varm_layers = c(X_pca = 'PCs')}); will try to determine +#' \code{varm_layers} from \code{obsm_layers} +#' @template param-obs-index +#' @template param-var-index +#' +#' @return A named list of \code{\link[SeuratObject]{DimReduc}} objects where +#' the names are the \pkg{Seurat}-style names +#' +#' @keywords internal +#' +#' @noRd +#' +.get_seurat_reductions <- function( + query, + obsm_layers = NULL, + varm_layers = NULL, + obs_index = NULL, + var_index = NULL +) { + .check_seurat_installed() + stopifnot( + "'query' must be a SOMAExperimentAxisQuery object" = inherits( + x = query, + what = 'SOMAExperimentAxisQuery' + ), + "'obsm_layers' must be a character vector" = is_character_or_null(obsm_layers), + "'varm_layers' must be a named character vector" = is.null(varm_layers) || + (is.character(varm_layers) && is_named(varm_layers, allow_empty = FALSE)) || + is_scalar_logical(varm_layers), + "'obs_index' must be a single character value" = is.null(obs_index) || + (is_scalar_character(obs_index) && !is.na(obs_index)), + "'var_index' must be a single character value" = is.null(var_index) || + (is_scalar_character(var_index) && !is.na(var_index)) + ) + ms_embed <- tryCatch(expr = query$ms$obsm$names(), error = null) + if (is.null(ms_embed)) { + stop("No reductions found", call. = FALSE) + } + names(ms_embed) <- .anndata_to_seurat_reduc(ms_embed) + obsm_layers <- obsm_layers %||% ms_embed + res <- stats::setNames( + object = vector(mode = 'list', length = length(obsm_layers)), + nm = .anndata_to_seurat_reduc(obsm_layers) + ) + # Match loadings to embeddings + ms_load <- tryCatch(expr = query$ms$varm$names(), error = null) + if (isTRUE(varm_layers)) { + varm_layers <- NULL + } else if (rlang::is_na(varm_layers)) { + varm_layers <- FALSE + } + if (is.null(ms_load) && !isFALSE(varm_layers)) { + warning("No loadings found", call. = FALSE, immediate. = TRUE) + varm_layers <- FALSE + } + if (!isFALSE(varm_layers)) { + names(ms_load) <- ms_embed[.anndata_to_seurat_reduc(ms_load, 'loadings')] + varm_layers <- varm_layers %||% ms_load + reduc_misisng <- setdiff(x = names(varm_layers), y = names(ms_load)) + if (length(reduc_misisng) == length(varm_layers)) { + warning( + "None of the reductions specified in 'varm_layers' can be found", + call. = FALSE, + immediate. = TRUE + ) + varm_layers <- FALSE + } else if (length(reduc_misisng)) { + warning( + paste( + strwrap(paste( + "The reductions for the following loadings cannot be found in 'varm':", + sQuote(varm_layers[reduc_misisng]), + collapse = ', ' + )), + collapse = '\n' + ), + call. = FALSE, + immediate. = TRUE + ) + varm_layers <- varm_layers[!names(varm_layers) %in% reduc_misisng] + } + } + # Read in reductions and add to `object` + for (embed in obsm_layers) { + if (embed %in% names(ms_embed)) { + embed <- ms_embed[embed] + } + rname <- .anndata_to_seurat_reduc(embed) + reduc <- tryCatch( + expr = query$to_seurat_reduction( + obsm_layer = embed, + varm_layer = ifelse( + embed %in% names(varm_layers), + yes = varm_layers[embed], + no = FALSE + ), + obs_index = obs_index, + var_index = var_index + ), + error = function(e) { + warning(conditionMessage(e), call. = FALSE, immediate. = TRUE) + return(NULL) + } + ) + if (is.null(reduc)) { + next + } + res[[rname]] <- reduc + } + return(Filter(f = Negate(is.null), x = res)) } +#' Minimum Version of \pkg{SeuratObject} +#' +#' Fetch the minimum required version of \pkg{SeuratObject} +#' +#' @param repr Representation of the version; choose from: +#' \itemize{ +#' \item \dQuote{\code{v}} to return a \code{\link[base]{package_version}} +#' \item \dQuote{\code{c}} to return a \code{\link[base]{character}} +#' } +#' +#' @return The minimum required version of \pkg{SeuratObject} +#' +#' @keywords internal +#' +#' @noRd +#' .MINIMUM_SEURAT_VERSION <- function(repr = c('v', 'c')) { repr <- repr[1L] repr <- match.arg(arg = repr) diff --git a/apis/r/man/SOMAExperiment.Rd b/apis/r/man/SOMAExperiment.Rd index 9c6b271245..f1adfdab10 100644 --- a/apis/r/man/SOMAExperiment.Rd +++ b/apis/r/man/SOMAExperiment.Rd @@ -27,6 +27,7 @@ observation index domain, \code{obs_id}. All observations for the \section{Methods}{ \subsection{Public methods}{ \itemize{ +\item \href{#method-SOMAExperiment-to_seurat}{\code{SOMAExperiment$to_seurat()}} \item \href{#method-SOMAExperiment-clone}{\code{SOMAExperiment$clone()}} } } @@ -57,6 +58,100 @@ observation index domain, \code{obs_id}. All observations for the }} \if{html}{\out{