diff --git a/R/cave-tables.R b/R/cave-tables.R index 82ef423..509a193 100644 --- a/R/cave-tables.R +++ b/R/cave-tables.R @@ -67,10 +67,13 @@ banc_nuclei <- function (rootids = NULL, else if (!is.null(rootids)) { rootids <- banc_ids(rootids) nuclei <- if (length(rootids) < 200) - banc_cave_query(table = table, filter_in_dict = list(pt_root_id=rootids), + banc_cave_query(table = table, + filter_in_dict = list(pt_root_id=rootids), ...) else - banc_cave_query(table = table, live = F, ...) + banc_cave_query(table = table, + live = TRUE, + ...) if (nrow(nuclei) == 0) return(nuclei) nuclei <- nuclei %>% @@ -89,25 +92,27 @@ banc_nuclei <- function (rootids = NULL, } } else { nuclei <- banc_cave_query(table = table, - filter_in_dict = list(id=nucleus_ids), ...) + filter_in_dict = list(id=nucleus_ids), + ...) nuclei %>% dplyr::right_join(data.frame(id = as.integer64(nucleus_ids)), by = "id") %>% dplyr::select(colnames(nuclei)) } - res - if (isTRUE(rawcoords)) - res - else { - res <- res %>% - dplyr::mutate(dplyr::across(dplyr::ends_with("position"), function(x) - nat::xyzmatrix2str(banc_raw2nm(x)))) - res$pt_position <- sapply(res$pt_position, paste, collapse=", ") - res <- res %>% - dplyr::rename(nucleus_id = .data$id, - nucleus_position_nm = .data$pt_position, - root_id = .data$pt_root_id) %>% - dplyr::filter(.data$valid=="t") + res$pt_position <- sapply(res$pt_position, paste, collapse=", ") + # res$pt_position_ref <- sapply(res$pt_position_ref, paste, collapse=", ") + res <- res %>% + dplyr::rename(nucleus_id = .data$id, + nucleus_position = .data$pt_position, + root_id = .data$pt_root_id) %>% + dplyr::filter(.data$valid=="t") + if (isFALSE(rawcoords)) { + # res <- res %>% + # dplyr::mutate(dplyr::across(dplyr::ends_with("position"), function(x) + # nat::xyzmatrix2str(banc_raw2nm(x)))) + res$nucleus_position_nm <- apply(banc_raw2nm(res$nucleus_position),1,paste_coords) + res$nucleus_position_nm <- gsub("\\(|\\)","",res$nucleus_position_nm) } + res } #' @rdname banc_cave_tables @@ -159,7 +164,7 @@ get_cave_table_data <- function(table, rootids = NULL, ...){ fafbseg::flywire_cave_query(table = table, filter_in_dict = list(pt_root_id=rootids), ...) } else { - fafbseg::flywire_cave_query(table = table, live = F, ...) + fafbseg::flywire_cave_query(table = table, live = TRUE, ...) } } else { df <- fafbseg::flywire_cave_query(table = table , ...) @@ -168,10 +173,11 @@ get_cave_table_data <- function(table, rootids = NULL, ...){ } # hidden -banc_cave_cell_types <- function(){ +banc_cave_cell_types <- function(user_id = NULL){ banc.cell.info <- banc_cell_info(rawcoords = TRUE) banc.cell.info$pt_position <- sapply(banc.cell.info$pt_position, paste, collapse=", ") banc.cell.info.mod <- banc.cell.info %>% + dplyr::filter(valid == 't') %>% dplyr::rowwise() %>% dplyr::mutate(pt_position = paste0(pt_position,collapse=",")) %>% dplyr::ungroup() %>% @@ -189,10 +195,7 @@ banc_cave_cell_types <- function(){ TRUE ~ NA )) %>% dplyr::mutate(user_id = dplyr::case_when( - grepl("neuron identity", tag2) ~ user_id, - grepl("^DN|^AMMC|^PDN|^LH|^il|^T1|^T5|^T4|^TY4|^IN|^il|^HS|^Mi|^PS|^CB|^FB|^AL| - ^FET|^bCS|SEZ-NSC-Hugin|^MDN|^OA|^PS|^ovi|giant fiber|^m-NSC|^l-NSC-ITP - |^OA|^LH|^CSD|^BDN|^AN|^AL|^AV|^AN|^MN|^SA|^Mi|^LH|^L1|^BDN|^LAL",tag) ~ user_id, + !is.na(cell_type) ~ user_id, TRUE ~ NA )) %>% dplyr::mutate(cell_type = gsub("\\\n.*|\\*.*","",cell_type)) %>% @@ -224,10 +227,80 @@ banc_cave_cell_types <- function(){ cell_class = paste(unique(na.omit(sort(cell_class))), collapse = ", "), super_class = paste(unique(na.omit(sort(super_class))), collapse = ", "), cell_type = paste(unique(na.omit(sort(cell_type))), collapse = ", "), - side = paste(unique(na.omit(sort(side))), collapse = ", ")) %>% + side = paste(unique(na.omit(sort(side))), collapse = ", "), + user_id = paste(unique(na.omit(sort(user_id))), collapse = ", ")) %>% dplyr::ungroup() %>% dplyr::rename(cell_id = id, root_id = pt_root_id, supervoxel_id = pt_supervoxel_id, position = pt_position) %>% dplyr::distinct(root_id, supervoxel_id, side, super_class, cell_class, cell_type, .keep_all = TRUE) %>% - dplyr::select(cell_id, root_id, supervoxel_id, position, side, super_class, cell_class, cell_type, user_id, notes) + dplyr::select(cell_id, root_id, supervoxel_id, position, side, super_class, cell_class, cell_type, user_id,notes) %>% + dplyr::left_join(banc_users %>% dplyr::distinct(pi_lab,cave_id) %>% dplyr::mutate(cave_id=as.character(cave_id)), + by=c("user_id"="cave_id")) %>% + dplyr::rename(cell_type_source = pi_lab) banc.cell.info.mod } + +# # # Updated cell_type_source column based on CAVE +# banc.cell.info.mod <- banc_cave_cell_types() +# banc.cell.info.mod <- subset(banc.cell.info.mod, ! user_id %in% c(355,52)) +# bc.all <- banctable_query("SELECT _id, root_id, cell_type, other_names, super_class, cell_class, proofread, region, cell_type_source from banc_meta") +# bc.all$cell_type_source <- unlist(sapply(bc.all$cell_type_source ,function(x) paste(unlist(x),collapse=", "))) +# bc.ct <- bc.all %>% +# dplyr::left_join(banc.cell.info.mod %>% +# dplyr::mutate(root_id=as.character(root_id)) %>% +# dplyr::distinct(root_id, cell_type, cell_type_source), +# by = "root_id") %>% +# dplyr::mutate( +# other_names = ifelse(is.na(other_names),'',other_names), +# cell_type_source.y = gsub("Rachel Wilson Lab", "Wilson lab", cell_type_source.y), +# cell_type_source.y = ifelse(is.na(cell_type_source.y),NA,tolower(cell_type_source.y)), +# cell_type_source.x = ifelse(is.na(cell_type_source.x),NA,tolower(cell_type_source.x)), +# cell_type_source.x = ifelse(grepl("NA|na|princeton|community|CAVE|Princeton",cell_type_source.x),NA,cell_type_source.x), +# cell_type_source.x = ifelse(cell_type_source.x%in%c("","NA"),NA,cell_type_source.x), +# cell_type_source.y = ifelse(cell_type_source.y%in%c("","NA"),NA,cell_type_source.y)) %>% +# dplyr::mutate(cell_type = dplyr::case_when( +# is.na(cell_type.x) ~ cell_type.y, +# is.na(cell_type.y) ~ cell_type.x, +# TRUE ~ cell_type.x), +# ) %>% +# dplyr::rowwise() %>% +# dplyr::mutate(other_names = dplyr::case_when( +# (!is.na(cell_type.x)&!is.na(cell_type.y)) & (cell_type.y!= cell_type.x) ~ paste(sort(unique(c(unlist(strsplit(other_names,split=", ")),cell_type.y))),collapse=", "), +# TRUE ~ other_names +# )) %>% +# dplyr::mutate( +# cell_type_source.y = cell_type_source.y, +# cell_type_source.x = cell_type_source.x, +# cell_type_source = dplyr::case_when( +# is.na(cell_type_source.x) ~ cell_type_source.y, +# is.na(cell_type_source.y) ~ cell_type_source.x, +# cell_type_source.x=="NA" ~ cell_type_source.y, +# cell_type_source.y=="NA" ~ cell_type_source.x, +# cell_type_source.x=="cave"&!is.na(cell_type_source.y) ~ cell_type_source.y, +# cell_type_source.x=="community"&!is.na(cell_type_source.y) ~ cell_type_source.y, +# cell_type_source.x==""&!is.na(cell_type_source.y) ~ cell_type_source.y, +# !is.na(cell_type_source.x)&!is.na(cell_type_source.y) ~ paste(sort(unique(c(cell_type_source.x,cell_type_source.y)), +# decreasing=TRUE), +# collapse=","), +# TRUE ~ cell_type_source.x +# )) %>% +# dplyr::filter(!is.na(cell_type_source), cell_type_source!="") %>% +# dplyr::distinct(`_id`, root_id, .keep_all = TRUE) %>% +# dplyr::select(`_id`, root_id, cell_type, other_names, cell_type_source, +# super_class, cell_class, proofread, region) %>% +# dplyr::mutate(other_names = gsub("^,|^ ,|^ ","",other_names), +# cell_type_source = ifelse(cell_type_source=='151184',NA,cell_type_source)) +# +# #Add cell type source labels +# bc.update <- as.data.frame(bc.ct) +# bc.update[is.na(bc.update)] <- '' +# banctable_update_rows(base='banc_meta', +# table = "banc_meta", +# df = bc.update[,c("_id","cell_type", "other_names", "cell_type_source")], +# append_allowed = FALSE, +# chunksize = 1000) + + + + + + diff --git a/R/data.R b/R/data.R index 5194f3b..fc6d381 100644 --- a/R/data.R +++ b/R/data.R @@ -200,4 +200,29 @@ #' `banc_brain_neuropil.surf$RegionList` "banc_volumes.df" +#' User information (name + CAVE ID) for active BANC users +#' +#' @name banc_users +#' @docType data +#' @description The purpose of this table is to map CAVE users IDs to names, in order to credit annotation work done in BANC CAVE. +#' This information is based on \href{https://docs.google.com/spreadsheets/d/1UFmeWr2uF9jTLVMw3bD6nM3ejM-b-HDZz6sQBPTEoZ8/edit?gid=1163959922#gid=1163959922}{google sheet}. +#' +#' @examples +#' \dontrun{ +#' View(banc_users) +#' } +"banc_users" + +# banc_users <- googlesheets4::read_sheet("1UFmeWr2uF9jTLVMw3bD6nM3ejM-b-HDZz6sQBPTEoZ8") +# colnames(banc_users) <- snakecase::to_snake_case(colnames(banc_users)) +# banc_users <- banc_users %>% +# dplyr::select(name, pi_lab, cave_id) %>% +# dplyr::mutate(name = gsub("\\(.*","",name)) %>% +# dplyr::mutate(pi_lab = gsub("\\(.*","",pi_lab)) %>% +# dplyr::mutate(pi_lab = ifelse(is.na(pi_lab),name,pi_lab)) %>% +# dplyr::mutate(pi_lab = ifelse(pi_lab=="PI",paste0(name," lab"),pi_lab)) %>% +# dplyr::arrange(pi_lab, name) %>% +# dplyr::distinct(cave_id, .keep_all = TRUE) %>% +# dplyr::filter(!is.na(pi_lab)) +# usethis::use_data(banc_users, overwrite = TRUE) diff --git a/R/l2.R b/R/l2.R index 786c917..c4bbebe 100644 --- a/R/l2.R +++ b/R/l2.R @@ -92,8 +92,8 @@ banc_read_l2skel <- function(id, OmitFailures=TRUE, dataset=NULL, ...) { #' @param id (Optional) The `root_id` of the neuron in the `roots` data #' frame. If NULL, it will be taken from the `x$root_id` slot. #' @param roots A data frame containing information about root points, i.e. nuclei -#' obtained using `bancr::roots()`. This data frame is assumed to have -#' columns named `root_id` and `pt_position`, where `pt_position` +#' obtained using `bancr:::banc_roots()`. This data frame is assumed to have +#' columns named `root_id` and `root_position_nm`, where `root_position_nm` #' specifies the 3D coordinates of the soma for each `root_id`. #' @param estimate if \code{TRUE} and nucleus position is not in `roots`, #' then root is estimated as a leaf node furthest outside of the brain neuropil. @@ -122,9 +122,18 @@ banc_reroot.neuron <- function(x, id = NULL, roots = NULL, estimate = TRUE, ...) if(is.null(id)){ stop("a root_id in roots must be given") } - df <- subset(roots, roots$root_id==id & !is.na(roots$pt_position)) + if("root_position_nm"%in%colnames(roots)){ + if("root_position"%in%colnames(roots)){ + warning("root_position_nm, converting root_position to root_position_nm") + roots$root_position_nm <- apply(banc_raw2nm(roots$root_position),1, paste_coords) + roots$root_position_nm <- gsub("\\(|\\)","",roots$root_position_nm) + }else{ + stop("root_position_nm not found in roots") + } + } + df <- subset(roots, roots$root_id==id & !is.na(roots$root_position_nm)) if(nrow(df)){ - soma <- nat::xyzmatrix(df$pt_position)[1,] + soma <- nat::xyzmatrix(df$root_position_nm)[1,] x <- nat::reroot(x = x, point = c(soma)) x$tags$soma <- nat::rootpoints(x) }else if (estimate){ # As best we can @@ -161,23 +170,44 @@ banc_reroot.neuronlist <- function(x, id = NULL, roots = NULL, estimate = TRUE, if(is.null(roots)){ roots <- banc_roots() } + if("root_position_nm"%in%colnames(roots)){ + if("root_position"%in%colnames(roots)){ + warning("root_position_nm, converting root_position to root_position_nm") + roots$root_position_nm <- apply(banc_raw2nm(roots$root_position),1, paste_coords) + roots$root_position_nm <- gsub("\\(|\\)","",roots$root_position_nm) + }else{ + stop("root_position_nm not found in roots") + } + } x <- add_field_seq(x, entries=id, field="id") nat::nlapply(x, FUN = banc_reroot.neuron, roots = roots, id = id, estimate = estimate, ...) } # hidden -banc_roots <- function(rawcoords = FALSE){ - roots <- bancr::banc_nuclei(rawcoords = rawcoords) - roots$pt_position <- roots$nucleus_position_nm - info <- banc_cell_info(rawcoords = rawcoords) +banc_roots <- function(){ + # Get roots from nuclei table + roots <- banc_nuclei(rawcoords = FALSE) + roots$root_position_nm <- roots$nucleus_position_nm + # Neurons with no nuclei are mostly sensory, their roots are usually their tracked point + info <- banc_cell_info(rawcoords = FALSE) info$root_id <-info$pt_root_id xyz <- nat::xyzmatrix(info$pt_position) p <- nat::pointsinside(xyz,surf=bancr::banc_brain_neuropil.surf) info <- info[!p,] - roots <- rbind(roots[,c("root_id","pt_position")],info[,c("root_id","pt_position")]) + info$root_position_nm <- info$pt_position + # Compile + roots <- rbind(roots[,c("root_id","root_position_nm")], + info[,c("root_id","root_position_nm")]) + roots$root_position <- apply(banc_nm2raw(roots$root_position_nm),1, paste_coords) + roots$root_position <- gsub("\\(|\\)","",roots$root_position) roots } +# hidden +paste_coords <- function (xyz, sep = ", ", brackets = TRUE) { + paste0(ifelse(brackets, "(", NULL), paste(xyz, sep = sep,collapse = sep), ifelse(brackets, ")", NULL)) +} + # hidden add_field_seq <- function (x, entries, field = "id", ...) { x = nat::as.neuronlist(x) diff --git a/data/banc_users.rda b/data/banc_users.rda new file mode 100644 index 0000000..01fd697 Binary files /dev/null and b/data/banc_users.rda differ diff --git a/man/banc_reroot.Rd b/man/banc_reroot.Rd index b099253..b54503a 100644 --- a/man/banc_reroot.Rd +++ b/man/banc_reroot.Rd @@ -19,8 +19,8 @@ banc_reroot(x, id = NULL, roots = NULL, estimate = TRUE, ...) frame. If NULL, it will be taken from the \code{x$root_id} slot.} \item{roots}{A data frame containing information about root points, i.e. nuclei -obtained using \code{bancr::roots()}. This data frame is assumed to have -columns named \code{root_id} and \code{pt_position}, where \code{pt_position} +obtained using \code{bancr:::banc_roots()}. This data frame is assumed to have +columns named \code{root_id} and \code{root_position_nm}, where \code{root_position_nm} specifies the 3D coordinates of the soma for each \code{root_id}.} \item{estimate}{if \code{TRUE} and nucleus position is not in \code{roots}, diff --git a/man/banc_users.Rd b/man/banc_users.Rd new file mode 100644 index 0000000..8559364 --- /dev/null +++ b/man/banc_users.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{banc_users} +\alias{banc_users} +\title{User information (name + CAVE ID) for active BANC users} +\format{ +An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 206 rows and 3 columns. +} +\usage{ +banc_users +} +\description{ +The purpose of this table is to map CAVE users IDs to names, in order to credit annotation work done in BANC CAVE. +This information is based on \href{https://docs.google.com/spreadsheets/d/1UFmeWr2uF9jTLVMw3bD6nM3ejM-b-HDZz6sQBPTEoZ8/edit?gid=1163959922#gid=1163959922}{google sheet}. +} +\examples{ +\dontrun{ +View(banc_users) +} +} +\keyword{datasets}