From 95e147499950c41935fd0da93142f4363902f790 Mon Sep 17 00:00:00 2001 From: alexanderbates Date: Mon, 25 Nov 2024 19:09:31 -0500 Subject: [PATCH] functions to read from live CAVE tables as default * make it clearer what sort of position data reroot functions are using --- R/cave-tables.R | 121 +++++++++++++++++++++++++++++++++++--------- R/data.R | 25 +++++++++ R/l2.R | 48 ++++++++++++++---- data/banc_users.rda | Bin 0 -> 3467 bytes man/banc_reroot.Rd | 4 +- man/banc_users.Rd | 22 ++++++++ 6 files changed, 185 insertions(+), 35 deletions(-) create mode 100644 data/banc_users.rda create mode 100644 man/banc_users.Rd diff --git a/R/cave-tables.R b/R/cave-tables.R index 82ef423..509a193 100644 --- a/R/cave-tables.R +++ b/R/cave-tables.R @@ -67,10 +67,13 @@ banc_nuclei <- function (rootids = NULL, else if (!is.null(rootids)) { rootids <- banc_ids(rootids) nuclei <- if (length(rootids) < 200) - banc_cave_query(table = table, filter_in_dict = list(pt_root_id=rootids), + banc_cave_query(table = table, + filter_in_dict = list(pt_root_id=rootids), ...) else - banc_cave_query(table = table, live = F, ...) + banc_cave_query(table = table, + live = TRUE, + ...) if (nrow(nuclei) == 0) return(nuclei) nuclei <- nuclei %>% @@ -89,25 +92,27 @@ banc_nuclei <- function (rootids = NULL, } } else { nuclei <- banc_cave_query(table = table, - filter_in_dict = list(id=nucleus_ids), ...) + filter_in_dict = list(id=nucleus_ids), + ...) nuclei %>% dplyr::right_join(data.frame(id = as.integer64(nucleus_ids)), by = "id") %>% dplyr::select(colnames(nuclei)) } - res - if (isTRUE(rawcoords)) - res - else { - res <- res %>% - dplyr::mutate(dplyr::across(dplyr::ends_with("position"), function(x) - nat::xyzmatrix2str(banc_raw2nm(x)))) - res$pt_position <- sapply(res$pt_position, paste, collapse=", ") - res <- res %>% - dplyr::rename(nucleus_id = .data$id, - nucleus_position_nm = .data$pt_position, - root_id = .data$pt_root_id) %>% - dplyr::filter(.data$valid=="t") + res$pt_position <- sapply(res$pt_position, paste, collapse=", ") + # res$pt_position_ref <- sapply(res$pt_position_ref, paste, collapse=", ") + res <- res %>% + dplyr::rename(nucleus_id = .data$id, + nucleus_position = .data$pt_position, + root_id = .data$pt_root_id) %>% + dplyr::filter(.data$valid=="t") + if (isFALSE(rawcoords)) { + # res <- res %>% + # dplyr::mutate(dplyr::across(dplyr::ends_with("position"), function(x) + # nat::xyzmatrix2str(banc_raw2nm(x)))) + res$nucleus_position_nm <- apply(banc_raw2nm(res$nucleus_position),1,paste_coords) + res$nucleus_position_nm <- gsub("\\(|\\)","",res$nucleus_position_nm) } + res } #' @rdname banc_cave_tables @@ -159,7 +164,7 @@ get_cave_table_data <- function(table, rootids = NULL, ...){ fafbseg::flywire_cave_query(table = table, filter_in_dict = list(pt_root_id=rootids), ...) } else { - fafbseg::flywire_cave_query(table = table, live = F, ...) + fafbseg::flywire_cave_query(table = table, live = TRUE, ...) } } else { df <- fafbseg::flywire_cave_query(table = table , ...) @@ -168,10 +173,11 @@ get_cave_table_data <- function(table, rootids = NULL, ...){ } # hidden -banc_cave_cell_types <- function(){ +banc_cave_cell_types <- function(user_id = NULL){ banc.cell.info <- banc_cell_info(rawcoords = TRUE) banc.cell.info$pt_position <- sapply(banc.cell.info$pt_position, paste, collapse=", ") banc.cell.info.mod <- banc.cell.info %>% + dplyr::filter(valid == 't') %>% dplyr::rowwise() %>% dplyr::mutate(pt_position = paste0(pt_position,collapse=",")) %>% dplyr::ungroup() %>% @@ -189,10 +195,7 @@ banc_cave_cell_types <- function(){ TRUE ~ NA )) %>% dplyr::mutate(user_id = dplyr::case_when( - grepl("neuron identity", tag2) ~ user_id, - grepl("^DN|^AMMC|^PDN|^LH|^il|^T1|^T5|^T4|^TY4|^IN|^il|^HS|^Mi|^PS|^CB|^FB|^AL| - ^FET|^bCS|SEZ-NSC-Hugin|^MDN|^OA|^PS|^ovi|giant fiber|^m-NSC|^l-NSC-ITP - |^OA|^LH|^CSD|^BDN|^AN|^AL|^AV|^AN|^MN|^SA|^Mi|^LH|^L1|^BDN|^LAL",tag) ~ user_id, + !is.na(cell_type) ~ user_id, TRUE ~ NA )) %>% dplyr::mutate(cell_type = gsub("\\\n.*|\\*.*","",cell_type)) %>% @@ -224,10 +227,80 @@ banc_cave_cell_types <- function(){ cell_class = paste(unique(na.omit(sort(cell_class))), collapse = ", "), super_class = paste(unique(na.omit(sort(super_class))), collapse = ", "), cell_type = paste(unique(na.omit(sort(cell_type))), collapse = ", "), - side = paste(unique(na.omit(sort(side))), collapse = ", ")) %>% + side = paste(unique(na.omit(sort(side))), collapse = ", "), + user_id = paste(unique(na.omit(sort(user_id))), collapse = ", ")) %>% dplyr::ungroup() %>% dplyr::rename(cell_id = id, root_id = pt_root_id, supervoxel_id = pt_supervoxel_id, position = pt_position) %>% dplyr::distinct(root_id, supervoxel_id, side, super_class, cell_class, cell_type, .keep_all = TRUE) %>% - dplyr::select(cell_id, root_id, supervoxel_id, position, side, super_class, cell_class, cell_type, user_id, notes) + dplyr::select(cell_id, root_id, supervoxel_id, position, side, super_class, cell_class, cell_type, user_id,notes) %>% + dplyr::left_join(banc_users %>% dplyr::distinct(pi_lab,cave_id) %>% dplyr::mutate(cave_id=as.character(cave_id)), + by=c("user_id"="cave_id")) %>% + dplyr::rename(cell_type_source = pi_lab) banc.cell.info.mod } + +# # # Updated cell_type_source column based on CAVE +# banc.cell.info.mod <- banc_cave_cell_types() +# banc.cell.info.mod <- subset(banc.cell.info.mod, ! user_id %in% c(355,52)) +# bc.all <- banctable_query("SELECT _id, root_id, cell_type, other_names, super_class, cell_class, proofread, region, cell_type_source from banc_meta") +# bc.all$cell_type_source <- unlist(sapply(bc.all$cell_type_source ,function(x) paste(unlist(x),collapse=", "))) +# bc.ct <- bc.all %>% +# dplyr::left_join(banc.cell.info.mod %>% +# dplyr::mutate(root_id=as.character(root_id)) %>% +# dplyr::distinct(root_id, cell_type, cell_type_source), +# by = "root_id") %>% +# dplyr::mutate( +# other_names = ifelse(is.na(other_names),'',other_names), +# cell_type_source.y = gsub("Rachel Wilson Lab", "Wilson lab", cell_type_source.y), +# cell_type_source.y = ifelse(is.na(cell_type_source.y),NA,tolower(cell_type_source.y)), +# cell_type_source.x = ifelse(is.na(cell_type_source.x),NA,tolower(cell_type_source.x)), +# cell_type_source.x = ifelse(grepl("NA|na|princeton|community|CAVE|Princeton",cell_type_source.x),NA,cell_type_source.x), +# cell_type_source.x = ifelse(cell_type_source.x%in%c("","NA"),NA,cell_type_source.x), +# cell_type_source.y = ifelse(cell_type_source.y%in%c("","NA"),NA,cell_type_source.y)) %>% +# dplyr::mutate(cell_type = dplyr::case_when( +# is.na(cell_type.x) ~ cell_type.y, +# is.na(cell_type.y) ~ cell_type.x, +# TRUE ~ cell_type.x), +# ) %>% +# dplyr::rowwise() %>% +# dplyr::mutate(other_names = dplyr::case_when( +# (!is.na(cell_type.x)&!is.na(cell_type.y)) & (cell_type.y!= cell_type.x) ~ paste(sort(unique(c(unlist(strsplit(other_names,split=", ")),cell_type.y))),collapse=", "), +# TRUE ~ other_names +# )) %>% +# dplyr::mutate( +# cell_type_source.y = cell_type_source.y, +# cell_type_source.x = cell_type_source.x, +# cell_type_source = dplyr::case_when( +# is.na(cell_type_source.x) ~ cell_type_source.y, +# is.na(cell_type_source.y) ~ cell_type_source.x, +# cell_type_source.x=="NA" ~ cell_type_source.y, +# cell_type_source.y=="NA" ~ cell_type_source.x, +# cell_type_source.x=="cave"&!is.na(cell_type_source.y) ~ cell_type_source.y, +# cell_type_source.x=="community"&!is.na(cell_type_source.y) ~ cell_type_source.y, +# cell_type_source.x==""&!is.na(cell_type_source.y) ~ cell_type_source.y, +# !is.na(cell_type_source.x)&!is.na(cell_type_source.y) ~ paste(sort(unique(c(cell_type_source.x,cell_type_source.y)), +# decreasing=TRUE), +# collapse=","), +# TRUE ~ cell_type_source.x +# )) %>% +# dplyr::filter(!is.na(cell_type_source), cell_type_source!="") %>% +# dplyr::distinct(`_id`, root_id, .keep_all = TRUE) %>% +# dplyr::select(`_id`, root_id, cell_type, other_names, cell_type_source, +# super_class, cell_class, proofread, region) %>% +# dplyr::mutate(other_names = gsub("^,|^ ,|^ ","",other_names), +# cell_type_source = ifelse(cell_type_source=='151184',NA,cell_type_source)) +# +# #Add cell type source labels +# bc.update <- as.data.frame(bc.ct) +# bc.update[is.na(bc.update)] <- '' +# banctable_update_rows(base='banc_meta', +# table = "banc_meta", +# df = bc.update[,c("_id","cell_type", "other_names", "cell_type_source")], +# append_allowed = FALSE, +# chunksize = 1000) + + + + + + diff --git a/R/data.R b/R/data.R index 5194f3b..fc6d381 100644 --- a/R/data.R +++ b/R/data.R @@ -200,4 +200,29 @@ #' `banc_brain_neuropil.surf$RegionList` "banc_volumes.df" +#' User information (name + CAVE ID) for active BANC users +#' +#' @name banc_users +#' @docType data +#' @description The purpose of this table is to map CAVE users IDs to names, in order to credit annotation work done in BANC CAVE. +#' This information is based on \href{https://docs.google.com/spreadsheets/d/1UFmeWr2uF9jTLVMw3bD6nM3ejM-b-HDZz6sQBPTEoZ8/edit?gid=1163959922#gid=1163959922}{google sheet}. +#' +#' @examples +#' \dontrun{ +#' View(banc_users) +#' } +"banc_users" + +# banc_users <- googlesheets4::read_sheet("1UFmeWr2uF9jTLVMw3bD6nM3ejM-b-HDZz6sQBPTEoZ8") +# colnames(banc_users) <- snakecase::to_snake_case(colnames(banc_users)) +# banc_users <- banc_users %>% +# dplyr::select(name, pi_lab, cave_id) %>% +# dplyr::mutate(name = gsub("\\(.*","",name)) %>% +# dplyr::mutate(pi_lab = gsub("\\(.*","",pi_lab)) %>% +# dplyr::mutate(pi_lab = ifelse(is.na(pi_lab),name,pi_lab)) %>% +# dplyr::mutate(pi_lab = ifelse(pi_lab=="PI",paste0(name," lab"),pi_lab)) %>% +# dplyr::arrange(pi_lab, name) %>% +# dplyr::distinct(cave_id, .keep_all = TRUE) %>% +# dplyr::filter(!is.na(pi_lab)) +# usethis::use_data(banc_users, overwrite = TRUE) diff --git a/R/l2.R b/R/l2.R index 786c917..c4bbebe 100644 --- a/R/l2.R +++ b/R/l2.R @@ -92,8 +92,8 @@ banc_read_l2skel <- function(id, OmitFailures=TRUE, dataset=NULL, ...) { #' @param id (Optional) The `root_id` of the neuron in the `roots` data #' frame. If NULL, it will be taken from the `x$root_id` slot. #' @param roots A data frame containing information about root points, i.e. nuclei -#' obtained using `bancr::roots()`. This data frame is assumed to have -#' columns named `root_id` and `pt_position`, where `pt_position` +#' obtained using `bancr:::banc_roots()`. This data frame is assumed to have +#' columns named `root_id` and `root_position_nm`, where `root_position_nm` #' specifies the 3D coordinates of the soma for each `root_id`. #' @param estimate if \code{TRUE} and nucleus position is not in `roots`, #' then root is estimated as a leaf node furthest outside of the brain neuropil. @@ -122,9 +122,18 @@ banc_reroot.neuron <- function(x, id = NULL, roots = NULL, estimate = TRUE, ...) if(is.null(id)){ stop("a root_id in roots must be given") } - df <- subset(roots, roots$root_id==id & !is.na(roots$pt_position)) + if("root_position_nm"%in%colnames(roots)){ + if("root_position"%in%colnames(roots)){ + warning("root_position_nm, converting root_position to root_position_nm") + roots$root_position_nm <- apply(banc_raw2nm(roots$root_position),1, paste_coords) + roots$root_position_nm <- gsub("\\(|\\)","",roots$root_position_nm) + }else{ + stop("root_position_nm not found in roots") + } + } + df <- subset(roots, roots$root_id==id & !is.na(roots$root_position_nm)) if(nrow(df)){ - soma <- nat::xyzmatrix(df$pt_position)[1,] + soma <- nat::xyzmatrix(df$root_position_nm)[1,] x <- nat::reroot(x = x, point = c(soma)) x$tags$soma <- nat::rootpoints(x) }else if (estimate){ # As best we can @@ -161,23 +170,44 @@ banc_reroot.neuronlist <- function(x, id = NULL, roots = NULL, estimate = TRUE, if(is.null(roots)){ roots <- banc_roots() } + if("root_position_nm"%in%colnames(roots)){ + if("root_position"%in%colnames(roots)){ + warning("root_position_nm, converting root_position to root_position_nm") + roots$root_position_nm <- apply(banc_raw2nm(roots$root_position),1, paste_coords) + roots$root_position_nm <- gsub("\\(|\\)","",roots$root_position_nm) + }else{ + stop("root_position_nm not found in roots") + } + } x <- add_field_seq(x, entries=id, field="id") nat::nlapply(x, FUN = banc_reroot.neuron, roots = roots, id = id, estimate = estimate, ...) } # hidden -banc_roots <- function(rawcoords = FALSE){ - roots <- bancr::banc_nuclei(rawcoords = rawcoords) - roots$pt_position <- roots$nucleus_position_nm - info <- banc_cell_info(rawcoords = rawcoords) +banc_roots <- function(){ + # Get roots from nuclei table + roots <- banc_nuclei(rawcoords = FALSE) + roots$root_position_nm <- roots$nucleus_position_nm + # Neurons with no nuclei are mostly sensory, their roots are usually their tracked point + info <- banc_cell_info(rawcoords = FALSE) info$root_id <-info$pt_root_id xyz <- nat::xyzmatrix(info$pt_position) p <- nat::pointsinside(xyz,surf=bancr::banc_brain_neuropil.surf) info <- info[!p,] - roots <- rbind(roots[,c("root_id","pt_position")],info[,c("root_id","pt_position")]) + info$root_position_nm <- info$pt_position + # Compile + roots <- rbind(roots[,c("root_id","root_position_nm")], + info[,c("root_id","root_position_nm")]) + roots$root_position <- apply(banc_nm2raw(roots$root_position_nm),1, paste_coords) + roots$root_position <- gsub("\\(|\\)","",roots$root_position) roots } +# hidden +paste_coords <- function (xyz, sep = ", ", brackets = TRUE) { + paste0(ifelse(brackets, "(", NULL), paste(xyz, sep = sep,collapse = sep), ifelse(brackets, ")", NULL)) +} + # hidden add_field_seq <- function (x, entries, field = "id", ...) { x = nat::as.neuronlist(x) diff --git a/data/banc_users.rda b/data/banc_users.rda new file mode 100644 index 0000000000000000000000000000000000000000..01fd6974134a4a8bd71754ea596f64f8f402fd9a GIT binary patch literal 3467 zcmV;64RrECT4*^jL0KkKS#_Tj;Q$N&|NsC0|NX|_|9}7g|Mvg?|M>nL|Nr0p|Hs^2 z|MAoiKn36oULPNPG4ARs?;VA)p7*>DIf0c30lm)X8Mj`+siaRSq}1AJ>WtB-^wBhF z=s@){41j22GHHOB4?=0`V1`VLfHE2Y(@g^-1Y~5v0LhTV(@a631Oj>hni(1sL}c;= zr<7zpO&SAD0BMjK003wKpa5t90000DG&BrA0wyC6O%M`K8cFJUngQh+GzNeT00E!_ zKmY&$0000000E!?&;S9ZLJ%Svn?*dQqfHYu(W-u_i2X!*fB?_{HlP3i0LTCU00000 z0D77L0BAr4hJzqzG&Im?1|v;0Xc_>}15E~i8USbw4FRJ-X`zq5T}%frGR#kN>rR={B;a2EpeTV92&f%W|b3NHcSp31NN=woG4T$h1k;%BMRK(W!OIb(pZwSR z`V^=H&H@+@JgvP5A%b2qh!1Gb1O;9Vf667$); z`z&*M76Q4ya+*=WaN8(%0Jc_5PG_yZJhk5^MZ*~liC}QnI2~>!Fh>Or%012jm1BZ+ z1#n{!Pao@@!APwF`8Xa&K?j437j0C>ubJ=hT4JTRnOy5Z`0y>JNr~+Tgi*Eiz?pU) z5~Q)5280zy#>~;~X4A5nZzYxM@ESNFSG=AT1;$hyLcpt02VrEoQZamJ!*J6cQ!Gj6 zQ3RQ?`=E}IV2#elSB6QUWun*)Qw z@)a<}akrj@^bAA=C=ysh&K9R2?Bp)K`|{wm^Ef|WFdbZm{lXNC;S%Db%KXrPX8DC{&c(^+eQLBVYBeof>kiXqi`ejWNV$6h=?c+vuEY9W;RjY zlJEmEx0SPcAt&$MZJPYx+U42Vc5$U+h>ZX%S6&Sg6sY%1rW&|KYO zc(S|P=H#$&nR)qByPiXT;2=1ev<3(1UWKZmd=q;^YIF~FIOP;vtA zQW$SgrnGqb3ID3SmhehZi7Bi`VZxhIRq0YJ#jhoqk-Er}FFRC3(*geh^(Yb9k2RF<=gZ8Z$L#lo5%ZH-@&jb|1@}5Hq2F4cHi;QJ?b!%ern&Eju1*H@q zS{e?4-<2~B$WLsh%Enb_)cB&r78)HGBcs#I40JC{bM?C@K}h5&On|Jw|HeI6G^#mI ztHiNXXjymUSABX7rY18nu;J8&ak9-A8s`cMdR7O_O}-foXruCj5Ke;7Mn$>Trss;$ z+xM~ zxWa8j7lyXBSV_K;b2?}6ZHSUAh>`$EtcXbx7V|5A&Tr+OVp7~6i3$0PCEc9K;n9(W5}r=t~iJh?4r}x`>^$_Nbqh z=cMAG28fD?Gti23r_$3$10ezRsboNiXpY2JPXtpD1|53uaNP9Z>s5i}s>}@SpG;*a z^83_8Q7rj$L^I|R91vR(Tnmq2S)>!$sLN9>m}bLT=`9JB8qux<=h@4Pf(B31>TO*Q^!HJ^P*MpD7bq%c=*?FN6zMlg z1tj}Rszr(C*n>2;NO5vl&`QkVlR4SRTA*>$V=m~DRV7YUPUbX#<7|+(8(>4pa`E96 zoG=TSo_%-tD2;G88KpBqx76Nbt!}9**N!@Koku#Sk%@Ujcaa`yxu^Y$olj36GcLhQD%Qe8gWv0P6JFE$0TMJ7JCnhXwL#8B4YD- zx0}i#AodBE1W466+;t^HJmTU7*w#tk8SPj_NW-z=5Dt*FQQ$k=+v;Hrp=GAniLTXZ zFfG1%T)r8&$%bg7fTe*YKtxI!9}g3EK0(thKn4v{V9ByAeh)P>5lI0>Y({_ba<}WYE+AtryW4(zk@>&Y8{Cvr?tV2!c{U$cSw8l}&l1 zV_l5`mb{i0NF?rxJM7y0hk|mWINBlz1;(S}2Qt`s@iAV}UI=S}f*AHLS|C|pxT-@m z^lXC+gEpT1=ccq4NCI@2Fp3a{)NU6c<>5q@EtV07`TBUgpW#EQ`EvwOj-sA>Sr0u%VOOimV(bWF0&m~!)d_$-8+ck2N5L3y0)qy7(Wt~%%p-}Pl+0{Lr zd(}ZmRD4}qV60@U_nd;EmNT`5&1-E^)KSb|8{1!fhf!8sfLpj2l$_Ifp6hwjCySgg!u`LculV}^8 z89r-kT^k)_)*46zvRev7CL55!^G4y*X(sH!9?fHPjtxtP1lIcCHr>t5j#G%Wu(qXK zfs-SF&D|3>U2d8 zpcQP+jld!vEeDFxN=-(cMp(dRT|`ums(!a|%f|f?QG-L#+OPD(Udd9XSu+f@pUvl%FQUy#92awq}s020zzppFt{fQ)6xF^IG_V( z9`~}i9Z68vXb^~phr1qYoDOJk<)4_E?vzLyB>cJ1 z-3VW7)kuV_ASs{ZHIQ`u027=PuBb~$pUN~uLl$s#fe6smyL1C3p|2?gc%_-qXfA!Xp-N`pv~ tX^c!fCj&Y6u%Vlhgd7&j0n8yQ_$)-&!oLp5fldF#+>uTcBpqkPI6(NIN^AfC literal 0 HcmV?d00001 diff --git a/man/banc_reroot.Rd b/man/banc_reroot.Rd index b099253..b54503a 100644 --- a/man/banc_reroot.Rd +++ b/man/banc_reroot.Rd @@ -19,8 +19,8 @@ banc_reroot(x, id = NULL, roots = NULL, estimate = TRUE, ...) frame. If NULL, it will be taken from the \code{x$root_id} slot.} \item{roots}{A data frame containing information about root points, i.e. nuclei -obtained using \code{bancr::roots()}. This data frame is assumed to have -columns named \code{root_id} and \code{pt_position}, where \code{pt_position} +obtained using \code{bancr:::banc_roots()}. This data frame is assumed to have +columns named \code{root_id} and \code{root_position_nm}, where \code{root_position_nm} specifies the 3D coordinates of the soma for each \code{root_id}.} \item{estimate}{if \code{TRUE} and nucleus position is not in \code{roots}, diff --git a/man/banc_users.Rd b/man/banc_users.Rd new file mode 100644 index 0000000..8559364 --- /dev/null +++ b/man/banc_users.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{banc_users} +\alias{banc_users} +\title{User information (name + CAVE ID) for active BANC users} +\format{ +An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 206 rows and 3 columns. +} +\usage{ +banc_users +} +\description{ +The purpose of this table is to map CAVE users IDs to names, in order to credit annotation work done in BANC CAVE. +This information is based on \href{https://docs.google.com/spreadsheets/d/1UFmeWr2uF9jTLVMw3bD6nM3ejM-b-HDZz6sQBPTEoZ8/edit?gid=1163959922#gid=1163959922}{google sheet}. +} +\examples{ +\dontrun{ +View(banc_users) +} +} +\keyword{datasets}