Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to BANC registrations and seatable access #4

Merged
merged 17 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@ Imports:
readr,
checkmate,
utils,
ggplot2 (>= 3.5.1.9000)
reticulate,
ggplot2 (>= 3.5),
stringr,
memoise
Suggests:
testthat (>= 3.0.0),
reticulate,
readobj,
Rvcg,
Morpho,
Expand Down
12 changes: 12 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,15 @@ export(banc_segid_from_cellid)
export(banc_set_token)
export(banc_side_view)
export(banc_to_JRC2018F)
export(banc_upload_mesh)
export(banc_view)
export(banc_vnc_view)
export(banc_voxdims)
export(banc_xyz2id)
export(banctable_login)
export(banctable_query)
export(banctable_set_token)
export(banctable_update_rows)
export(choose_banc)
export(dr_banc)
export(elastix_xform)
Expand All @@ -74,6 +79,13 @@ importFrom(magrittr,"%>%")
importFrom(nat,xyzmatrix)
importFrom(nat,xyzmatrix2str)
importFrom(pbapply,pbsapply)
importFrom(reticulate,conda_create)
importFrom(reticulate,conda_install)
importFrom(reticulate,conda_list)
importFrom(reticulate,import)
importFrom(reticulate,py_install)
importFrom(reticulate,use_condaenv)
importFrom(rlang,.data)
importFrom(utils,browseURL)
importFrom(utils,packageVersion)
importFrom(utils,write.table)
231 changes: 231 additions & 0 deletions R/banc-table.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
#' @title Read and write to the seatable for draft BANC annotations
#'
#' @description These functions use the logic and wrap some code
#' from the `flytable_.*` functions in the `fafbseg` R package.
#' \code{banctable_set_token} will obtain and store a permanent
#' seatable user-level API token.
#' \code{banctable_query} performs a SQL query against a banctable
#' database. You can omit the \code{base} argument unless you have tables of
#' the same name in different bases.
#' \code{banctable_base} returns a \code{base} object (equivalent to
#' a mysql database) which allows you to access one or more tables, logging in
#' to the service if necessary. The returned base object gives you full access
#' to the Python
#' \href{https://seatable.github.io/seatable-scripts/python/base/}{\code{Base}}
#' API allowing a range of row/column manipulations.
#' \code{banctable_update_rows} updates existing rows in a table, returning TRUE on success.
#'
#' @param sql A SQL query string. See examples and
#' \href{https://seatable.github.io/seatable-scripts/python/query/}{seatable
#' docs}.
#' @param limit An optional limit, which only applies if you do not specify a
#' limit directly in the \code{sql} query. By default seatable limits SQL
#' queries to 100 rows. We increase the limit to 100000 rows by default.
#' @param convert Expert use only: Whether or not to allow the Python seatable
#' module to process raw output from the database. This is principally for
#' debugging purposes. NB this imposes a requirement of seatable_api >=2.4.0.
#' @param python Logical. Whether to return a Python pandas DataFrame. The default of FALSE returns an R data.frame
#' @param base Character vector specifying the \code{base}
#' @param table Character vector specifying a table for which you want a
#' \code{base} object.
# @param workspace_id A numeric id specifying the workspace. Advanced use only
# since we can normally figure this out from \code{base_name}.
# @param cached Whether to use a cached base object
#' @param token normally retrieved from \code{BANCTABLE_TOKEN} environment
#' variable.
#' @param user,pwd banctable user and password used by \code{banctable_set_token}
#' to obtain a token
#' @param url Optional URL to the server
#' @param ac A seatable connection object as returned by \code{banctable_login}.
#' @param df A data.frame containing the data to upload including an `_id`
#' column that can identify each row in the remote table.
#' @param append_allowed Logical. Whether rows without row identifiers can be appended.
#' @param chunksize To split large requests into smaller ones with max this many rows.
#' @param ... Additional arguments passed to pbsapply which might include cl=2 to specify a number of parallel jobs to run.
#'
#' @return a \code{data.frame} of results. There should be 0 rows if no rows
#' matched query.
#'
#' @seealso \code{fafbseg::\link{flytable_query}}
#' @examples
#' \dontrun{
#' # Do this once
#' banctable_set_token(user="MY_EMAIL_FOR_SEATABLE.com",
#' pwd="MY_SEATABLE_PASSWORD",
#' url="https://cloud.seatable.io/")
#'
#' # Thereafter:
#' banc.meta <- banctable_query()
#' }
#' @export
#' @rdname banctable_query
banctable_query <- function (sql = "SELECT * FROM banc_meta",
                             limit = 100000L,
                             base = NULL,
                             python = FALSE,
                             convert = TRUE,
                             ac = NULL){
  # Run a SQL SELECT against a seatable base and return the rows either as an
  # R data.frame (default) or as a pandas DataFrame (python = TRUE).
  # Returns NULL with a warning when the Python query call fails.

  # Log in first when no connection was supplied.
  # NOTE(review): `ac` is not passed on to banctable_base() below.
  if (is.null(ac)) ac <- banctable_login()
  checkmate::assert_character(sql, len = 1, pattern = "select",
                              ignore.case = TRUE)

  # The table name is the first token after FROM (optionally single-quoted)
  res <- stringr::str_match(
    sql,
    stringr::regex("\\s+FROM\\s+[']{0,1}([^, ']+).*", ignore_case = TRUE)
  )
  if (any(is.na(res)[, 2]))
    stop("Cannot identify a table name in your sql statement!\n")
  table <- res[, 2]

  if (is.null(base)) {
    base <- try(banctable_base(table = table))
    if (inherits(base, "try-error"))
      stop("I inferred table_name: ", table, " from your SQL query but couldn't connect to a base with this table!")
  } else if (is.character(base)) {
    base <- banctable_base(base_name = base)
  }

  # Only append a LIMIT clause when the query does not already carry one.
  # SQL keywords are case-insensitive, so the check must be too; otherwise
  # "... LIMIT 10" would end up as "... LIMIT 10 LIMIT 100000".
  has_limit <- isTRUE(grepl("\\s+limit\\s+\\d+", sql, ignore.case = TRUE))
  if (!has_limit && !isFALSE(limit)) {
    if (!is.finite(limit))
      limit <- .Machine$integer.max
    # format() avoids scientific notation (e.g. 1e+06) for double limits
    sql <- paste(sql, "LIMIT", format(limit, scientific = FALSE))
  }

  # Capture anything Python prints so it can be reported if the query fails
  pyout <- reticulate::py_capture_output(
    ll <- try(reticulate::py_call(base$query, sql, convert = convert),
              silent = TRUE)
  )
  if (inherits(ll, "try-error")) {
    warning(paste("No rows returned by banctable", pyout,
                  collapse = "\n"))
    return(NULL)
  }

  pd <- reticulate::import("pandas")
  reticulate::py_capture_output(
    pdd <- reticulate::py_call(pd$DataFrame, ll)
  )
  if (python)
    return(pdd)

  colinfo <- fafbseg::flytable_columns(table, base)
  df <- fafbseg:::flytable2df(fafbseg:::pandas2df(pdd, use_arrow = FALSE),
                              tidf = colinfo)

  # Column order: table order for "SELECT *", otherwise the order of the
  # fields parsed from the query; any remaining columns are appended.
  fields <- fafbseg:::sql2fields(sql)
  if (length(fields) == 1 && fields == "*") {
    toorder <- intersect(colinfo$name, colnames(df))
  } else {
    toorder <- intersect(fields, colnames(df))
  }
  rest <- setdiff(colnames(df), toorder)
  df[c(toorder, rest)]
}

#' @export
#' @rdname banctable_query
banctable_set_token <- function(user, pwd, url = "https://cloud.seatable.io/"){
  # Authenticate against seatable with a user name and password, then store
  # the resulting API token for the current session and for future sessions
  # (by appending it to ~/.Renviron). Returns NULL invisibly.
  st <- fafbseg:::check_seatable()
  ac <- reticulate::py_call(st$Account, login_name = user,
                            password = pwd, server_url = url)
  ac$auth()
  # The variable name must be upper case: banctable_login() reads
  # BANCTABLE_TOKEN, and environment variable names are case-sensitive on
  # Unix-like systems, so a lower-case name would never be found.
  Sys.setenv(BANCTABLE_TOKEN = ac$token)
  cat("BANCTABLE_TOKEN='", ac$token, "'\n", sep = "", append = TRUE,
      file = path.expand("~/.Renviron"))
  invisible(NULL)
}

#' @export
#' @rdname banctable_query
banctable_login <- function(url = "https://cloud.seatable.io/",
                            token = Sys.getenv("BANCTABLE_TOKEN",
                                               unset = NA_character_)) {
  # Thin wrapper: hand the server URL and the token (by default read from the
  # BANCTABLE_TOKEN environment variable) to fafbseg's seatable login.
  fafbseg::flytable_login(
    url = url,
    token = token
  )
}


#' @export
#' @rdname banctable_query
banctable_update_rows <- function (df, table, base = NULL, append_allowed = TRUE, chunksize = 1000L, ...) {
  # Update existing rows of `table` from `df`; returns TRUE on success.
  # Requests with more than `chunksize` rows are split into chunks and sent
  # through recursive calls, with `...` forwarded to pbapply::pbsapply.

  # A base name (or nothing) rather than a base object was given: resolve it
  if (is.null(base) || is.character(base)) {
    base <- banctable_base(base_name = base, table = table)
  }

  n_rows <- nrow(df)
  if (!isTRUE(n_rows > 0)) {
    warning("No rows to update in `df`!")
    return(TRUE)
  }

  df <- fafbseg:::df2flytable(
    df,
    append = ifelse(append_allowed, NA, FALSE)
  )

  # Rows without a row id would have to be appended, which is unsupported
  is_new <- is.na(df[["row_id"]])
  if (any(is_new)) {
    stop("Adding new rows not yet implemented")
    # flytable_append_rows(df[newrows, , drop = FALSE], table = table,
    #                      base = base, chunksize = chunksize, ...)
    # df = df[!newrows, , drop = FALSE]
    # nx = nrow(df)
  }
  # Only relevant once the append branch above is implemented
  if (!isTRUE(n_rows > 0)) {
    return(TRUE)
  }

  if (n_rows > chunksize) {
    # Label each row with the index of the chunk it belongs to
    n_chunks <- ceiling(n_rows / chunksize)
    chunk_of_row <- rep(seq_len(n_chunks), each = chunksize)[seq_len(n_rows)]
    pieces <- split(df, chunk_of_row)
    # chunksize = Inf stops the recursive calls from splitting again
    oks <- pbapply::pbsapply(pieces, banctable_update_rows,
                             table = table, base = base, chunksize = Inf,
                             append_allowed = FALSE, ...)
    return(all(oks))
  }

  payload <- fafbseg:::df2updatepayload(df)
  res <- base$batch_update_rows(table_name = table, rows_data = payload)
  ok <- isTRUE(all.equal(res, list(success = TRUE)))
  return(ok)
}

# hidden
banctable_base <- function (base_name = "banc_meta",
                            table = NULL,
                            url = "https://cloud.seatable.io/",
                            workspace_id = "57832",
                            cached = TRUE,
                            ac = NULL) {
  # Return a seatable base object for `base_name` / `table`, going through
  # banctable_base_impl and retrying once when the first attempt failed (and
  # cached = TRUE) or when the base's token is about to expire.
  # NOTE(review): banctable_base_impl is presumably memoised elsewhere, since
  # memoise::forget() is applied to it - confirm.

  # Remember whether the caller supplied a connection; previously a supplied
  # `ac` was silently ignored. It is only forwarded when given explicitly, so
  # the default call to banctable_base_impl is unchanged.
  ac_supplied <- !is.null(ac)
  if (!ac_supplied) ac <- banctable_login(url = url)

  fetch_base <- function() {
    if (ac_supplied) {
      banctable_base_impl(table = table, base_name = base_name,
                          url = url, workspace_id = workspace_id, ac = ac)
    } else {
      banctable_base_impl(table = table, base_name = base_name,
                          url = url, workspace_id = workspace_id)
    }
  }

  if (!cached)
    memoise::forget(banctable_base_impl)
  base <- try(fetch_base(), silent = TRUE)

  # Stale when the token expiry time (jwt_exp) is less than one hour away.
  # If `base` is a try-error this comparison fails and counts as not stale.
  stale_token <- isTRUE(try(difftime(base$jwt_exp, Sys.time(),
                                     units = "hours") < 1, silent = TRUE))
  retry <- (cached && inherits(base, "try-error")) || stale_token
  if (!retry)
    return(base)

  # Clear the cache and try once more; an error here reaches the caller
  memoise::forget(banctable_base_impl)
  fetch_base()
}

# hidden
banctable_base_impl <- function (base_name = "banc_meta",
                                 table = NULL,
                                 url = "https://cloud.seatable.io/",
                                 workspace_id = "57832",
                                 ac = NULL){
  # Obtain a seatable base object, either by base name (optionally looking up
  # its workspace) or, when `base_name` is NULL, by finding the base that
  # contains `table`.

  # Use the requested server for the login; `url` was previously accepted but
  # never used. The default is the same URL banctable_login() defaults to.
  if (is.null(ac)) ac <- banctable_login(url = url)
  if (is.null(base_name) && is.null(table))
    stop("you must supply one of base or table name!")

  if (is.null(base_name)) {
    # Only a table name is known: let fafbseg find the matching base
    base <- fafbseg:::flytable_base4table(table, ac = ac, cached = FALSE)
    return(invisible(base))
  }

  if (is.null(workspace_id)) {
    # Look up the workspace whose name matches `base_name`
    wsdf <- fafbseg:::flytable_workspaces(ac = ac)
    wsdf.sel <- wsdf[wsdf$name %in% base_name, , drop = FALSE]
    if (nrow(wsdf.sel) == 0)
      stop("Unable to find a workspace containing basename:",
           base_name, "\nCheck basename and/or access permissions.")
    if (nrow(wsdf.sel) > 1)
      stop("Multiple workspaces containing basename:",
           base_name, "\nYou must use banctable_base() specifying a workspace_id to resolve this ambiguity.")
    workspace_id <- wsdf.sel[["workspace_id"]]
  }

  base <- reticulate::py_call(ac$get_base, workspace_id = workspace_id,
                              base_name = base_name)
  base
}







39 changes: 38 additions & 1 deletion R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@
#' @rdname banc.surf
"banc_al.surf"

#' @docType data
#' @rdname banc.surf
"banc_vnc_neuropils.surf"

#' @docType data
#' @rdname banc.surf
"banc_vnc_tracts.surf"

#' @docType data
#' @rdname banc.surf
"banc_vnc_nerves.surf"

## How it was obtained:
# res <- httr::GET("https://www.googleapis.com/storage/v1/b/zetta_lee_fly_cns_001_kisuk/o/final%2Fv2%2Fvolume_meshes%2Fmeshes%2F1%3A0.drc?alt=media&neuroglancer=610000b05b6497edcf20b78f29516970")
# httr::stop_for_status(res)
Expand Down Expand Up @@ -102,6 +114,14 @@
#' @rdname banc_to_jrc2018f_tpsreg
"jrc2018f_to_banc_tpsreg"

#' @docType data
#' @rdname banc_to_jrc2018f_tpsreg
"jrcvnc2018f_to_banc_tpsreg"

#' @docType data
#' @rdname banc_to_jrc2018f_tpsreg
"banc_to_jrcvnc2018f_tpsreg"

#' Thin-Plate Spline Registration for Mirroring in BANC Space
#'
#' @description
Expand Down Expand Up @@ -153,6 +173,23 @@
#' @docType data
"banc_mirror_tpsreg"


#' BANC neuropil name to number correspondence for neuroglancer
#'
#' @name banc.surf
#' @docType data
#' @description A BANC neuroglancer scene can be directed to a google cloud storage
#' location, where BANC-transformed standard neuropil meshes reside.
#' The source is
#' `precomputed://gs://lee-lab_brain-and-nerve-cord-fly-connectome/volume_meshes`
#' They can be
#' plotted in neuroglancer by adding this location, entering the `Seg.` pane
#' and entering the number that corresponds to the correct mesh.
#' This data frame gives the mesh name to number correspondences.
#'
#' @seealso
#' \code{\link{banc.surf}} for the available neuropil objects for BANC.
#' These are `hxsurf` objects, names for subregions can be found as so:
#' `banc_neuropils.surf$RegionList`
"banc_volumes.df"


12 changes: 6 additions & 6 deletions R/l2.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,19 @@
#' # one time install of necessary python packages
#' fafbseg::simple_python(pkgs="fafbseg")
#'
#' dnp42=c("648518346507131167", "648518346485772414")
#' dnp42.latest=banc_latestid(dnp42)
#' dnp42.dps <- banc_read_l2dp(dnp42.latest)
#' dna02=c("720575941478275714", "720575941512946243")
#' dna02.latest=banc_latestid(dna02)
#' dna02.dps <- banc_read_l2dp(dna02.latest)
#'
#' # plot those
#' nclear3d()
#' plot3d(dnp42.dps, lwd=3)
#' plot3d(dna02.dps, lwd=3)
#' # nb dotprops are always in microns
#' wire3d(banc.surf/1e3, col='grey')
#'
#' nclear3d()
#' dnp42.skel <- banc_read_l2skel(dnp42.latest)
#' plot3d(dnp42.skel, lwd=2)
#' dna02.skel <- banc_read_l2skel(dna02.latest)
#' plot3d(dna02.skel, lwd=2)
#' # nb neuron skeletons are in nm
#' wire3d(banc.surf, col='grey')
#' }
Expand Down
Loading
Loading