Skip to content

Commit

Permalink
Merge pull request #4 from flyconnectome/asb-dev
Browse files Browse the repository at this point in the history
Updates to BANC registrations and seatable access
  • Loading branch information
alexanderbates authored Jul 19, 2024
2 parents a0af3fd + 36e9ae3 commit 4cdcc2f
Show file tree
Hide file tree
Showing 46 changed files with 1,173 additions and 116 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@ Imports:
readr,
checkmate,
utils,
ggplot2 (>= 3.5.1.9000)
reticulate,
ggplot2 (>= 3.5),
stringr,
memoise
Suggests:
testthat (>= 3.0.0),
reticulate,
readobj,
Rvcg,
Morpho,
Expand Down
12 changes: 12 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,15 @@ export(banc_segid_from_cellid)
export(banc_set_token)
export(banc_side_view)
export(banc_to_JRC2018F)
export(banc_upload_mesh)
export(banc_view)
export(banc_vnc_view)
export(banc_voxdims)
export(banc_xyz2id)
export(banctable_login)
export(banctable_query)
export(banctable_set_token)
export(banctable_update_rows)
export(choose_banc)
export(dr_banc)
export(elastix_xform)
Expand All @@ -74,6 +79,13 @@ importFrom(magrittr,"%>%")
importFrom(nat,xyzmatrix)
importFrom(nat,xyzmatrix2str)
importFrom(pbapply,pbsapply)
importFrom(reticulate,conda_create)
importFrom(reticulate,conda_install)
importFrom(reticulate,conda_list)
importFrom(reticulate,import)
importFrom(reticulate,py_install)
importFrom(reticulate,use_condaenv)
importFrom(rlang,.data)
importFrom(utils,browseURL)
importFrom(utils,packageVersion)
importFrom(utils,write.table)
231 changes: 231 additions & 0 deletions R/banc-table.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
#' @title Read and write to the seatable for draft BANC annotations
#'
#' @description These functions use the logic and wrap some code
#' from the `flytable_.*` functions in the `fafbseg` R package.
#' \code{banctable_set_token} will obtain and store a permanent
#' seatable user-level API token.
#' \code{banctable_query} performs a SQL query against a banctable
#' database. You can omit the \code{base} argument unless you have tables of
#' the same name in different bases.
#' \code{banctable_base} returns a \code{base} object (equivalent to
#' a mysql database) which allows you to access one or more tables, logging in
#' to the service if necessary. The returned base object gives you full access
#' to the Python
#' \href{https://seatable.github.io/seatable-scripts/python/base/}{\code{Base}}
#' API allowing a range of row/column manipulations.
#' \code{banctable_update_rows} updates existing rows in a table, returning TRUE on success.
#'
#' @param sql A SQL query string. See examples and
#' \href{https://seatable.github.io/seatable-scripts/python/query/}{seatable
#' docs}.
#' @param limit An optional limit, which only applies if you do not specify a
#' limit directly in the \code{sql} query. By default seatable limits SQL
#' queries to 100 rows. We increase the limit to 100000 rows by default.
#' @param convert Expert use only: Whether or not to allow the Python seatable
#' module to process raw output from the database. This is principally for
#' debugging purposes. NB this imposes a requirement of seatable_api >=2.4.0.
#' @param python Logical. Whether to return a Python pandas DataFrame. The default of FALSE returns an R data.frame
#' @param base Character vector specifying the \code{base}
#' @param table Character vector specifying a table for which you want a
#' \code{base} object.
# @param workspace_id A numeric id specifying the workspace. Advanced use only
# since we can normally figure this out from \code{base_name}.
# @param cached Whether to use a cached base object
#' @param token normally retrieved from \code{BANCTABLE_TOKEN} environment
#' variable.
#' @param user,pwd banctable user and password used by \code{banctable_set_token}
#' to obtain a token
#' @param url Optional URL to the server
#' @param ac A seatable connection object as returned by \code{banctable_login}.
#' @param df A data.frame containing the data to upload including an `_id`
#' column that can identify each row in the remote table.
#' @param append_allowed Logical. Whether rows without row identifiers can be appended.
#' @param chunksize To split large requests into smaller ones with max this many rows.
#' @param ... Additional arguments passed to pbsapply which might include cl=2 to specify a number of parallel jobs to run.
#'
#' @return a \code{data.frame} of results. There should be 0 rows if no rows
#' matched query.
#'
#' @seealso \code{fafbseg::\link{flytable_query}}
#' @examples
#' \dontrun{
#' # Do this once
#' banctable_set_token(user="MY_EMAIL_FOR_SEATABLE.com",
#' pwd="MY_SEATABLE_PASSWORD",
#' url="https://cloud.seatable.io/")
#'
#' # Thereafter:
#' banc.meta <- banctable_query()
#' }
#' @export
#' @rdname banctable_query
banctable_query <- function (sql = "SELECT * FROM banc_meta",
                             limit = 100000L,
                             base = NULL,
                             python = FALSE,
                             convert = TRUE,
                             ac = NULL){
  # Run a SQL SELECT against a seatable base and return the rows.
  #
  # sql:     a single SELECT statement; the table name is parsed from its
  #          FROM clause.
  # limit:   row limit appended when `sql` has no LIMIT clause of its own;
  #          FALSE appends nothing, a non-finite value becomes
  #          .Machine$integer.max.
  # base:    NULL (look the base up via banctable_base), a base name, or an
  #          already constructed base object.
  # python:  if TRUE return the pandas DataFrame instead of an R data.frame.
  # convert: passed on to reticulate::py_call for the query call.
  # ac:      seatable connection; obtained with banctable_login() when NULL.
  #
  # Returns a data.frame (or pandas DataFrame when `python = TRUE`), or NULL
  # with a warning when the Python query call fails.
  if (is.null(ac)) ac <- banctable_login()
  checkmate::assert_character(sql, len = 1, pattern = "select",
                              ignore.case = TRUE)
  # Capture the first token after FROM, tolerating an opening single quote.
  res <- stringr::str_match(
    sql,
    stringr::regex("\\s+FROM\\s+[']{0,1}([^, ']+).*", ignore_case = TRUE)
  )
  if (any(is.na(res)[, 2]))
    stop("Cannot identify a table name in your sql statement!\n")
  table <- res[, 2]
  if (is.null(base)) {
    # Hand over the connection we already hold rather than dropping it.
    base <- try(banctable_base(table = table, ac = ac))
    if (inherits(base, "try-error"))
      stop("I inferred table_name: ", table, " from your SQL query but couldn't connect to a base with this table!")
  }
  else if (is.character(base))
    base <- banctable_base(base_name = base, ac = ac)
  # SQL keywords are case-insensitive, so look for an existing LIMIT clause
  # without regard to case; otherwise "LIMIT 10" would gain a second LIMIT.
  has_limit <- isTRUE(grepl("\\s+limit\\s+\\d+", sql, ignore.case = TRUE))
  if (!has_limit && !isFALSE(limit)) {
    if (!is.finite(limit))
      limit <- .Machine$integer.max
    # format() keeps doubles such as 1e5 out of scientific notation, which
    # paste() alone would render as "1e+05".
    sql <- paste(sql, "LIMIT", format(limit, scientific = FALSE, trim = TRUE))
  }
  # Python-side output is captured so it can be reported in the warning.
  pyout <- reticulate::py_capture_output(
    ll <- try(reticulate::py_call(base$query, sql, convert = convert),
              silent = TRUE)
  )
  if (inherits(ll, "try-error")) {
    warning(paste("No rows returned by banctable", pyout,
                  collapse = "\n"))
    return(NULL)
  }
  pd <- reticulate::import("pandas")
  reticulate::py_capture_output(
    pdd <- reticulate::py_call(pd$DataFrame, ll)
  )
  if (python)
    return(pdd)
  colinfo <- fafbseg::flytable_columns(table, base)
  df <- fafbseg:::flytable2df(fafbseg:::pandas2df(pdd, use_arrow = FALSE),
                              tidf = colinfo)
  fields <- fafbseg:::sql2fields(sql)
  # "SELECT *" follows the table's column order; an explicit field list
  # follows the order given in the query.
  if (length(fields) == 1 && fields == "*") {
    toorder <- intersect(colinfo$name, colnames(df))
  }
  else {
    toorder <- intersect(fields, colnames(df))
  }
  rest <- setdiff(colnames(df), toorder)
  df[c(toorder, rest)]
}

#' @export
#' @rdname banctable_query
banctable_set_token <- function(user, pwd, url = "https://cloud.seatable.io/"){
  # Log in to seatable with user/password, then store the resulting API
  # token both in the current session and in ~/.Renviron.
  #
  # user, pwd: seatable login name and password.
  # url:       seatable server URL.
  #
  # Returns NULL invisibly; called for its side effects.
  st <- fafbseg:::check_seatable()
  ac <- reticulate::py_call(st$Account, login_name = user,
                            password = pwd, server_url = url)
  ac$auth()
  # The variable name must match the one banctable_login() reads
  # (BANCTABLE_TOKEN); environment variable names are case-sensitive on
  # Unix-like systems, so a lower-case prefix would never be found there.
  Sys.setenv(BANCTABLE_TOKEN = ac$token)
  # Append to the user's .Renviron so future sessions pick the token up.
  cat("BANCTABLE_TOKEN='", ac$token, "'\n", sep = "", append = TRUE,
      file = path.expand("~/.Renviron"))
  invisible(NULL)
}

#' @export
#' @rdname banctable_query
banctable_login <- function(url = "https://cloud.seatable.io/",
                            token = Sys.getenv("BANCTABLE_TOKEN", unset = NA_character_)){
  # Thin wrapper: delegate to fafbseg's seatable login using the BANC
  # server URL and the token from the BANCTABLE_TOKEN environment variable
  # (NA when unset).
  fafbseg::flytable_login(token = token, url = url)
}


#' @export
#' @rdname banctable_query
banctable_update_rows <- function (df, table, base = NULL, append_allowed = TRUE, chunksize = 1000L, ...) {
  # Update existing rows of `table` from `df`, splitting the request into
  # chunks of at most `chunksize` rows. Returns TRUE when every update call
  # reports success. `...` is forwarded to pbapply::pbsapply.

  # A base name (or NULL) is resolved to a base object first.
  if (is.null(base) || is.character(base)) {
    base <- banctable_base(base_name = base, table = table)
  }

  n_rows <- nrow(df)
  if (!isTRUE(n_rows > 0)) {
    warning("No rows to update in `df`!")
    return(TRUE)
  }

  append_mode <- ifelse(append_allowed, NA, FALSE)
  df <- fafbseg:::df2flytable(df, append = append_mode)

  # Rows without a row id would have to be appended, which is unsupported.
  lacks_row_id <- is.na(df[["row_id"]])
  if (any(lacks_row_id)) {
    stop("Adding new rows not yet implemented")
  }

  if (n_rows > chunksize) {
    # Label each row with its chunk number, then recurse once per chunk with
    # chunking switched off (chunksize = Inf).
    n_chunks <- ceiling(n_rows / chunksize)
    chunk_labels <- rep(seq_len(n_chunks), each = chunksize)[seq_len(n_rows)]
    pieces <- split(df, chunk_labels)
    chunk_ok <- pbapply::pbsapply(pieces, banctable_update_rows,
                                  table = table, base = base,
                                  chunksize = Inf, append_allowed = FALSE,
                                  ...)
    return(all(chunk_ok))
  }

  payload <- fafbseg:::df2updatepayload(df)
  reply <- base$batch_update_rows(table_name = table, rows_data = payload)
  # Success is a reply equal to list(success = TRUE).
  isTRUE(all.equal(reply, list(success = TRUE)))
}

# hidden
banctable_base <- function (base_name = "banc_meta",
                            table = NULL,
                            url = "https://cloud.seatable.io/",
                            workspace_id = "57832",
                            cached = TRUE,
                            ac = NULL) {
  # Return a seatable base object via banctable_base_impl, retrying once
  # after clearing its cache when the first attempt failed (cached mode) or
  # the base's token expires within the hour.
  if (is.null(ac)) {
    ac <- banctable_login()
  }
  # cached = FALSE: clear any memoised results before looking the base up.
  if (!cached) {
    memoise::forget(banctable_base_impl)
  }
  base <- try(
    banctable_base_impl(table = table, base_name = base_name,
                        url = url, workspace_id = workspace_id),
    silent = TRUE
  )
  # Stale when jwt_exp is less than one hour away; any error while checking
  # (e.g. `base` is a try-error) counts as not stale.
  hours_left_below_one <- try(
    difftime(base$jwt_exp, Sys.time(), units = "hours") < 1,
    silent = TRUE
  )
  stale_token <- isTRUE(hours_left_below_one)
  failed_from_cache <- cached && inherits(base, "try-error")
  if (!(failed_from_cache || stale_token)) {
    return(base)
  }
  memoise::forget(banctable_base_impl)
  banctable_base_impl(table = table, base_name = base_name,
                      url = url, workspace_id = workspace_id)
}

# hidden
banctable_base_impl <- function (base_name = "banc_meta",
                                 table = NULL,
                                 url = "https://cloud.seatable.io/",
                                 workspace_id = "57832",
                                 ac = NULL){
  # Look up a seatable base, either from a table name (when base_name is
  # NULL) or from a base name plus workspace id.
  # NOTE(review): `url` is accepted but not used in this body.
  if (is.null(ac)) {
    ac <- banctable_login()
  }
  no_base_name <- is.null(base_name)
  if (no_base_name && is.null(table)) {
    stop("you must supply one of base or table name!")
  }
  if (no_base_name) {
    # Only a table name is known: let fafbseg find the base holding it.
    found <- fafbseg:::flytable_base4table(table, ac = ac, cached = FALSE)
    return(invisible(found))
  }
  if (is.null(workspace_id)) {
    # No workspace given: it must be uniquely identified by the base name.
    ws_table <- fafbseg:::flytable_workspaces(ac = ac)
    ws_hits <- subset(ws_table, ws_table$name == base_name)
    n_hits <- nrow(ws_hits)
    if (n_hits == 0) {
      stop("Unable to find a workspace containing basename:",
           base_name, "\nCheck basename and/or access permissions.")
    }
    if (n_hits > 1) {
      stop("Multiple workspaces containing basename:",
           base_name, "\nYou must use banctable_base() specifying a workspace_id to resolve this ambiguity.")
    }
    workspace_id <- ws_hits[["workspace_id"]]
  }
  reticulate::py_call(ac$get_base, workspace_id = workspace_id,
                      base_name = base_name)
}







39 changes: 38 additions & 1 deletion R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@
#' @rdname banc.surf
"banc_al.surf"

#' @docType data
#' @rdname banc.surf
"banc_vnc_neuropils.surf"

#' @docType data
#' @rdname banc.surf
"banc_vnc_tracts.surf"

#' @docType data
#' @rdname banc.surf
"banc_vnc_nerves.surf"

## How it was obtained:
# res <- httr::GET("https://www.googleapis.com/storage/v1/b/zetta_lee_fly_cns_001_kisuk/o/final%2Fv2%2Fvolume_meshes%2Fmeshes%2F1%3A0.drc?alt=media&neuroglancer=610000b05b6497edcf20b78f29516970")
# httr::stop_for_status(res)
Expand Down Expand Up @@ -102,6 +114,14 @@
#' @rdname banc_to_jrc2018f_tpsreg
"jrc2018f_to_banc_tpsreg"

#' @docType data
#' @rdname banc_to_jrc2018f_tpsreg
"jrcvnc2018f_to_banc_tpsreg"

#' @docType data
#' @rdname banc_to_jrc2018f_tpsreg
"banc_to_jrcvnc2018f_tpsreg"

#' Thin-Plate Spline Registration for Mirroring in BANC Space
#'
#' @description
Expand Down Expand Up @@ -153,6 +173,23 @@
#' @docType data
"banc_mirror_tpsreg"


#' BANC neuropil name to number correspondence for neuroglancer
#'
#' @name banc_volumes.df
#' @docType data
#' @description A BANC neuroglancer scene can be directed to a google cloud storage
#' location, where BANC-transformed standard neuropil meshes reside.
#' The source is
#' `precomputed://gs://lee-lab_brain-and-nerve-cord-fly-connectome/volume_meshes`
#' They can be
#' plotted in neuroglancer by adding this location, entering the `Seg.` pane
#' and entering the number that corresponds to the correct mesh.
#' This data frame gives the mesh name to number correspondences.
#'
#' @seealso
#' \code{\link{banc.surf}} for the available neuropil objects for BANC.
#' These are `hxsurf` objects, names for subregions can be found as so:
#' `banc_neuropils.surf$RegionList`
"banc_volumes.df"


12 changes: 6 additions & 6 deletions R/l2.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,19 @@
#' # one time install of necessary python packages
#' fafbseg::simple_python(pkgs="fafbseg")
#'
#' dnp42=c("648518346507131167", "648518346485772414")
#' dnp42.latest=banc_latestid(dnp42)
#' dnp42.dps <- banc_read_l2dp(dnp42.latest)
#' dna02=c("720575941478275714", "720575941512946243")
#' dna02.latest=banc_latestid(dna02)
#' dna02.dps <- banc_read_l2dp(dna02.latest)
#'
#' # plot those
#' nclear3d()
#' plot3d(dnp42.dps, lwd=3)
#' plot3d(dna02.dps, lwd=3)
#' # nb dotprops are always in microns
#' wire3d(banc.surf/1e3, col='grey')
#'
#' nclear3d()
#' dnp42.skel <- banc_read_l2skel(dnp42.latest)
#' plot3d(dnp42.skel, lwd=2)
#' dna02.skel <- banc_read_l2skel(dna02.latest)
#' plot3d(dna02.skel, lwd=2)
#' # nb neuron skeletons are in nm
#' wire3d(banc.surf, col='grey')
#' }
Expand Down
Loading

0 comments on commit 4cdcc2f

Please sign in to comment.