-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from flyconnectome/asb-dev
Updates to BANC registrations and seatable access
- Loading branch information
Showing
46 changed files
with
1,173 additions
and
116 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,231 @@ | ||
#' @title Read and write to the seatable for draft BANC annotations | ||
#' | ||
#' @description These functions use the logic and wrap some code | ||
#' from the `flytable_.*` functions in the `fafbseg` R package. | ||
#' \code{banctable_set_token} will obtain and store a permanent | ||
#' seatable user-level API token. | ||
#' \code{banctable_query} performs a SQL query against a banctable | ||
#' database. You can omit the \code{base} argument unless you have tables of | ||
#' the same name in different bases. | ||
#' \code{banctable_base} returns a \code{base} object (equivalent to | ||
#' a mysql database) which allows you to access one or more tables, logging in | ||
#' to the service if necessary. The returned base object gives you full access | ||
#' to the Python | ||
#' \href{https://seatable.github.io/seatable-scripts/python/base/}{\code{Base}} | ||
#' API allowing a range of row/column manipulations. | ||
#' \code{banctable_update_rows} updates existing rows in a table, returning TRUE on success. | ||
#' | ||
#' @param sql A SQL query string. See examples and | ||
#' \href{https://seatable.github.io/seatable-scripts/python/query/}{seatable | ||
#' docs}. | ||
#' @param limit An optional limit, which only applies if you do not specify a | ||
#' limit directly in the \code{sql} query. By default seatable limits SQL | ||
#' queries to 100 rows. We increase the limit to 100000 rows by default. | ||
#' @param convert Expert use only: Whether or not to allow the Python seatable | ||
#' module to process raw output from the database. This is principally for | ||
#' debugging purposes. NB this imposes a requirement of seatable_api >=2.4.0. | ||
#' @param python Logical. Whether to return a Python pandas DataFrame. The default of FALSE returns an R data.frame | ||
#' @param base Character vector specifying the \code{base} | ||
#' @param table Character vector specifying a table for which you want a | ||
#' \code{base} object. | ||
# @param workspace_id A numeric id specifying the workspace. Advanced use only | ||
# since we can normally figure this out from \code{base_name}. | ||
# @param cached Whether to use a cached base object | ||
#' @param token normally retrieved from \code{BANCTABLE_TOKEN} environment | ||
#' variable. | ||
#' @param user,pwd banctable user and password used by \code{banctable_set_token} | ||
#' to obtain a token | ||
#' @param url Optional URL to the server | ||
#' @param ac A seatable connection object as returned by \code{banctable_login}. | ||
#' @param df A data.frame containing the data to upload including an `_id` | ||
#' column that can identify each row in the remote table. | ||
#' @param append_allowed Logical. Whether rows without row identifiers can be appended. | ||
#' @param chunksize To split large requests into smaller ones with max this many rows. | ||
#' @param ... Additional arguments passed to pbsapply which might include cl=2 to specify a number of parallel jobs to run. | ||
#' | ||
#' @return a \code{data.frame} of results. There should be 0 rows if no rows | ||
#' matched query. | ||
#' | ||
#' @seealso \code{fafbseg::\link{flytable_query}} | ||
#' @examples | ||
#' \dontrun{ | ||
#' # Do this once | ||
#' banctable_set_token(user="MY_EMAIL_FOR_SEATABLE.com", | ||
#' pwd="MY_SEATABLE_PASSWORD", | ||
#' url="https://cloud.seatable.io/") | ||
#' | ||
#' # Thereafter: | ||
#' banc.meta <- banctable_query() | ||
#' } | ||
#' @export | ||
#' @rdname banctable_query | ||
banctable_query <- function (sql = "SELECT * FROM banc_meta",
                             limit = 100000L,
                             base = NULL,
                             python = FALSE,
                             convert = TRUE,
                             ac = NULL){
  # Run a SQL SELECT against a banctable base and return the result either as
  # a pandas DataFrame (python = TRUE) or as an R data.frame whose columns are
  # ordered to follow the query / table definition.

  # Only log in when the caller did not hand us a connection object.
  if (is.null(ac)) ac <- banctable_login()
  checkmate::assert_character(sql, len = 1, pattern = "select",
                              ignore.case = TRUE)

  # The table name is the first token after FROM (optionally single-quoted).
  res <- stringr::str_match(
    sql,
    stringr::regex("\\s+FROM\\s+[']{0,1}([^, ']+).*", ignore_case = TRUE)
  )
  if (any(is.na(res)[, 2]))
    stop("Cannot identify a table name in your sql statement!\n")
  table <- res[, 2]

  # Resolve `base` to a base object, reusing the connection obtained above so
  # that a caller-supplied `ac` is actually honoured.
  if (is.null(base)) {
    base <- try(banctable_base(table = table, ac = ac))
    if (inherits(base, "try-error"))
      stop("I inferred table_name: ", table, " from your SQL query but couldn't connect to a base with this table!")
  } else if (is.character(base)) {
    base <- banctable_base(base_name = base, ac = ac)
  }

  # Append a LIMIT clause unless the query already has one. SQL keywords are
  # case-insensitive, so the check must be too; otherwise "... LIMIT 10" would
  # end up with two LIMIT clauses.
  has_limit <- isTRUE(grepl("\\s+limit\\s+\\d+", sql, ignore.case = TRUE))
  if (!has_limit && !isFALSE(limit)) {
    if (!is.finite(limit))
      limit <- .Machine$integer.max
    # format() avoids scientific notation (e.g. 1e+05) for double limits.
    sql <- paste(sql, "LIMIT", format(limit, scientific = FALSE, trim = TRUE))
  }

  # Capture anything Python prints so it can be surfaced in the warning below.
  pyout <- reticulate::py_capture_output(
    ll <- try(reticulate::py_call(base$query, sql, convert = convert),
              silent = TRUE)
  )
  if (inherits(ll, "try-error")) {
    warning(paste("No rows returned by banctable", pyout,
                  collapse = "\n"))
    return(NULL)
  }

  pd <- reticulate::import("pandas")
  reticulate::py_capture_output(
    pdd <- reticulate::py_call(pd$DataFrame, ll)
  )
  if (python)
    return(pdd)

  # Convert to an R data.frame using the table's column metadata.
  colinfo <- fafbseg::flytable_columns(table, base)
  df <- fafbseg:::flytable2df(fafbseg:::pandas2df(pdd, use_arrow = FALSE),
                              tidf = colinfo)

  # Column order: for "SELECT *" follow the table definition, otherwise follow
  # the fields named in the query; any remaining columns go last.
  fields <- fafbseg:::sql2fields(sql)
  if (length(fields) == 1 && fields == "*") {
    toorder <- intersect(colinfo$name, colnames(df))
  } else {
    toorder <- intersect(fields, colnames(df))
  }
  rest <- setdiff(colnames(df), toorder)
  df[c(toorder, rest)]
}
|
||
#' @export | ||
#' @rdname banctable_query | ||
banctable_set_token <- function(user, pwd, url = "https://cloud.seatable.io/"){
  # Log in with user name and password, then store the resulting API token in
  # the current session and append it to ~/.Renviron for future sessions.
  # Returns NULL invisibly.
  st <- fafbseg:::check_seatable()
  ac <- reticulate::py_call(st$Account, login_name = user,
                            password = pwd, server_url = url)
  ac$auth()
  # Environment variable names are case-sensitive on most platforms, so the
  # name must match the BANCTABLE_TOKEN variable read by banctable_login().
  Sys.setenv(BANCTABLE_TOKEN = ac$token)
  cat("BANCTABLE_TOKEN='", ac$token, "'\n", sep = "", append = TRUE,
      file = path.expand("~/.Renviron"))
  invisible(NULL)
}
|
||
#' @export | ||
#' @rdname banctable_query | ||
banctable_login <- function(url = "https://cloud.seatable.io/",
                            token = Sys.getenv("BANCTABLE_TOKEN", unset = NA_character_)){
  # Thin wrapper: hand the server URL and the token (by default taken from the
  # BANCTABLE_TOKEN environment variable, NA when unset) to fafbseg's login.
  fafbseg::flytable_login(
    token = token,
    url = url
  )
}
|
||
|
||
#' @export | ||
#' @rdname banctable_query | ||
banctable_update_rows <- function (df, table, base = NULL, append_allowed = TRUE, chunksize = 1000L, ...) {
  # Update existing rows of `table` from `df`. Returns TRUE when the server
  # reports success (for chunked requests: when every chunk succeeded).

  # A NULL or character `base` still has to be resolved to a base object.
  if (is.null(base) || is.character(base))
    base <- banctable_base(base_name = base, table = table)

  n_rows <- nrow(df)
  if (!isTRUE(n_rows > 0)) {
    warning("No rows to update in `df`!")
    return(TRUE)
  }

  # append = NA when rows without identifiers are allowed, FALSE otherwise.
  append_flag <- ifelse(append_allowed, NA, FALSE)
  df <- fafbseg:::df2flytable(df, append = append_flag)

  # Rows lacking a row id would have to be appended, which is not supported
  # here yet (the upstream approach is to append them via
  # flytable_append_rows() and then continue with the remaining rows).
  if (any(is.na(df[["row_id"]]))) {
    stop("Adding new rows not yet implemented")
  }

  # Large requests are split into pieces of at most `chunksize` rows; each
  # piece is sent by a recursive call with chunking switched off.
  if (n_rows > chunksize) {
    n_chunks <- ceiling(n_rows / chunksize)
    chunk_ids <- rep(seq_len(n_chunks), each = chunksize)[seq_len(n_rows)]
    pieces <- split(df, chunk_ids)
    chunk_ok <- pbapply::pbsapply(pieces, banctable_update_rows,
                                  table = table, base = base,
                                  chunksize = Inf, append_allowed = FALSE,
                                  ...)
    return(all(chunk_ok))
  }

  payload <- fafbseg:::df2updatepayload(df)
  res <- base$batch_update_rows(table_name = table, rows_data = payload)
  isTRUE(all.equal(res, list(success = TRUE)))
}
|
||
# hidden | ||
banctable_base <- function (base_name = "banc_meta",
                            table = NULL,
                            url = "https://cloud.seatable.io/",
                            workspace_id = "57832",
                            cached = TRUE,
                            ac = NULL) {
  # Return a seatable base object for `base_name` / `table`.
  # The lookup is retried once when the first attempt failed (and
  # cached = TRUE) or when the returned base's token expires within the hour.

  # Log in only when no connection was supplied; the same connection is then
  # forwarded to the implementation so a caller-supplied `ac` is respected.
  if (is.null(ac)) ac <- banctable_login()
  if (!cached)
    memoise::forget(banctable_base_impl)

  base <- try({
    banctable_base_impl(table = table, base_name = base_name,
                        url = url, workspace_id = workspace_id, ac = ac)
  }, silent = TRUE)
  failed <- inherits(base, "try-error")

  # jwt_exp is compared against the current time; any problem evaluating it
  # (e.g. the field is missing) is treated as "not stale".
  stale_token <- !failed &&
    isTRUE(try(difftime(base$jwt_exp, Sys.time(), units = "hours") < 1,
               silent = TRUE))

  retry <- (cached && failed) || stale_token
  if (!retry) {
    # Surface the original error rather than returning a try-error object
    # that callers would then try to use as a base.
    if (failed)
      stop(attr(base, "condition"))
    return(base)
  }

  memoise::forget(banctable_base_impl)
  banctable_base_impl(table = table, base_name = base_name,
                      url = url, workspace_id = workspace_id, ac = ac)
}
|
||
# hidden | ||
banctable_base_impl <- function (base_name = "banc_meta",
                                 table = NULL,
                                 url = "https://cloud.seatable.io/",
                                 workspace_id = "57832",
                                 ac = NULL){
  # Worker behind banctable_base(): obtains a base object either by table name
  # (when base_name is NULL) or by base name plus workspace id.
  if (is.null(ac)) ac <- banctable_login()

  # No base name: the base has to be located from the table name instead.
  if (is.null(base_name)) {
    if (is.null(table))
      stop("you must supply one of base or table name!")
    found <- fafbseg:::flytable_base4table(table, ac = ac, cached = FALSE)
    return(invisible(found))
  }

  # No workspace id: look it up from the workspaces visible to this account,
  # insisting on exactly one match for the base name.
  if (is.null(workspace_id)) {
    workspaces <- fafbseg:::flytable_workspaces(ac = ac)
    candidates <- subset(workspaces, workspaces$name == base_name)
    n_candidates <- nrow(candidates)
    if (n_candidates == 0)
      stop("Unable to find a workspace containing basename:",
           base_name, "\nCheck basename and/or access permissions.")
    if (n_candidates > 1)
      stop("Multiple workspaces containing basename:",
           base_name, "\nYou must use banctable_base() specifying a workspace_id to resolve this ambiguity.")
    workspace_id <- candidates[["workspace_id"]]
  }

  reticulate::py_call(ac$get_base, workspace_id = workspace_id,
                      base_name = base_name)
}
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.