From a543b3e401aa31cf540325a7c14ff8ade6fe3452 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 19 Jun 2024 11:19:45 +0100 Subject: [PATCH] Add `IDBFS` persistent storage under `PostMessage` communication channel (#445) * Handle IDBFS persistent storage under PostMessage * Resolve syncfs() only once async process completes * Add webr::syncfs() * Update IDBFS & syncfs documentation --- NEWS.md | 2 + R/Makefile | 2 +- packages/webr/DESCRIPTION | 2 +- packages/webr/NAMESPACE | 1 + packages/webr/R/library.R | 2 + packages/webr/R/mount.R | 27 +++++++++- packages/webr/man/library_shim.Rd | 4 ++ packages/webr/man/mount.Rd | 9 +++- packages/webr/man/syncfs.Rd | 19 +++++++ packages/webr/src/init.c | 4 ++ packages/webr/src/mount.c | 49 ++++++++++++++++++ src/docs/mounting.qmd | 83 +++++++++++++++++++++++++++++-- src/webR/webr-chan.ts | 6 +++ src/webR/webr-main.ts | 7 ++- src/webR/webr-worker.ts | 23 ++++++++- 15 files changed, 229 insertions(+), 11 deletions(-) create mode 100644 packages/webr/man/syncfs.Rd diff --git a/NEWS.md b/NEWS.md index 67113abb..77731474 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ * The capturing mechanism of `captureR()` has been updated so that memory reallocation is performed when outputting very long lines. If reallocation is not possible (e.g. the environment does not have enough free memory to hold the entire line), the previous behaviour of truncating the line output is maintained (#434). +* Enabled the Emscripten IDBFS virtual filesystem driver. This filesystem type can be used to persist data in web browser storage across page reloads. This filesystem type must be used with the `PostMessage` communication channel (#56, #442). + ## Breaking changes * The `ServiceWorker` communication channel has been deprecated. Users should use the `SharedArrayBuffer` channel where cross-origin isolation is possible, or otherwise use the `PostMessage` channel. 
For the moment the `ServiceWorker` channel can still be used, but emits a warning at start up. The channel will be removed entirely in a future version of webR. diff --git a/R/Makefile b/R/Makefile index c5b077e1..4fec09b8 100644 --- a/R/Makefile +++ b/R/Makefile @@ -147,7 +147,7 @@ MAIN_LDFLAGS += -s EXIT_RUNTIME=1 MAIN_LDFLAGS += -s ERROR_ON_UNDEFINED_SYMBOLS=0 MAIN_LDFLAGS += -s EXPORTED_RUNTIME_METHODS=$(EXPORTED_RUNTIME_METHODS) MAIN_LDFLAGS += -s FETCH=1 -MAIN_LDFLAGS += -lworkerfs.js -lnodefs.js +MAIN_LDFLAGS += -lworkerfs.js -lnodefs.js -lidbfs.js MAIN_LDFLAGS += $(FORTRAN_WASM_LDADD) MAIN_LDFLAGS += $(WASM_OPT_LDADD) diff --git a/packages/webr/DESCRIPTION b/packages/webr/DESCRIPTION index eebb69d9..adf85234 100644 --- a/packages/webr/DESCRIPTION +++ b/packages/webr/DESCRIPTION @@ -17,4 +17,4 @@ Imports: Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 diff --git a/packages/webr/NAMESPACE b/packages/webr/NAMESPACE index f9ca3f0f..25c1ee09 100644 --- a/packages/webr/NAMESPACE +++ b/packages/webr/NAMESPACE @@ -14,6 +14,7 @@ export(mount) export(pager_install) export(require_shim) export(shim_install) +export(syncfs) export(test_package) export(unmount) useDynLib(webr, .registration = TRUE) diff --git a/packages/webr/R/library.R b/packages/webr/R/library.R index ba1861ef..3ebb7740 100644 --- a/packages/webr/R/library.R +++ b/packages/webr/R/library.R @@ -21,8 +21,10 @@ #' `show_menu` argument. By default, if no global option is set and no argument #' is provided, the menu will not be shown. #' +#' @param pkg Character vector of package names #' @param show_menu Show a menu asking the user if they would like to install #' the package if it is missing. Defaults to `getOption("webr.show_menu")`. +#' @param ... Other arguments to be passed to `library` and `require`. 
#' #' @export library_shim <- function(pkg, ..., show_menu = getOption("webr.show_menu")) { diff --git a/packages/webr/R/mount.R b/packages/webr/R/mount.R index f1f1dd66..fc882cf1 100644 --- a/packages/webr/R/mount.R +++ b/packages/webr/R/mount.R @@ -16,12 +16,19 @@ #' `mountpoint`. This filesystem type can only be used when webR is running #' under Node. #' +#' When mounting an Emscripten "idbfs" type filesystem, files will be persisted +#' to or populated from a browser-based IndexedDB database whenever the +#' JavaScript function `Module.FS.syncfs` is invoked. See the Emscripten `IDBFS` +#' documentation for more information. This filesystem type can only be used +#' when webR is running in a web browser and using the `PostMessage` +#' communication channel. +#' #' @param mountpoint a character string giving the path to a directory to mount #' onto in the Emscripten virtual filesystem. #' @param source a character string giving the location of the data source to be #' mounted. #' @param type a character string giving the type of Emscripten filesystem to be -#' mounted: "workerfs" or "nodefs". +#' mounted: "workerfs", "nodefs", or "idbfs". #' #' @export mount <- function(mountpoint, source, type = "workerfs") { @@ -34,6 +41,8 @@ mount <- function(mountpoint, source, type = "workerfs") { invisible(.Call(ffi_mount_workerfs, base_url, mountpoint)) } else if (tolower(type) == "nodefs") { invisible(.Call(ffi_mount_nodefs, source, mountpoint)) + } else if (tolower(type) == "idbfs") { + invisible(.Call(ffi_mount_idbfs, mountpoint)) } else { stop(paste("Unsupported Emscripten Filesystem type:", type)) } @@ -44,3 +53,19 @@ mount <- function(mountpoint, source, type = "workerfs") { unmount <- function(mountpoint) { invisible(.Call(ffi_unmount, mountpoint)) } + +#' Synchronise the Emscripten virtual filesystem +#' +#' @description +#' Uses the Emscripten filesystem API to synchronise all mounted virtual +#' filesystems with their backing storage, where it exists. 
The `populate` +#' argument controls the direction of the synchronisation between Emscripten's +#' internal data and the file system's persistent store. +#' +#' @param populate A boolean. When `true`, initialises the filesystem with data +#' from persistent storage. When `false`, writes current filesystem data to +#' the persistent storage. +#' @export +syncfs <- function(populate) { + invisible(.Call(ffi_syncfs, populate)) +} diff --git a/packages/webr/man/library_shim.Rd b/packages/webr/man/library_shim.Rd index a1fe01a6..f5a33163 100644 --- a/packages/webr/man/library_shim.Rd +++ b/packages/webr/man/library_shim.Rd @@ -10,6 +10,10 @@ library_shim(pkg, ..., show_menu = getOption("webr.show_menu")) require_shim(pkg, ..., show_menu = getOption("webr.show_menu")) } \arguments{ +\item{pkg}{Character vector of package names} + +\item{...}{Other arguments to be passed to \code{library} and \code{require}.} + \item{show_menu}{Show a menu asking the user if they would like to install the package if it is missing. Defaults to \code{getOption("webr.show_menu")}.} } diff --git a/packages/webr/man/mount.Rd b/packages/webr/man/mount.Rd index 21482235..956d1de6 100644 --- a/packages/webr/man/mount.Rd +++ b/packages/webr/man/mount.Rd @@ -17,7 +17,7 @@ onto in the Emscripten virtual filesystem.} mounted.} \item{type}{a character string giving the type of Emscripten filesystem to be -mounted: "workerfs" or "nodefs".} +mounted: "workerfs", "nodefs", or "idbfs".} } \description{ Uses the Emscripten filesystem API to mount a filesystem object onto a given @@ -34,4 +34,11 @@ the path to a physical directory on the host filesystem. The host directory will be mapped into the virtual filesystem and mounted onto the directory \code{mountpoint}. This filesystem type can only be used when webR is running under Node. 
+ +When mounting an Emscripten "idbfs" type filesystem, files will be persisted +to or populated from a browser-based IndexedDB database whenever the +JavaScript function \code{Module.FS.syncfs} is invoked. See the Emscripten \code{IDBFS} +documentation for more information. This filesystem type can only be used +when webR is running in a web browser and using the \code{PostMessage} +communication channel. } diff --git a/packages/webr/man/syncfs.Rd b/packages/webr/man/syncfs.Rd new file mode 100644 index 00000000..4d95fc13 --- /dev/null +++ b/packages/webr/man/syncfs.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mount.R +\name{syncfs} +\alias{syncfs} +\title{Synchronise the Emscripten virtual filesystem} +\usage{ +syncfs(populate) +} +\arguments{ +\item{populate}{A boolean. When \code{true}, initialises the filesystem with data +from persistent storage. When \code{false}, writes current filesystem data to +the persistent storage.} +} +\description{ +Uses the Emscripten filesystem API to synchronise all mounted virtual +filesystems with their backing storage, where it exists. The \code{populate} +argument controls the direction of the synchronisation between Emscripten's +internal data and the file system's persistent store. 
+} diff --git a/packages/webr/src/init.c b/packages/webr/src/init.c index 78b22cd2..e1dcf96a 100644 --- a/packages/webr/src/init.c +++ b/packages/webr/src/init.c @@ -10,6 +10,8 @@ extern SEXP ffi_dev_canvas_cache(void); extern SEXP ffi_dev_canvas_destroy(SEXP); extern SEXP ffi_mount_workerfs(SEXP, SEXP); extern SEXP ffi_mount_nodefs(SEXP, SEXP); +extern SEXP ffi_mount_idbfs(SEXP); +extern SEXP ffi_syncfs(SEXP); extern SEXP ffi_unmount(SEXP); static @@ -23,6 +25,8 @@ const R_CallMethodDef CallEntries[] = { { "ffi_dev_canvas_destroy", (DL_FUNC) &ffi_dev_canvas_destroy, 1}, { "ffi_mount_workerfs", (DL_FUNC) &ffi_mount_workerfs, 2}, { "ffi_mount_nodefs", (DL_FUNC) &ffi_mount_nodefs, 2}, + { "ffi_mount_idbfs", (DL_FUNC) &ffi_mount_idbfs, 1}, + { "ffi_syncfs", (DL_FUNC) &ffi_syncfs, 1}, { "ffi_unmount", (DL_FUNC) &ffi_unmount, 1}, { NULL, NULL, 0} }; diff --git a/packages/webr/src/mount.c b/packages/webr/src/mount.c index a3e69c3f..9289a646 100644 --- a/packages/webr/src/mount.c +++ b/packages/webr/src/mount.c @@ -15,6 +15,14 @@ Rf_error("`" #arg "` can't be `NA`."); \ } +#define CHECK_LOGICAL(arg) \ + if (!Rf_isLogical(arg) || LENGTH(arg) != 1) { \ + Rf_error("`" #arg "` must be a logical."); \ + } \ + if (LOGICAL(arg)[0] == NA_LOGICAL){ \ + Rf_error("`" #arg "` can't be `NA`."); \ + } + SEXP ffi_mount_workerfs(SEXP source, SEXP mountpoint) { #ifdef __EMSCRIPTEN__ CHECK_STRING(source); @@ -74,6 +82,47 @@ SEXP ffi_mount_nodefs(SEXP source, SEXP mountpoint) { #endif } +SEXP ffi_mount_idbfs(SEXP mountpoint) { +#ifdef __EMSCRIPTEN__ + CHECK_STRING(mountpoint); + + EM_ASM({ + // Stop if we're not able to use a IDBFS filesystem object + if (typeof IN_NODE === 'boolean' && IN_NODE === true) { + const msg = Module.allocateUTF8OnStack( + 'The `IDBFS` filesystem object can only be used when running in a web browser.' 
+ ); + Module._Rf_error(msg); + } + const mountpoint = UTF8ToString($0); + try { + Module.FS.mount(Module.FS.filesystems.IDBFS, {}, mountpoint); + } catch (e) { + let msg = e.message; + if (e.name === "ErrnoError" && e.errno === 10) { + const dir = Module.UTF8ToString($0); + msg = "Unable to mount directory, `" + dir + "` is already mounted."; + } + Module._Rf_error(Module.allocateUTF8OnStack(msg)); + } + }, R_CHAR(STRING_ELT(mountpoint, 0))); + + return R_NilValue; +#else + Rf_error("Function must be running under Emscripten."); +#endif +} + +SEXP ffi_syncfs(SEXP populate) { +#ifdef __EMSCRIPTEN__ + CHECK_LOGICAL(populate); + EM_ASM({ Module.FS.syncfs($0, () => {}) }, LOGICAL(populate)[0]); + return R_NilValue; +#else + Rf_error("Function must be running under Emscripten."); +#endif +} + SEXP ffi_unmount(SEXP mountpoint) { #ifdef __EMSCRIPTEN__ CHECK_STRING(mountpoint); diff --git a/src/docs/mounting.qmd b/src/docs/mounting.qmd index e03c86a7..6c74ff2c 100644 --- a/src/docs/mounting.qmd +++ b/src/docs/mounting.qmd @@ -10,15 +10,21 @@ The [Emscripten filesystem API](https://emscripten.org/docs/api_reference/Filesy Mounting images and directories in this way gives the Wasm R process access to arbitrary external data, potentially including datasets, scripts, or R packages [pre-compiled for WebAssembly](building.qmd). -Emscripten's API provides several types of virtual filesystem, but for technical reasons^[Currently, webR blocks in the JavaScript worker thread while it waits for R input to be evaluated. This blocking means that Emscripten filesystems that depend on asynchronous browser APIs, such as [`IDBFS`](https://emscripten.org/docs/api_reference/Filesystem-API.html#filesystem-api-idbfs), do not work.] only the following filesystems are available for use with webR. +Emscripten's API allows for several types of virtual filesystem, depending on the execution environment. 
The following filesystems are available for use with webR: | Filesystem | Description | Web Browser | Node.js | |------|-----|------|------| -| `WORKERFS` | Mount filesystem images. | ✅ | ✅ | +| `WORKERFS` | Mount Emscripten filesystem images. | ✅ | ✅[^workerfs] | | `NODEFS` | Mount existing host directories. | ❌ | ✅ | +| `IDBFS` | Browser-based persistent storage using the [IndexedDB API](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API). | ✅[^idbfs] | ❌ | + +[^workerfs]: Be aware of the current GitHub issue [#328](https://github.com/r-wasm/webr/issues/328). +[^idbfs]: Using the `PostMessage` [communication channel](communication.qmd) only. ## Emscripten filesystem images +Emscripten filesystem images can be mounted using the `WORKERFS` filesystem type. + The [`file_packager`](https://emscripten.org/docs/porting/files/packaging_files.html#packaging-using-the-file-packager-tool) tool, provided by Emscripten, takes in a directory structure as input and produces webR compatible filesystem images as output. The [`file_packager`](https://emscripten.org/docs/porting/files/packaging_files.html#packaging-using-the-file-packager-tool) tool may be invoked from R using the [rwasm](https://r-wasm.github.io/rwasm/) R package: ```{r eval=FALSE} @@ -105,12 +111,12 @@ See the [Emscripten `FS.mount()` documentation](https://emscripten.org/docs/api_ ## Mount an existing host directory +The `NODEFS` filesystem type maps directories that exist on the host machine so that they are accessible in the WebAssembly process. + ::: callout-warning `NODEFS` is only available when running webR under Node.js. ::: -The `NODEFS` filesystem type maps directories that exist on the host machine so that they are accessible in the WebAssembly process. - To mount the directory `./extra` on the virtual filesystem at `/data`, use either the JavaScript or R mount API with the filesystem type set to `"NODEFS"`. 
::: {.panel-tabset} @@ -130,6 +136,75 @@ webr::mount( ) ``` +::: + +## IndexedDB Filesystem Storage + +When using webR in a web browser, an [IndexedDB](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API)-based persistent storage space can be mounted using the `IDBFS` filesystem type. + +::: {.callout-warning} + +Due to the way webR blocks for input in the worker thread, the `IDBFS` filesystem type **does not work** when using the `SharedArrayBuffer` communication channel. WebR must be configured to use the `PostMessage` communication channel to use `IDBFS` persistent storage. + +::: + +### Mounting + +First, create a directory to contain the IndexedDB filesystem, then use either the JavaScript or R mount API with type `"IDBFS"`. + +::: {.panel-tabset} +## JavaScript + +``` javascript +await webR.FS.mkdir('/data'); +await webR.FS.mount('IDBFS', {}, '/data'); +await webR.FS.syncfs(true); +``` + +## R +```{r eval=FALSE} +dir.create("/data") +webr::mount(mountpoint = "/data", type = "IDBFS") +webr::syncfs(TRUE) +``` ::: +After mounting the filesystem using [`mount()`](api/r.html#mount), the [`syncfs()`](api/r.html#syncfs) function should be invoked with its `populate` argument set to `true`. This extra step is **required** to initialise the virtual filesystem with any previously existing data files in the browser's IndexedDB storage. Without it, the filesystem will always be initially mounted as an empty directory. + +For more information, see the Emscripten FS API [`IDBFS` and `FS.syncfs()`](https://emscripten.org/docs/api_reference/Filesystem-API.html#filesystem-api-idbfs) documentation. + +### Persisting the filesystem to IndexedDB + +The `syncfs()` function should be invoked with its `populate` argument set to `false` to persist the current state of the filesystem to the browser's IndexedDB storage. 
+ +::: {.panel-tabset} +## JavaScript + +``` javascript +await webR.FS.syncfs(false); +``` + +## R +```{r eval=FALSE} +webr::syncfs(FALSE) +``` + +::: + +After writing to the virtual filesystem you should be sure to invoke `syncfs(false)` before the web page containing webR is closed to ensure that the filesystem data is flushed and written to the IndexedDB-based persistent storage. + +::: {.callout-warning} + +Operations performed using IndexedDB are done asynchronously. If you are mounting `IDBFS` filesystems and accessing data non-interactively you should use the JavaScript API and be sure to wait for the `Promise` returned by `webR.FS.syncfs(false)` to resolve before continuing, for example by using the `await` keyword. + +In a future version of webR the `webr::syncfs()` function will similarly return a Promise-like object. +::: + +### Web storage caveats + +Filesystem data stored in an IndexedDB database can only be accessed within the current [origin](https://developer.mozilla.org/en-US/docs/Glossary/Origin), loosely defined as the current web page's host domain and port. + +The way in which web browsers decide how much storage space to allocate for data and what to remove when limits are reached differs between browsers and is not always simple to calculate. Be aware of browser [storage quotas and eviction criteria](https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria) and note that data stored in an `IDBFS` filesystem type is stored only on a "best-effort" basis. It can be removed by the browser at any time, autonomously or by the user interacting through the browser's UI. + +In private browsing mode, for example, stored data is usually deleted when the private session ends. 
diff --git a/src/webR/webr-chan.ts b/src/webR/webr-chan.ts index 020ce09f..ba22a399 100644 --- a/src/webR/webr-chan.ts +++ b/src/webR/webr-chan.ts @@ -156,6 +156,12 @@ export interface FSMountMessage extends Message { }; } +/** @internal */ +export interface FSSyncfsMessage extends Message { + type: 'syncfs'; + data: { populate: boolean }; +} + /** @internal */ export interface FSReadFileMessage extends Message { type: 'readFile'; diff --git a/src/webR/webr-main.ts b/src/webR/webr-main.ts index cba7f12e..76897cd5 100644 --- a/src/webR/webr-main.ts +++ b/src/webR/webr-main.ts @@ -25,6 +25,7 @@ import { EvalROptions, FSMessage, FSMountMessage, + FSSyncfsMessage, FSReadFileMessage, FSWriteFileMessage, InstallPackagesOptions, @@ -97,7 +98,7 @@ export type FSNode = { }; /** An Emscripten Filesystem type */ -export type FSType = 'NODEFS' | 'WORKERFS'; +export type FSType = 'NODEFS' | 'WORKERFS' | 'IDBFS'; /** * Configuration settings to be used when mounting Filesystem objects with @@ -474,6 +475,10 @@ export class WebR { const msg: FSMountMessage = { type: 'mount', data: { type, options, mountpoint } }; await this.#chan.request(msg); }, + syncfs: async (populate: boolean): Promise => { + const msg: FSSyncfsMessage = { type: 'syncfs', data: { populate } }; + await this.#chan.request(msg); + }, readFile: async (path: string, flags?: string): Promise => { const msg: FSReadFileMessage = { type: 'readFile', data: { path, flags } }; const payload = await this.#chan.request(msg); diff --git a/src/webR/webr-worker.ts b/src/webR/webr-worker.ts index 50cd36c0..c1ea4264 100644 --- a/src/webR/webr-worker.ts +++ b/src/webR/webr-worker.ts @@ -1,6 +1,6 @@ import { loadScript } from './compat'; import { ChannelWorker } from './chan/channel'; -import { newChannelWorker, ChannelInitMessage } from './chan/channel-common'; +import { newChannelWorker, ChannelInitMessage, ChannelType } from './chan/channel-common'; import { Message, Request, newResponse } from './chan/message'; import { 
FSNode, WebROptions } from './webr-main'; import { EmPtr, Module } from './emscripten'; @@ -32,6 +32,7 @@ import { ShelterMessage, ShelterDestroyMessage, InstallPackagesMessage, + FSSyncfsMessage, } from './webr-chan'; let initialised = false; @@ -47,6 +48,7 @@ const onWorkerMessage = function (msg: Message) { } const messageInit = msg as ChannelInitMessage; chan = newChannelWorker(messageInit); + messageInit.data.config.channelType = messageInit.data.channelType; init(messageInit.data.config); initialised = true; return; @@ -108,11 +110,28 @@ function dispatch(msg: Message): void { } case 'mount': { const msg = reqMsg as FSMountMessage; - const fs = Module.FS.filesystems[msg.data.type]; + const type = msg.data.type; + if (type === "IDBFS" && _config.channelType == ChannelType.SharedArrayBuffer) { + throw new Error( + 'The `IDBFS` filesystem type is not supported under the `SharedArrayBuffer` ' + + 'communication channel. The `PostMessage` communication channel must be used.' + ); + } + const fs = Module.FS.filesystems[type]; Module.FS.mount(fs, msg.data.options, msg.data.mountpoint); write({ obj: null, payloadType: 'raw' }); break; } + case 'syncfs': { + const msg = reqMsg as FSSyncfsMessage; + Module.FS.syncfs(msg.data.populate, (err: string | undefined) => { + if (err) { + throw new Error(`Emscripten \`syncfs\` error: "${err}".`); + } + write({ obj: null, payloadType: 'raw' }); + }); + break; + } case 'readFile': { const msg = reqMsg as FSReadFileMessage; const reqData = msg.data;