From 8f788a1127b1863f07eb743839bfb0da2b44ae43 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 17 Jun 2024 13:29:49 +0100 Subject: [PATCH] Handle IDBFS persistent storage under PostMessage --- NEWS.md | 2 ++ R/Makefile | 2 +- packages/webr/DESCRIPTION | 2 +- packages/webr/R/mount.R | 10 +++++- packages/webr/man/mount.Rd | 8 ++++- packages/webr/src/init.c | 2 ++ packages/webr/src/mount.c | 31 ++++++++++++++++ src/docs/mounting.qmd | 73 +++++++++++++++++++++++++++++++++++--- src/webR/webr-chan.ts | 6 ++++ src/webR/webr-main.ts | 7 +++- src/webR/webr-worker.ts | 23 ++++++++++-- 11 files changed, 155 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index 67113abb..77731474 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ * The capturing mechanism of `captureR()` has been updated so that memory reallocation is performed when outputting very long lines. If reallocation is not possible (e.g. the environment does not have enough free memory to hold the entire line), the previous behaviour of truncating the line output is maintained (#434). +* Enabled the Emscripten IDBFS virtual filesystem driver. This filesystem type can be used to persist data in web browser storage across page reloads. This filesystem type must be used with the `PostMessage` communication channel (#56, #442). + ## Breaking changes * The `ServiceWorker` communication channel has been deprecated. Users should use the `SharedArrayBuffer` channel where cross-origin isolation is possible, or otherwise use the `PostMessage` channel. For the moment the `ServiceWorker` channel can still be used, but emits a warning at start up. The channel will be removed entirely in a future version of webR. 
diff --git a/R/Makefile b/R/Makefile index c5b077e1..4fec09b8 100644 --- a/R/Makefile +++ b/R/Makefile @@ -147,7 +147,7 @@ MAIN_LDFLAGS += -s EXIT_RUNTIME=1 MAIN_LDFLAGS += -s ERROR_ON_UNDEFINED_SYMBOLS=0 MAIN_LDFLAGS += -s EXPORTED_RUNTIME_METHODS=$(EXPORTED_RUNTIME_METHODS) MAIN_LDFLAGS += -s FETCH=1 -MAIN_LDFLAGS += -lworkerfs.js -lnodefs.js +MAIN_LDFLAGS += -lworkerfs.js -lnodefs.js -lidbfs.js MAIN_LDFLAGS += $(FORTRAN_WASM_LDADD) MAIN_LDFLAGS += $(WASM_OPT_LDADD) diff --git a/packages/webr/DESCRIPTION b/packages/webr/DESCRIPTION index eebb69d9..adf85234 100644 --- a/packages/webr/DESCRIPTION +++ b/packages/webr/DESCRIPTION @@ -17,4 +17,4 @@ Imports: Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 diff --git a/packages/webr/R/mount.R b/packages/webr/R/mount.R index f1f1dd66..5a790ab6 100644 --- a/packages/webr/R/mount.R +++ b/packages/webr/R/mount.R @@ -16,12 +16,18 @@ #' `mountpoint`. This filesystem type can only be used when webR is running #' under Node. #' +#' When mounting an Emscripten "idbfs" type filesystem, files will be persisted +#' or populated from/to a browser-based IndexedDB database when the JavaScript +#' function `Module.FS.syncfs` is invoked. See the Emscripten `IDBFS` +#' documentation for more information. This filesystem type can only be used +#' when webR is running in a web browser. +#' #' @param mountpoint a character string giving the path to a directory to mount #' onto in the Emscripten virtual filesystem. #' @param source a character string giving the location of the data source to be #' mounted. #' @param type a character string giving the type of Emscripten filesystem to be -#' mounted: "workerfs" or "nodefs". +#' mounted: "workerfs", "nodefs", or "idbfs". 
#' #' @export mount <- function(mountpoint, source, type = "workerfs") { @@ -34,6 +40,8 @@ mount <- function(mountpoint, source, type = "workerfs") { invisible(.Call(ffi_mount_workerfs, base_url, mountpoint)) } else if (tolower(type) == "nodefs") { invisible(.Call(ffi_mount_nodefs, source, mountpoint)) + } else if (tolower(type) == "idbfs") { + invisible(.Call(ffi_mount_idbfs, mountpoint)) } else { stop(paste("Unsupported Emscripten Filesystem type:", type)) } diff --git a/packages/webr/man/mount.Rd b/packages/webr/man/mount.Rd index 21482235..7a6d77f6 100644 --- a/packages/webr/man/mount.Rd +++ b/packages/webr/man/mount.Rd @@ -17,7 +17,7 @@ onto in the Emscripten virtual filesystem.} mounted.} \item{type}{a character string giving the type of Emscripten filesystem to be -mounted: "workerfs" or "nodefs".} +mounted: "workerfs", "nodefs", or "idbfs".} } \description{ Uses the Emscripten filesystem API to mount a filesystem object onto a given @@ -34,4 +34,10 @@ the path to a physical directory on the host filesystem. The host directory will be mapped into the virtual filesystem and mounted onto the directory \code{mountpoint}. This filesystem type can only be used when webR is running under Node. + +When mounting an Emscripten "idbfs" type filesystem, files will be persisted +or populated from/to a browser-based IndexedDB database when the JavaScript +function \code{Module.FS.syncfs} is invoked. See the Emscripten \code{IDBFS} +documentation for more information. This filesystem type can only be used +when webR is running in a web browser. 
} diff --git a/packages/webr/src/init.c b/packages/webr/src/init.c index 78b22cd2..1839a826 100644 --- a/packages/webr/src/init.c +++ b/packages/webr/src/init.c @@ -10,6 +10,7 @@ extern SEXP ffi_dev_canvas_cache(void); extern SEXP ffi_dev_canvas_destroy(SEXP); extern SEXP ffi_mount_workerfs(SEXP, SEXP); extern SEXP ffi_mount_nodefs(SEXP, SEXP); +extern SEXP ffi_mount_idbfs(SEXP); extern SEXP ffi_unmount(SEXP); static @@ -23,6 +24,7 @@ const R_CallMethodDef CallEntries[] = { { "ffi_dev_canvas_destroy", (DL_FUNC) &ffi_dev_canvas_destroy, 1}, { "ffi_mount_workerfs", (DL_FUNC) &ffi_mount_workerfs, 2}, { "ffi_mount_nodefs", (DL_FUNC) &ffi_mount_nodefs, 2}, + { "ffi_mount_idbfs", (DL_FUNC) &ffi_mount_idbfs, 1}, { "ffi_unmount", (DL_FUNC) &ffi_unmount, 1}, { NULL, NULL, 0} }; diff --git a/packages/webr/src/mount.c b/packages/webr/src/mount.c index a3e69c3f..c2f2bfa1 100644 --- a/packages/webr/src/mount.c +++ b/packages/webr/src/mount.c @@ -74,6 +74,37 @@ SEXP ffi_mount_nodefs(SEXP source, SEXP mountpoint) { #endif } +SEXP ffi_mount_idbfs(SEXP mountpoint) { +#ifdef __EMSCRIPTEN__ + CHECK_STRING(mountpoint); + + EM_ASM({ + // Stop if we're not able to use a IDBFS filesystem object + if (typeof IN_NODE === 'boolean' && IN_NODE === true) { + const msg = Module.allocateUTF8OnStack( + 'The `IDBFS` filesystem object can only be used when running in a web browser.' 
+ ); + Module._Rf_error(msg); + } + const mountpoint = UTF8ToString($0); + try { + Module.FS.mount(Module.FS.filesystems.IDBFS, {}, mountpoint); + } catch (e) { + let msg = e.message; + if (e.name === "ErrnoError" && e.errno === 10) { + const dir = Module.UTF8ToString($0); + msg = "Unable to mount directory, `" + dir + "` is already mounted."; + } + Module._Rf_error(Module.allocateUTF8OnStack(msg)); + } + }, R_CHAR(STRING_ELT(mountpoint, 0))); + + return R_NilValue; +#else + Rf_error("Function must be running under Emscripten."); +#endif +} + SEXP ffi_unmount(SEXP mountpoint) { #ifdef __EMSCRIPTEN__ CHECK_STRING(mountpoint); diff --git a/src/docs/mounting.qmd b/src/docs/mounting.qmd index e03c86a7..4eb3d99a 100644 --- a/src/docs/mounting.qmd +++ b/src/docs/mounting.qmd @@ -10,15 +10,21 @@ The [Emscripten filesystem API](https://emscripten.org/docs/api_reference/Filesy Mounting images and directories in this way gives the Wasm R process access to arbitrary external data, potentially including datasets, scripts, or R packages [pre-compiled for WebAssembly](building.qmd). -Emscripten's API provides several types of virtual filesystem, but for technical reasons^[Currently, webR blocks in the JavaScript worker thread while it waits for R input to be evaluated. This blocking means that Emscripten filesystems that depend on asynchronous browser APIs, such as [`IDBFS`](https://emscripten.org/docs/api_reference/Filesystem-API.html#filesystem-api-idbfs), do not work.] only the following filesystems are available for use with webR. +Emscripten's API allows for several types of virtual filesystem, depending on the execution environment. The following filesystems are available for use with webR: | Filesystem | Description | Web Browser | Node.js | |------|-----|------|------| -| `WORKERFS` | Mount filesystem images. | ✅ | ✅ | +| `WORKERFS` | Mount Emscripten filesystem images. | ✅ | ✅[^workerfs] | | `NODEFS` | Mount existing host directories. 
| ❌ | ✅ | +| `IDBFS` | Browser-based persistent storage using the [IndexedDB API](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API). | ✅[^idbfs] | ❌ | + +[^workerfs]: Be aware of the current GitHub issue [#328](https://github.com/r-wasm/webr/issues/328). +[^idbfs]: Using the `PostMessage` [communication channel](communication.qmd) only. ## Emscripten filesystem images +Emscripten filesystem images can be mounted using the `WORKERFS` filesystem type. + The [`file_packager`](https://emscripten.org/docs/porting/files/packaging_files.html#packaging-using-the-file-packager-tool) tool, provided by Emscripten, takes in a directory structure as input and produces webR compatible filesystem images as output. The [`file_packager`](https://emscripten.org/docs/porting/files/packaging_files.html#packaging-using-the-file-packager-tool) tool may be invoked from R using the [rwasm](https://r-wasm.github.io/rwasm/) R package: ```{r eval=FALSE} @@ -105,12 +111,12 @@ See the [Emscripten `FS.mount()` documentation](https://emscripten.org/docs/api_ ## Mount an existing host directory +The `NODEFS` filesystem type maps directories that exist on the host machine so that they are accessible in the WebAssembly process. + ::: callout-warning `NODEFS` is only available when running webR under Node.js. ::: -The `NODEFS` filesystem type maps directories that exist on the host machine so that they are accessible in the WebAssembly process. - To mount the directory `./extra` on the virtual filesystem at `/data`, use either the JavaScript or R mount API with the filesystem type set to `"NODEFS"`. ::: {.panel-tabset} @@ -130,6 +136,65 @@ webr::mount( ) ``` +::: + +## IndexedDB-based persistent storage + +When using webR in a web browser, an [IndexedDB](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API)-based persistent storage space can be mounted using the `IDBFS` filesystem type. 
+ +::: {.callout-warning} + +Due to how webR blocks for input in the worker thread, the `IDBFS` filesystem type **does not work** when using the `SharedArrayBuffer` communication channel. WebR must be configured to use the `PostMessage` communication channel to use `IDBFS` persistent storage. + +::: + +### Mounting + +First, create the directory to contain persistent file storage, and then mount using the `IDBFS` filesystem type. + +::: {.panel-tabset} +## JavaScript + +``` javascript +await webR.FS.mkdir('/data'); +await webR.FS.mount('IDBFS', {}, '/data'); +await webR.FS.syncfs(true); +``` + +## R +```{r eval=FALSE} +webr::mount(mountpoint = "/data", type = "IDBFS") +webr::eval_js("Module.FS.syncfs(true, () => {})") +``` + +::: + +### Populating and persisting files + +After mounting the filesystem using [`WebR.FS.mount()`](api/js/classes/WebR.WebR.md#fs), the Emscripten FS API function [`FS.syncfs()`](https://emscripten.org/docs/api_reference/Filesystem-API.html#FS.syncfs) has been invoked with the `populate` argument set to `true`. This extra step is **required** to initialise the virtual filesystem with any previously existing data files in the browser's IndexedDB storage. Without it, the filesystem will always be initially mounted as an empty directory. + +A similar `FS.syncfs()` step is required to persist the current state of the filesystem to the browser's IndexedDB storage. + +::: {.panel-tabset} +## JavaScript + +``` javascript +await webR.FS.syncfs(false); +``` + +## R +```{r eval=FALSE} +webr::eval_js("Module.FS.syncfs(false, () => {})") +``` ::: +Here the populate argument is set to `false`, meaning to save the current state of the filesystem to IndexedDB. When writing to the filesystem you should be sure to invoke `syncfs(false)` before the web page containing webR is closed to ensure that the output file data will be persisted in the backing IndexedDB storage. 
+ +### Web storage caveats + +Filesystem data stored in an IndexedDB database can only be accessed within the current [origin](https://developer.mozilla.org/en-US/docs/Glossary/Origin), loosely the current web page's host domain and port. + +The way in which web browsers decide how much storage space to allocate for data and what to remove when limits are reached differs between browsers and is not always simple to calculate. Be aware of browser [storage quotas and eviction criteria](https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria) and note that data stored in an `IDBFS` filesystem type is stored only on a "best-effort" basis. It can be removed by the browser at any time, autonomously or by the user interacting through the browser's UI. + +In private browsing mode, for example, stored data is usually deleted when the private session ends. diff --git a/src/webR/webr-chan.ts b/src/webR/webr-chan.ts index 020ce09f..ba22a399 100644 --- a/src/webR/webr-chan.ts +++ b/src/webR/webr-chan.ts @@ -156,6 +156,12 @@ export interface FSMountMessage extends Message { }; } +/** @internal */ +export interface FSSyncfsMessage extends Message { + type: 'syncfs'; + data: { populate: boolean }; +} + /** @internal */ export interface FSReadFileMessage extends Message { type: 'readFile'; diff --git a/src/webR/webr-main.ts b/src/webR/webr-main.ts index cba7f12e..76897cd5 100644 --- a/src/webR/webr-main.ts +++ b/src/webR/webr-main.ts @@ -25,6 +25,7 @@ import { EvalROptions, FSMessage, FSMountMessage, + FSSyncfsMessage, FSReadFileMessage, FSWriteFileMessage, InstallPackagesOptions, @@ -97,7 +98,7 @@ export type FSNode = { }; /** An Emscripten Filesystem type */ -export type FSType = 'NODEFS' | 'WORKERFS'; +export type FSType = 'NODEFS' | 'WORKERFS' | 'IDBFS'; /** * Configuration settings to be used when mounting Filesystem objects with @@ -474,6 +475,10 @@ export class WebR { const msg: FSMountMessage = { type: 'mount', data: { type, 
options, mountpoint } };
       await this.#chan.request(msg);
     },
+    syncfs: async (populate: boolean): Promise<void> => { // populate=true loads from IndexedDB, false persists to it
+      const msg: FSSyncfsMessage = { type: 'syncfs', data: { populate } };
+      await this.#chan.request(msg);
+    },
     readFile: async (path: string, flags?: string): Promise<Uint8Array> => {
       const msg: FSReadFileMessage = { type: 'readFile', data: { path, flags } };
       const payload = await this.#chan.request(msg);
diff --git a/src/webR/webr-worker.ts b/src/webR/webr-worker.ts
index 50cd36c0..bff76d1d 100644
--- a/src/webR/webr-worker.ts
+++ b/src/webR/webr-worker.ts
@@ -1,6 +1,6 @@
 import { loadScript } from './compat';
 import { ChannelWorker } from './chan/channel';
-import { newChannelWorker, ChannelInitMessage } from './chan/channel-common';
+import { newChannelWorker, ChannelInitMessage, ChannelType } from './chan/channel-common';
 import { Message, Request, newResponse } from './chan/message';
 import { FSNode, WebROptions } from './webr-main';
 import { EmPtr, Module } from './emscripten';
@@ -32,6 +32,7 @@ import {
   ShelterMessage,
   ShelterDestroyMessage,
   InstallPackagesMessage,
+  FSSyncfsMessage,
 } from './webr-chan';
 
 let initialised = false;
@@ -47,6 +48,7 @@ const onWorkerMessage = function (msg: Message) {
     }
     const messageInit = msg as ChannelInitMessage;
     chan = newChannelWorker(messageInit);
+    messageInit.data.config.channelType = messageInit.data.channelType;
     init(messageInit.data.config);
     initialised = true;
     return;
@@ -108,11 +110,28 @@ function dispatch(msg: Message): void {
         }
         case 'mount': {
           const msg = reqMsg as FSMountMessage;
-          const fs = Module.FS.filesystems[msg.data.type];
+          const type = msg.data.type;
+          if (type === "IDBFS" && _config.channelType === ChannelType.SharedArrayBuffer) {
+            throw new Error(
+              'The `IDBFS` filesystem type is not supported under the `SharedArrayBuffer` ' +
+              'communication channel. The `PostMessage` communication channel must be used.'
+            );
+          }
+          const fs = Module.FS.filesystems[type];
           Module.FS.mount(fs, msg.data.options, msg.data.mountpoint);
           write({ obj: null, payloadType: 'raw' });
           break;
         }
+        case 'syncfs': {
+          const msg = reqMsg as FSSyncfsMessage;
+          Module.FS.syncfs(msg.data.populate, (err: string | undefined) => {
+            if (err) {
+              throw new Error(`Emscripten \`syncfs\` error: "${err}".`); // NOTE(review): thrown from the callback, so the requester likely gets no reply on failure - confirm
+            }
+            write({ obj: null, payloadType: 'raw' }); // reply only once the sync has completed, so awaiting callers see the synced state
+          });
+          break;
+        }
         case 'readFile': {
           const msg = reqMsg as FSReadFileMessage;
           const reqData = msg.data;