Skip to content

Commit

Permalink
Handle IDBFS persistent storage under PostMessage
Browse files Browse the repository at this point in the history
  • Loading branch information
georgestagg committed Jun 19, 2024
1 parent 0c5a90f commit 8f788a1
Show file tree
Hide file tree
Showing 11 changed files with 155 additions and 11 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

* The capturing mechanism of `captureR()` has been updated so that memory reallocation is performed when outputting very long lines. If reallocation is not possible (e.g. the environment does not have enough free memory to hold the entire line), the previous behaviour of truncating the line output is maintained (#434).

* Enabled the Emscripten IDBFS virtual filesystem driver. This filesystem type can be used to persist data in web browser storage across page reloads. This filesystem type must be used with the `PostMessage` communication channel (#56, #442).

## Breaking changes

* The `ServiceWorker` communication channel has been deprecated. Users should use the `SharedArrayBuffer` channel where cross-origin isolation is possible, or otherwise use the `PostMessage` channel. For the moment the `ServiceWorker` channel can still be used, but emits a warning at start up. The channel will be removed entirely in a future version of webR.
Expand Down
2 changes: 1 addition & 1 deletion R/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ MAIN_LDFLAGS += -s EXIT_RUNTIME=1
MAIN_LDFLAGS += -s ERROR_ON_UNDEFINED_SYMBOLS=0
MAIN_LDFLAGS += -s EXPORTED_RUNTIME_METHODS=$(EXPORTED_RUNTIME_METHODS)
MAIN_LDFLAGS += -s FETCH=1
MAIN_LDFLAGS += -lworkerfs.js -lnodefs.js
MAIN_LDFLAGS += -lworkerfs.js -lnodefs.js -lidbfs.js
MAIN_LDFLAGS += $(FORTRAN_WASM_LDADD)
MAIN_LDFLAGS += $(WASM_OPT_LDADD)

Expand Down
2 changes: 1 addition & 1 deletion packages/webr/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ Imports:
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
10 changes: 9 additions & 1 deletion packages/webr/R/mount.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,18 @@
#' `mountpoint`. This filesystem type can only be used when webR is running
#' under Node.
#'
#' When mounting an Emscripten "idbfs" type filesystem, files will be persisted
#' to, or populated from, a browser-based IndexedDB database when the JavaScript
#' function `Module.FS.syncfs` is invoked. See the Emscripten `IDBFS`
#' documentation for more information. This filesystem type can only be used
#' when webR is running in a web browser.
#'
#' @param mountpoint a character string giving the path to a directory to mount
#' onto in the Emscripten virtual filesystem.
#' @param source a character string giving the location of the data source to be
#' mounted.
#' @param type a character string giving the type of Emscripten filesystem to be
#' mounted: "workerfs" or "nodefs".
#' mounted: "workerfs", "nodefs", or "idbfs".
#'
#' @export
mount <- function(mountpoint, source, type = "workerfs") {
Expand All @@ -34,6 +40,8 @@ mount <- function(mountpoint, source, type = "workerfs") {
invisible(.Call(ffi_mount_workerfs, base_url, mountpoint))
} else if (tolower(type) == "nodefs") {
invisible(.Call(ffi_mount_nodefs, source, mountpoint))
} else if (tolower(type) == "idbfs") {
invisible(.Call(ffi_mount_idbfs, mountpoint))
} else {
stop(paste("Unsupported Emscripten Filesystem type:", type))
}
Expand Down
8 changes: 7 additions & 1 deletion packages/webr/man/mount.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions packages/webr/src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ extern SEXP ffi_dev_canvas_cache(void);
extern SEXP ffi_dev_canvas_destroy(SEXP);
extern SEXP ffi_mount_workerfs(SEXP, SEXP);
extern SEXP ffi_mount_nodefs(SEXP, SEXP);
extern SEXP ffi_mount_idbfs(SEXP);
extern SEXP ffi_unmount(SEXP);

static
Expand All @@ -23,6 +24,7 @@ const R_CallMethodDef CallEntries[] = {
{ "ffi_dev_canvas_destroy", (DL_FUNC) &ffi_dev_canvas_destroy, 1},
{ "ffi_mount_workerfs", (DL_FUNC) &ffi_mount_workerfs, 2},
{ "ffi_mount_nodefs", (DL_FUNC) &ffi_mount_nodefs, 2},
{ "ffi_mount_idbfs", (DL_FUNC) &ffi_mount_idbfs, 1},
{ "ffi_unmount", (DL_FUNC) &ffi_unmount, 1},
{ NULL, NULL, 0}
};
Expand Down
31 changes: 31 additions & 0 deletions packages/webr/src/mount.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,37 @@ SEXP ffi_mount_nodefs(SEXP source, SEXP mountpoint) {
#endif
}

/*
 * Mount an Emscripten IDBFS filesystem at the given mountpoint.
 *
 * mountpoint: checked with CHECK_STRING; its first element is passed to
 *             JavaScript as the directory to mount onto.
 *
 * Returns R_NilValue on success. Raises an R error when running under Node
 * (IN_NODE is true), when FS.mount() throws, or when the package has not
 * been built with Emscripten.
 */
SEXP ffi_mount_idbfs(SEXP mountpoint) {
#ifdef __EMSCRIPTEN__
  CHECK_STRING(mountpoint);

  EM_ASM({
    // Stop if we're not able to use an IDBFS filesystem object
    if (typeof IN_NODE === 'boolean' && IN_NODE === true) {
      const msg = Module.allocateUTF8OnStack(
        'The `IDBFS` filesystem object can only be used when running in a web browser.'
      );
      Module._Rf_error(msg);
    }
    const mountpoint = UTF8ToString($0);
    try {
      Module.FS.mount(Module.FS.filesystems.IDBFS, {}, mountpoint);
    } catch (e) {
      let msg = e.message;
      // An ErrnoError with errno 10 is reported as the mountpoint already
      // being mounted; any other failure keeps the original message.
      if (e.name === "ErrnoError" && e.errno === 10) {
        // Reuse the string decoded above rather than decoding $0 again.
        msg = "Unable to mount directory, `" + mountpoint + "` is already mounted.";
      }
      Module._Rf_error(Module.allocateUTF8OnStack(msg));
    }
  }, R_CHAR(STRING_ELT(mountpoint, 0)));

  return R_NilValue;
#else
  Rf_error("Function must be running under Emscripten.");
#endif
}

SEXP ffi_unmount(SEXP mountpoint) {
#ifdef __EMSCRIPTEN__
CHECK_STRING(mountpoint);
Expand Down
73 changes: 69 additions & 4 deletions src/docs/mounting.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,21 @@ The [Emscripten filesystem API](https://emscripten.org/docs/api_reference/Filesy

Mounting images and directories in this way gives the Wasm R process access to arbitrary external data, potentially including datasets, scripts, or R packages [pre-compiled for WebAssembly](building.qmd).

Emscripten's API provides several types of virtual filesystem, but for technical reasons^[Currently, webR blocks in the JavaScript worker thread while it waits for R input to be evaluated. This blocking means that Emscripten filesystems that depend on asynchronous browser APIs, such as [`IDBFS`](https://emscripten.org/docs/api_reference/Filesystem-API.html#filesystem-api-idbfs), do not work.] only the following filesystems are available for use with webR.
Emscripten's API allows for several types of virtual filesystem, depending on the execution environment. The following filesystems are available for use with webR:

| Filesystem | Description | Web Browser | Node.js |
|------|-----|------|------|
| `WORKERFS` | Mount filesystem images. | &#x2705; | &#x2705; |
| `WORKERFS` | Mount Emscripten filesystem images. | &#x2705; | &#x2705;[^workerfs] |
| `NODEFS` | Mount existing host directories. | &#x274C; | &#x2705; |
| `IDBFS` | Browser-based persistent storage using the [IndexedDB API](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API). | &#x2705;[^idbfs] | &#x274C; |

[^workerfs]: Be aware of the current GitHub issue [#328](https://github.com/r-wasm/webr/issues/328).
[^idbfs]: Using the `PostMessage` [communication channel](communication.qmd) only.

## Emscripten filesystem images

Emscripten filesystem images can be mounted using the `WORKERFS` filesystem type.

The [`file_packager`](https://emscripten.org/docs/porting/files/packaging_files.html#packaging-using-the-file-packager-tool) tool, provided by Emscripten, takes in a directory structure as input and produces webR compatible filesystem images as output. The [`file_packager`](https://emscripten.org/docs/porting/files/packaging_files.html#packaging-using-the-file-packager-tool) tool may be invoked from R using the [rwasm](https://r-wasm.github.io/rwasm/) R package:

```{r eval=FALSE}
Expand Down Expand Up @@ -105,12 +111,12 @@ See the [Emscripten `FS.mount()` documentation](https://emscripten.org/docs/api_

## Mount an existing host directory

The `NODEFS` filesystem type maps directories that exist on the host machine so that they are accessible in the WebAssembly process.

::: callout-warning
`NODEFS` is only available when running webR under Node.js.
:::

The `NODEFS` filesystem type maps directories that exist on the host machine so that they are accessible in the WebAssembly process.

To mount the directory `./extra` on the virtual filesystem at `/data`, use either the JavaScript or R mount API with the filesystem type set to `"NODEFS"`.

::: {.panel-tabset}
Expand All @@ -130,6 +136,65 @@ webr::mount(
)
```

:::

## IndexedDB-based persistent storage

When using webR in a web browser, an [IndexedDB](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API)-based persistent storage space can be mounted using the `IDBFS` filesystem type.

::: {.callout-warning}

Due to how webR blocks for input in the worker thread, the `IDBFS` filesystem type **does not work** when using the `SharedArrayBuffer` communication channel. WebR must be configured to use the `PostMessage` communication channel to use `IDBFS` persistent storage.

:::

### Mounting

First, create the directory to contain persistent file storage, and then mount using the `IDBFS` filesystem type.

::: {.panel-tabset}
## JavaScript

``` javascript
await webR.FS.mkdir('/data');
await webR.FS.mount('IDBFS', {}, '/data');
await webR.FS.syncfs(true);
```

## R
```{r eval=FALSE}
webr::mount(mountpoint = "/data", type = "IDBFS")
webr::eval_js("Module.FS.syncfs(true, () => {})")
```

:::

### Populating and persisting files

In the examples above, after the filesystem is mounted using [`WebR.FS.mount()`](api/js/classes/WebR.WebR.md#fs), the Emscripten FS API function [`FS.syncfs()`](https://emscripten.org/docs/api_reference/Filesystem-API.html#FS.syncfs) is invoked with the `populate` argument set to `true`. This extra step is **required** to initialise the virtual filesystem with any previously existing data files in the browser's IndexedDB storage. Without it, the filesystem will always be initially mounted as an empty directory.

A similar `FS.syncfs()` step is required to persist the current state of the filesystem to the browser's IndexedDB storage.

::: {.panel-tabset}
## JavaScript

``` javascript
await webR.FS.syncfs(false);
```

## R
```{r eval=FALSE}
webr::eval_js("Module.FS.syncfs(false, () => {})")
```

:::

Here the `populate` argument is set to `false`, which saves the current state of the filesystem to IndexedDB. When writing to the filesystem, be sure to invoke `syncfs(false)` before the web page containing webR is closed, so that the output file data is persisted in the backing IndexedDB storage.

### Web storage caveats

Filesystem data stored in an IndexedDB database can only be accessed within the current [origin](https://developer.mozilla.org/en-US/docs/Glossary/Origin), loosely the current web page's host domain and port.

The way in which web browsers decide how much storage space to allocate for data and what to remove when limits are reached differs between browsers and is not always simple to calculate. Be aware of browser [storage quotas and eviction criteria](https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria) and note that data stored in an `IDBFS` filesystem type is stored only on a "best-effort" basis. It can be removed by the browser at any time, autonomously or by the user interacting through the browser's UI.

In private browsing mode, for example, stored data is usually deleted when the private session ends.
6 changes: 6 additions & 0 deletions src/webR/webr-chan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ export interface FSMountMessage extends Message {
};
}

/**
 * Channel message requesting an Emscripten `FS.syncfs()` operation.
 * @internal
 */
export interface FSSyncfsMessage extends Message {
  type: 'syncfs';
  data: {
    /** Value forwarded as the `populate` argument of `FS.syncfs()`. */
    populate: boolean;
  };
}

/** @internal */
export interface FSReadFileMessage extends Message {
type: 'readFile';
Expand Down
7 changes: 6 additions & 1 deletion src/webR/webr-main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import {
EvalROptions,
FSMessage,
FSMountMessage,
FSSyncfsMessage,
FSReadFileMessage,
FSWriteFileMessage,
InstallPackagesOptions,
Expand Down Expand Up @@ -97,7 +98,7 @@ export type FSNode = {
};

/** An Emscripten Filesystem type */
export type FSType = 'NODEFS' | 'WORKERFS';
export type FSType = 'NODEFS' | 'WORKERFS' | 'IDBFS';

/**
* Configuration settings to be used when mounting Filesystem objects with
Expand Down Expand Up @@ -474,6 +475,10 @@ export class WebR {
const msg: FSMountMessage = { type: 'mount', data: { type, options, mountpoint } };
await this.#chan.request(msg);
},
/**
 * Send a `syncfs` request over the communication channel and wait for the
 * response.
 * @param populate - `true` to initialise the filesystem from IndexedDB
 * storage, `false` to save the current filesystem state to IndexedDB.
 */
syncfs: async (populate: boolean): Promise<void> => {
  const data = { populate };
  const request: FSSyncfsMessage = { type: 'syncfs', data };
  await this.#chan.request(request);
},
readFile: async (path: string, flags?: string): Promise<Uint8Array> => {
const msg: FSReadFileMessage = { type: 'readFile', data: { path, flags } };
const payload = await this.#chan.request(msg);
Expand Down
23 changes: 21 additions & 2 deletions src/webR/webr-worker.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { loadScript } from './compat';
import { ChannelWorker } from './chan/channel';
import { newChannelWorker, ChannelInitMessage } from './chan/channel-common';
import { newChannelWorker, ChannelInitMessage, ChannelType } from './chan/channel-common';
import { Message, Request, newResponse } from './chan/message';
import { FSNode, WebROptions } from './webr-main';
import { EmPtr, Module } from './emscripten';
Expand Down Expand Up @@ -32,6 +32,7 @@ import {
ShelterMessage,
ShelterDestroyMessage,
InstallPackagesMessage,
FSSyncfsMessage,
} from './webr-chan';

let initialised = false;
Expand All @@ -47,6 +48,7 @@ const onWorkerMessage = function (msg: Message) {
}
const messageInit = msg as ChannelInitMessage;
chan = newChannelWorker(messageInit);
messageInit.data.config.channelType = messageInit.data.channelType;
init(messageInit.data.config);
initialised = true;
return;
Expand Down Expand Up @@ -108,11 +110,28 @@ function dispatch(msg: Message): void {
}
case 'mount': {
const msg = reqMsg as FSMountMessage;
const fs = Module.FS.filesystems[msg.data.type];
const type = msg.data.type;
if (type === "IDBFS" && _config.channelType == ChannelType.SharedArrayBuffer) {
throw new Error(
'The `IDBFS` filesystem type is not supported under the `SharedArrayBuffer` ' +
'communication channel. The `PostMessage` communication channel must be used.'
);
}
const fs = Module.FS.filesystems[type];
Module.FS.mount(fs, msg.data.options, msg.data.mountpoint);
write({ obj: null, payloadType: 'raw' });
break;
}
case 'syncfs': {
  const msg = reqMsg as FSSyncfsMessage;
  // `FS.syncfs()` completes asynchronously and reports the outcome through
  // its callback, so the response is only sent once the callback has run.
  // Responding straight after the call would tell the requester that the
  // sync finished before any data had actually been read or written.
  Module.FS.syncfs(msg.data.populate, (err: string | undefined) => {
    if (err) {
      // NOTE(review): this is thrown from the completion callback, so no
      // response is written for this request on failure. Confirm how
      // errors should be reported back to the requester here.
      throw new Error(`Emscripten \`syncfs\` error: "${err}".`);
    }
    write({ obj: null, payloadType: 'raw' });
  });
  break;
}
case 'readFile': {
const msg = reqMsg as FSReadFileMessage;
const reqData = msg.data;
Expand Down

0 comments on commit 8f788a1

Please sign in to comment.